Example #1
def map_locations(job, settings):
    # type: (Job, SettingsType) -> None
    """
    Maps directory locations between :mod:`pywps` process execution and produced jobs storage.

    Generates symlink references from the Job UUID to PyWPS UUID results (outputs directory, status and log locations).
    Updates the Job's WPS ID if applicable (job executed locally).
    Assumes that all results are located under the same reference UUID.
    """
    local_path = get_wps_local_status_location(job.status_location, settings)
    if not local_path:
        LOGGER.debug("Not possible to map Job to WPS locations.")
        return
    base_dir, status_xml = os.path.split(local_path)
    job.wps_id = os.path.splitext(status_xml)[0]
    wps_loc = os.path.join(base_dir, str(job.wps_id))
    job_loc = os.path.join(base_dir, str(job.id))
    if wps_loc == job_loc:
        LOGGER.debug("Job already refers to WPS locations.")
        return
    for loc_ext in ["", ".log", ".xml"]:
        wps_ref = wps_loc + loc_ext
        job_ref = job_loc + loc_ext
        # possible that there are no results (e.g.: failed job)
        if os.path.exists(wps_ref):
            os.symlink(wps_ref, job_ref)
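The mapping logic above boils down to UUID-named symlinks. The following is a minimal, standalone sketch of the same idea using only the standard library; the UUIDs and temporary directory are hypothetical stand-ins for the Job/PyWPS identifiers and the WPS output directory.

import os
import tempfile
import uuid

# hypothetical identifiers standing in for job.wps_id and job.id
wps_uuid = str(uuid.uuid4())
job_uuid = str(uuid.uuid4())

base_dir = tempfile.mkdtemp()
# fake PyWPS results: output directory, log file and status XML named after the WPS UUID
os.mkdir(os.path.join(base_dir, wps_uuid))
for ext in [".log", ".xml"]:
    open(os.path.join(base_dir, wps_uuid + ext), "w").close()

# map Job UUID -> WPS UUID references, as map_locations does
for ext in ["", ".log", ".xml"]:
    wps_ref = os.path.join(base_dir, wps_uuid + ext)
    job_ref = os.path.join(base_dir, job_uuid + ext)
    if os.path.exists(wps_ref):  # possible that there are no results (e.g.: failed job)
        os.symlink(wps_ref, job_ref)

print(sorted(os.listdir(base_dir)))  # both UUID variants now appear side by side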
Example #2
File: service.py  Project: 00mjk/weaver
    def __init__(self, url_location, settings):
        # type: (str, SettingsType) -> None
        self._url = url_location
        # location might not exist yet based on worker execution timing
        self._file = get_wps_local_status_location(url_location, settings, must_exist=False)
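The `must_exist=False` flag matters because the status file is resolved before the worker has written it. A minimal sketch of that deferred-resolution pattern, with a hypothetical `resolve_status_path` helper standing in for `get_wps_local_status_location`:

import os
from typing import Optional


def resolve_status_path(url_location, output_dir="/tmp/wps-outputs", must_exist=True):
    # type: (str, str, bool) -> Optional[str]
    """Hypothetical stand-in that maps a status URL onto a local file path."""
    path = os.path.join(output_dir, os.path.basename(url_location))
    if must_exist and not os.path.isfile(path):
        return None
    return path


class StatusLocation(object):
    def __init__(self, url_location):
        # type: (str) -> None
        self._url = url_location
        # location might not exist yet based on worker execution timing
        self._file = resolve_status_path(url_location, must_exist=False)

    def read(self):
        # type: () -> Optional[str]
        # only check for existence when the content is actually needed
        if self._file and os.path.isfile(self._file):
            with open(self._file, encoding="utf-8") as status:
                return status.read()
        return None


status = StatusLocation("https://example.com/wpsoutputs/1234.xml")
print(status.read())  # None until the worker produces the file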
Example #3
    def execute(self, identifier, wps_request, uuid):
        # type: (str, Union[WPSRequest, WorkerRequest], str) -> Union[WPSResponse, HTTPValid]
        """
        Handles the ``Execute`` KVP/XML request submitted on the WPS endpoint.

        Submit WPS request to corresponding WPS-REST endpoint and convert back for requested ``Accept`` content-type.

        Overrides the original execute operation, which will instead be handled by :meth:`execute_job` following a
        callback from the Celery Worker that handles process job creation and monitoring.

        If ``Accept`` is JSON, the result is directly returned from :meth:`_submit_job`.
        If ``Accept`` is XML or undefined, :class:`WorkerExecuteResponse` converts the received JSON with XML template.
        """
        result = self._submit_job(wps_request)
        if not isinstance(result, dict):
            return result  # pre-built HTTP response with JSON contents when requested

        # otherwise, recreate the equivalent content with expected XML template format
        job_id = result["jobID"]
        wps_process = self.processes.get(wps_request.identifier)

        # because we are building the XML response (and JSON not explicitly requested)
        # caller is probably a WPS-1 client also expecting a status XML file
        # remap the status location accordingly from the current REST endpoint
        job_url = result["location"]
        if urlparse(job_url).path.endswith(f"/jobs/{job_id}"):
            # file status does not exist yet since client calling this method is waiting for it
            # pywps will generate it once the WorkerExecuteResponse is returned
            status_path = get_wps_local_status_location(job_url,
                                                        self.settings,
                                                        must_exist=False)
            wps_dir = get_wps_output_dir(self.settings)
            wps_url = get_wps_output_url(self.settings)
            job_url = status_path.replace(wps_dir, wps_url, 1)

        # when called by the WSGI app, 'WorkerExecuteResponse.__call__' will generate the XML from 'doc' property,
        # which itself is generated by template substitution of data from above 'json' property
        try:
            return WorkerExecuteResponse(wps_request,
                                         job_id,
                                         wps_process,
                                         job_url,
                                         settings=self.settings)
        except Exception as ex:  # noqa
            LOGGER.exception(
                "Error building XML response by PyWPS Service during WPS Execute result from worker."
            )
            message = f"Failed building XML response from WPS Execute result. Error [{ex!r}]"
            raise OWSNoApplicableCode(message, locator=job_id)
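The status-location remapping near the end of the method is a plain prefix substitution from the local WPS output directory to its public URL. A small standalone sketch under assumed settings values (the directory and URL below are hypothetical, mirroring what `get_wps_output_dir`/`get_wps_output_url` would return):

import os
from urllib.parse import urlparse

# hypothetical values mirroring get_wps_output_dir / get_wps_output_url results
wps_dir = "/data/wps-outputs"
wps_url = "https://example.com/wpsoutputs"

job_id = "0123-4567"
job_url = f"https://example.com/processes/demo/jobs/{job_id}"

if urlparse(job_url).path.endswith(f"/jobs/{job_id}"):
    # pretend resolution of the not-yet-existing status document under the WPS output directory
    status_path = os.path.join(wps_dir, f"{job_id}.xml")
    # swap only the leading directory prefix for the public URL prefix
    job_url = status_path.replace(wps_dir, wps_url, 1)

print(job_url)  # https://example.com/wpsoutputs/0123-4567.xml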
Example #4
def execute_process(task, job_id, wps_url, headers=None):
    # type: (Task, UUID, str, Optional[HeadersType]) -> StatusType
    """
    Celery task that executes the WPS process job and reports its monitoring as status updates (local and remote).
    """
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")

    task_process = get_celery_process()
    rss_start = task_process.memory_info().rss
    registry = get_registry(None)  # local thread, whether locally or dispatched celery
    settings = get_settings(registry)
    # reset the connection because we are in a forked celery process
    db = get_db(registry, reset_connection=True)
    store = db.get_store(StoreJobs)
    job = store.fetch_by_id(job_id)
    job.started = now()
    job.status = Status.STARTED  # will be mapped to 'RUNNING'
    job.status_message = f"Job {Status.STARTED}."  # will preserve detail of STARTED vs RUNNING
    job.save_log(message=job.status_message)

    task_logger = get_task_logger(__name__)
    job.save_log(logger=task_logger, message="Job task setup initiated.")
    load_pywps_config(settings)
    job.progress = JobProgress.SETUP
    job.task_id = task.request.id
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    # Flag to keep track if job is running in background (remote-WPS, CWL app, etc.).
    # If terminate signal is sent to worker task via API dismiss request while still running in background,
    # the raised exception within the task will switch the job to Status.FAILED, but this will not raise an
    # exception here. Since the task execution 'succeeds' without raising, it skips directly to the last 'finally'.
    # Patch it back to Status.DISMISSED in this case.
    task_terminated = True

    try:
        job.progress = JobProgress.DESCRIBE
        job.save_log(logger=task_logger,
                     message=f"Employed WPS URL: [{wps_url!s}]",
                     level=logging.DEBUG)
        job.save_log(
            logger=task_logger,
            message=f"Execute WPS request for process [{job.process!s}]")
        wps_process = fetch_wps_process(job, wps_url, headers, settings)

        # prepare inputs
        job.progress = JobProgress.GET_INPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job input definitions.")
        wps_inputs = parse_wps_inputs(wps_process, job)

        # prepare outputs
        job.progress = JobProgress.GET_OUTPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job output definitions.")
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA)
                       for o in wps_process.processOutputs]

        # if process refers to a remote WPS provider, pass it down to avoid unnecessary re-fetch request
        if job.is_local:
            process = None  # already got all the information needed pre-loaded in PyWPS service
        else:
            service = Service(name=job.service, url=wps_url)
            process = Process.from_ows(wps_process, service, settings)

        job.progress = JobProgress.EXECUTE_REQUEST
        job.save_log(logger=task_logger,
                     message="Starting job process execution.")
        job.save_log(
            logger=task_logger,
            message="Following updates could take a while until the Application Package answers..."
        )

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job,
                                           wps_inputs=wps_inputs,
                                           wps_outputs=wps_outputs,
                                           remote_process=process,
                                           headers=headers)
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(execution.statusLocation, settings)
        job.progress = JobProgress.EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]",
                     wps_status_path)
        if not wps_status_path.startswith("http") and not os.path.isfile(wps_status_path):
            LOGGER.warning("WPS status location not resolved to local path: [%s]", wps_status_path)
        job.save_log(
            logger=task_logger,
            level=logging.DEBUG,
            message=f"Updated job status location: [{wps_status_path}].")

        job.status = Status.RUNNING
        job.status_message = execution.statusMessage or f"{job!s} initiation done."
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JobProgress.EXECUTE_MONITOR_START
        job.save_log(logger=task_logger,
                     message="Starting monitoring of job execution.")
        job = store.update_job(job)

        max_retries = 5
        num_retries = 0
        run_step = 0
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                job.save_log(errors=execution.errors, logger=task_logger)
                job = store.update_job(job)
                raise Exception(
                    f"Could not read status document after {max_retries} retries. Giving up."
                )
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                progress_min = JobProgress.EXECUTE_MONITOR_LOOP
                progress_max = JobProgress.EXECUTE_MONITOR_DONE
                job.progress = progress_min
                run_delay = wait_secs(run_step)
                execution = check_wps_status(location=wps_status_path,
                                             settings=settings,
                                             sleep_secs=run_delay)
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job_status_msg = job_msg or "n/a"
                job_percent = execution.percentCompleted
                job.status_message = f"Job execution monitoring (progress: {job_percent}%, status: {job_status_msg})."

                if execution.isComplete():
                    msg_progress = f" (status: {job_msg})" if job_msg else ""
                    if execution.isSucceded():
                        wps_package.retrieve_package_job_log(
                            execution, job, progress_min, progress_max)
                        job.status = map_status(Status.SUCCEEDED)
                        job.status_message = f"Job succeeded{msg_progress}."
                        job.progress = progress_max
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output_data(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(
                            job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        wps_package.retrieve_package_job_log(
                            execution, job, progress_min, progress_max)
                        job.status_message = f"Job failed{msg_progress}."
                        job.progress = progress_max
                        job.save_log(errors=execution.errors,
                                     logger=task_logger)
                    task_logger.debug(
                        "Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)
                    job = store.update_job(job)

            except Exception as exc:
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = f"Could not read status XML document for {job!s}. Trying again..."
                job.save_log(errors=execution.errors, logger=task_logger)
                job = store.update_job(job)
                sleep(1)
            else:
                num_retries = 0
                run_step += 1
            finally:
                task_terminated = False  # reached only if WPS execution completed (worker not terminated beforehand)
                job = store.update_job(job)

    except Exception as exc:
        # if 'execute_job' finishes quickly before even reaching the 'monitoring loop'
        # consider WPS execution produced an error (therefore Celery worker not terminated)
        task_terminated = False
        LOGGER.exception("Failed running [%s]", job)
        LOGGER.debug("Failed job [%s] raised an exception.", job, exc_info=exc)
        # note: don't update the progress here to preserve last one that was set
        job.status = map_status(Status.FAILED)
        job.status_message = f"Failed to run {job!s}."
        errors = f"{fully_qualified_name(exc)}: {exc!s}"
        job.save_log(errors=errors, logger=task_logger)
        job = store.update_job(job)
    finally:
        # if task worker terminated, local 'job' is out of date compared to remote/background runner last update
        job = store.fetch_by_id(job.id)
        if task_terminated and map_status(job.status) == Status.FAILED:
            job.status = Status.DISMISSED
        task_success = map_status(job.status) not in JOB_STATUS_CATEGORIES[StatusCategory.FAILED]
        collect_statistics(task_process, settings, job, rss_start)
        if task_success:
            job.progress = JobProgress.EXECUTE_MONITOR_END
        job.status_message = f"Job {job.status}."
        job.save_log(logger=task_logger)

        if task_success:
            job.progress = JobProgress.NOTIFY
        send_job_complete_notification_email(job, task_logger, settings)

        if job.status not in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]:
            job.status = Status.SUCCEEDED
        job.status_message = f"Job {job.status}."
        job.mark_finished()
        if task_success:
            job.progress = JobProgress.DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status
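The monitoring loop follows a common poll-with-retry pattern: back off between polls, reset the retry counter after every successful status read, and give up after a fixed number of consecutive failures. Below is a generic, self-contained sketch of that pattern; `poll_status` and the backoff schedule are illustrative stand-ins, not Weaver code.

import random
import time


def backoff_secs(run_step):
    # illustrative backoff schedule, similar in spirit to weaver's wait_secs()
    return min(2 ** run_step, 30)


def poll_status():
    # hypothetical status read that can occasionally fail
    if random.random() < 0.2:
        raise IOError("status document not readable yet")
    return "succeeded" if random.random() < 0.3 else "running"


def monitor(max_retries=5):
    num_retries = 0
    run_step = 0
    status = None
    while status not in ("succeeded", "failed"):
        if num_retries >= max_retries:
            raise RuntimeError(f"Could not read status document after {max_retries} retries. Giving up.")
        try:
            time.sleep(backoff_secs(run_step))
            status = poll_status()
        except Exception:
            num_retries += 1   # only consecutive failures count toward the limit
            time.sleep(1)
        else:
            num_retries = 0    # a successful read resets the counter
            run_step += 1
    return status


print(monitor())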
Example #5
def execute_process(self, job_id, url, headers=None):
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")
    settings = get_settings(app)
    task_logger = get_task_logger(__name__)
    load_pywps_config(settings)

    task_logger.debug("Job task setup.")

    # reset the connection because we are in a forked celery process
    db = get_db(app, reset_connection=True)
    store = db.get_store(StoreJobs)

    job = store.fetch_by_id(job_id)
    job.task_id = self.request.id
    job.progress = JOB_PROGRESS_SETUP
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    try:
        try:
            job.progress = JOB_PROGRESS_DESCRIBE
            job.save_log(
                logger=task_logger,
                message="Execute WPS request for process [{!s}]".format(
                    job.process))
            ssl_verify = get_ssl_verify_option("get", url, settings=settings)
            wps = WebProcessingService(url=url,
                                       headers=get_cookie_headers(headers),
                                       verify=ssl_verify)
            set_wps_language(wps, accept_language=job.accept_language)
            raise_on_xml_exception(wps._capabilities)  # noqa
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS capabilities. Error: [{}].".format(
                    str(ex)))
        try:
            process = wps.describeprocess(job.process)
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS process description. Error: [{}].".format(str(ex)))

        # prepare inputs
        job.progress = JOB_PROGRESS_GET_INPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job input definitions.")
        complex_inputs = []
        for process_input in process.dataInputs:
            if WPS_COMPLEX_DATA in process_input.dataType:
                complex_inputs.append(process_input.identifier)

        try:
            wps_inputs = list()
            for process_input in job.inputs:
                input_id = get_any_id(process_input)
                process_value = get_any_value(process_input)
                # in case of array inputs, must repeat (id,value)
                input_values = process_value if isinstance(process_value, list) else [process_value]

                # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
                input_values = [
                    val[7:] if str(val).startswith("file://") else val
                    for val in input_values
                ]

                # need to use ComplexDataInput structure for complex input
                # need to use literal String for anything else than complex
                # TODO: BoundingBox not supported
                wps_inputs.extend([
                    (input_id, ComplexDataInput(input_value)
                     if input_id in complex_inputs else str(input_value))
                    for input_value in input_values
                ])
        except KeyError:
            wps_inputs = []

        # prepare outputs
        job.progress = JOB_PROGRESS_GET_OUTPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job output definitions.")
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA)
                       for o in process.processOutputs]

        mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC
        job.progress = JOB_PROGRESS_EXECUTE_REQUEST
        job.save_log(logger=task_logger,
                     message="Starting job process execution.")
        job.save_log(
            logger=task_logger,
            message="Following updates could take a while until the Application Package answers..."
        )

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job.process,
                                           wps_inputs=wps_inputs,
                                           wps_outputs=wps_outputs,
                                           mode=mode,
                                           job_uuid=job.id)
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(execution.statusLocation, settings)
        job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]",
                     wps_status_path)
        if not wps_status_path.startswith("http") and not os.path.isfile(wps_status_path):
            LOGGER.warning("WPS status location not resolved to local path: [%s]", wps_status_path)
        job.save_log(logger=task_logger,
                     level=logging.DEBUG,
                     message="Updated job status location: [{}].".format(
                         wps_status_path))

        job.status = map_status(STATUS_STARTED)
        job.status_message = execution.statusMessage or "{} initiation done.".format(str(job))
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START
        job.save_log(logger=task_logger,
                     message="Starting monitoring of job execution.")
        job = store.update_job(job)

        max_retries = 5
        num_retries = 0
        run_step = 0
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                raise Exception(
                    "Could not read status document after {} retries. Giving up."
                    .format(max_retries))
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP
                execution = check_wps_status(location=wps_status_path,
                                             settings=settings,
                                             sleep_secs=wait_secs(run_step))
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job.status_message = "Job execution monitoring (progress: {}%, status: {})."\
                                     .format(execution.percentCompleted, job_msg or "n/a")
                # job.save_log(logger=task_logger)
                # job = store.update_job(job)

                if execution.isComplete():
                    job.mark_finished()
                    job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
                    msg_progress = " (status: {})".format(
                        job_msg) if job_msg else ""
                    if execution.isSucceded():
                        job.status = map_status(STATUS_SUCCEEDED)
                        job.status_message = "Job succeeded{}.".format(
                            msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(
                            job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        job.status_message = "Job failed{}.".format(
                            msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(errors=execution.errors,
                                     logger=task_logger)
                    task_logger.debug(
                        "Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)

            except Exception as exc:
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = "Could not read status XML document for {!s}. Trying again...".format(
                    job)
                job.save_log(errors=execution.errors, logger=task_logger)
                sleep(1)
            else:
                # job.status_message = "Update {}...".format(str(job))
                # job.save_log(logger=task_logger)
                num_retries = 0
                run_step += 1
            finally:
                job = store.update_job(job)

    except Exception as exc:
        LOGGER.exception("Failed running [%s]", job)
        job.status = map_status(STATUS_FAILED)
        job.status_message = "Failed to run {!s}.".format(job)
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR
        exception_class = "{}.{}".format(
            type(exc).__module__,
            type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        job.save_log(errors=errors, logger=task_logger)
    finally:
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
        job.status_message = "Job {}.".format(job.status)
        job.save_log(logger=task_logger)

        # Send email if requested
        if job.notification_email is not None:
            job.progress = JOB_PROGRESS_NOTIFY
            try:
                notify_job_complete(job, job.notification_email, settings)
                message = "Notification email sent successfully."
                job.save_log(logger=task_logger, message=message)
            except Exception as exc:
                exception_class = "{}.{}".format(
                    type(exc).__module__,
                    type(exc).__name__)
                exception = "{0}: {1!s}".format(exception_class, exc)
                message = "Couldn't send notification email ({})".format(
                    exception)
                job.save_log(errors=message,
                             logger=task_logger,
                             message=message)

        job.progress = JOB_PROGRESS_DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status
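The input-preparation block in this older variant folds three concerns together: flattening array inputs, stripping the `file://` scheme that PyWPS rejects, and wrapping complex inputs while passing everything else as literal strings. A condensed, dependency-free sketch of the same mapping; `ComplexRef` below is only a stand-in for owslib's `ComplexDataInput`.

class ComplexRef(object):
    """Stand-in for owslib.wps.ComplexDataInput, used here only for illustration."""
    def __init__(self, reference):
        self.reference = reference

    def __repr__(self):
        return f"ComplexRef({self.reference!r})"


def build_wps_inputs(job_inputs, complex_inputs):
    # type: (list, set) -> list
    wps_inputs = []
    for process_input in job_inputs:
        input_id = process_input["id"]
        process_value = process_input["value"]
        # array inputs must repeat the (id, value) pair once per item
        values = process_value if isinstance(process_value, list) else [process_value]
        # PyWPS does not accept the 'file://' scheme, so strip it down to the plain path
        values = [str(val)[7:] if str(val).startswith("file://") else val for val in values]
        # complex inputs are wrapped, everything else is passed as a literal string
        wps_inputs.extend([
            (input_id, ComplexRef(val) if input_id in complex_inputs else str(val))
            for val in values
        ])
    return wps_inputs


print(build_wps_inputs(
    [{"id": "raster", "value": ["file:///tmp/a.tif", "file:///tmp/b.tif"]},
     {"id": "threshold", "value": 0.5}],
    complex_inputs={"raster"},
))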