Exemplo n.º 1
0
def get_db(container, reset_connection=False):
    # type: (AnySettingsContainer, bool) -> MongoDatabase
    """
    Obtains the database connection from configured application settings.

    If :paramref:`reset_connection` is ``True``, the :paramref:`container` must be the application :class:`Registry` or
    any container that can retrieve it to accomplish reference reset. Otherwise, any settings container can be provided.
    """
    registry = get_registry(container, nothrow=True)
    if not reset_connection and registry and registry.db:
        return registry.db
    database = MongoDatabase(container)
    if reset_connection:
        registry = get_registry(container)
        registry.db = database
    return database
Exemplo n.º 2
0
def get_pywps_service(environ=None, is_worker=False):
    # type: (SettingsType, bool) -> WorkerService
    """
    Generates the PyWPS Service that provides WPS-1/2 XML endpoint.
    """
    environ = environ or {}
    try:
        # get config file
        registry = get_registry()
        settings = get_settings(registry)
        pywps_cfg = environ.get("PYWPS_CFG") or settings.get(
            "PYWPS_CFG") or os.getenv("PYWPS_CFG")
        if not isinstance(
                pywps_cfg,
                ConfigParser) or not settings.get("weaver.wps_configured"):
            load_pywps_config(settings, config=pywps_cfg)

        # call pywps application with processes filtered according to the adapter's definition
        process_store = get_db(registry).get_store(
            StoreProcesses)  # type: StoreProcesses
        processes_wps = [
            process.wps() for process in process_store.list_processes(
                visibility=Visibility.PUBLIC)
        ]
        service = WorkerService(processes_wps,
                                is_worker=is_worker,
                                settings=settings)
    except Exception as ex:
        LOGGER.exception(
            "Error occurred during PyWPS Service and/or Processes setup.")
        raise OWSNoApplicableCode(
            f"Failed setup of PyWPS Service and/or Processes. Error [{ex!r}]")
    return service
Exemplo n.º 3
0
 def _validate_process(self):
     try:
         registry = get_registry()
         store = get_db(registry).get_store(StoreProcesses)
         process = store.fetch_by_id(self.process)  # raise if not found
     except (ProcessNotAccessible, ProcessNotFound):
         raise PackageNotFound("Cannot find '{}' package for process '{}'".format(PROCESS_BUILTIN, self.process))
     if process.type != PROCESS_BUILTIN:
         raise PackageExecutionError("Invalid package is not of type '{}'".format(PROCESS_BUILTIN))
Exemplo n.º 4
0
def get_db(container=None, reset_connection=False):
    # type: (Optional[Union[AnyRegistryContainer, AnySettingsContainer]], bool) -> MongoDatabase
    """
    Obtains the database connection from configured application settings.

    If :paramref:`reset_connection` is ``True``, the :paramref:`container` must be the application :class:`Registry` or
    any container that can retrieve it to accomplish reference reset. Otherwise, any settings container can be provided.

    .. note::
        It is preferable to provide a registry reference to reuse any available connection whenever possible.
        Giving application settings will require establishing a new connection.
    """
    if not reset_connection and isinstance(container, Request):
        db = getattr(container, "db", None)
        if isinstance(db, MongoDatabase):
            return db
    registry = get_registry(container, nothrow=True)
    if not reset_connection and registry and isinstance(
            getattr(registry, "db", None), MongoDatabase):
        return registry.db
    database = MongoDatabase(container)
    if reset_connection:
        registry.db = database
    return database
Exemplo n.º 5
0
def execute_process(task, job_id, wps_url, headers=None):
    # type: (Task, UUID, str, Optional[HeadersType]) -> StatusType
    """
    Celery task that executes the WPS process job monitoring as status updates (local and remote).
    """
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")

    task_process = get_celery_process()
    rss_start = task_process.memory_info().rss
    registry = get_registry(
        None)  # local thread, whether locally or dispatched celery
    settings = get_settings(registry)
    db = get_db(
        registry, reset_connection=True
    )  # reset the connection because we are in a forked celery process
    store = db.get_store(StoreJobs)
    job = store.fetch_by_id(job_id)
    job.started = now()
    job.status = Status.STARTED  # will be mapped to 'RUNNING'
    job.status_message = f"Job {Status.STARTED}."  # will preserve detail of STARTED vs RUNNING
    job.save_log(message=job.status_message)

    task_logger = get_task_logger(__name__)
    job.save_log(logger=task_logger, message="Job task setup initiated.")
    load_pywps_config(settings)
    job.progress = JobProgress.SETUP
    job.task_id = task.request.id
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    # Flag to keep track if job is running in background (remote-WPS, CWL app, etc.).
    # If terminate signal is sent to worker task via API dismiss request while still running in background,
    # the raised exception within the task will switch the job to Status.FAILED, but this will not raise an
    # exception here. Since the task execution 'succeeds' without raising, it skips directly to the last 'finally'.
    # Patch it back to Status.DISMISSED in this case.
    task_terminated = True

    try:
        job.progress = JobProgress.DESCRIBE
        job.save_log(logger=task_logger,
                     message=f"Employed WPS URL: [{wps_url!s}]",
                     level=logging.DEBUG)
        job.save_log(
            logger=task_logger,
            message=f"Execute WPS request for process [{job.process!s}]")
        wps_process = fetch_wps_process(job, wps_url, headers, settings)

        # prepare inputs
        job.progress = JobProgress.GET_INPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job input definitions.")
        wps_inputs = parse_wps_inputs(wps_process, job)

        # prepare outputs
        job.progress = JobProgress.GET_OUTPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job output definitions.")
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA)
                       for o in wps_process.processOutputs]

        # if process refers to a remote WPS provider, pass it down to avoid unnecessary re-fetch request
        if job.is_local:
            process = None  # already got all the information needed pre-loaded in PyWPS service
        else:
            service = Service(name=job.service, url=wps_url)
            process = Process.from_ows(wps_process, service, settings)

        job.progress = JobProgress.EXECUTE_REQUEST
        job.save_log(logger=task_logger,
                     message="Starting job process execution.")
        job.save_log(
            logger=task_logger,
            message=
            "Following updates could take a while until the Application Package answers..."
        )

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job,
                                           wps_inputs=wps_inputs,
                                           wps_outputs=wps_outputs,
                                           remote_process=process,
                                           headers=headers)
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(
            execution.statusLocation, settings)
        job.progress = JobProgress.EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]",
                     wps_status_path)
        if not wps_status_path.startswith("http") and not os.path.isfile(
                wps_status_path):
            LOGGER.warning(
                "WPS status location not resolved to local path: [%s]",
                wps_status_path)
        job.save_log(
            logger=task_logger,
            level=logging.DEBUG,
            message=f"Updated job status location: [{wps_status_path}].")

        job.status = Status.RUNNING
        job.status_message = execution.statusMessage or f"{job!s} initiation done."
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JobProgress.EXECUTE_MONITOR_START
        job.save_log(logger=task_logger,
                     message="Starting monitoring of job execution.")
        job = store.update_job(job)

        max_retries = 5
        num_retries = 0
        run_step = 0
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                job.save_log(errors=execution.errors, logger=task_logger)
                job = store.update_job(job)
                raise Exception(
                    f"Could not read status document after {max_retries} retries. Giving up."
                )
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                progress_min = JobProgress.EXECUTE_MONITOR_LOOP
                progress_max = JobProgress.EXECUTE_MONITOR_DONE
                job.progress = progress_min
                run_delay = wait_secs(run_step)
                execution = check_wps_status(location=wps_status_path,
                                             settings=settings,
                                             sleep_secs=run_delay)
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job_status_msg = job_msg or "n/a"
                job_percent = execution.percentCompleted
                job.status_message = f"Job execution monitoring (progress: {job_percent}%, status: {job_status_msg})."

                if execution.isComplete():
                    msg_progress = f" (status: {job_msg})" if job_msg else ""
                    if execution.isSucceded():
                        wps_package.retrieve_package_job_log(
                            execution, job, progress_min, progress_max)
                        job.status = map_status(Status.SUCCEEDED)
                        job.status_message = f"Job succeeded{msg_progress}."
                        job.progress = progress_max
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output_data(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(
                            job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        wps_package.retrieve_package_job_log(
                            execution, job, progress_min, progress_max)
                        job.status_message = f"Job failed{msg_progress}."
                        job.progress = progress_max
                        job.save_log(errors=execution.errors,
                                     logger=task_logger)
                    task_logger.debug(
                        "Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)
                    job = store.update_job(job)

            except Exception as exc:
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = f"Could not read status XML document for {job!s}. Trying again..."
                job.save_log(errors=execution.errors, logger=task_logger)
                job = store.update_job(job)
                sleep(1)
            else:
                num_retries = 0
                run_step += 1
            finally:
                task_terminated = False  # reached only if WPS execution completed (worker not terminated beforehand)
                job = store.update_job(job)

    except Exception as exc:
        # if 'execute_job' finishes quickly before even reaching the 'monitoring loop'
        # consider WPS execution produced an error (therefore Celery worker not terminated)
        task_terminated = False
        LOGGER.exception("Failed running [%s]", job)
        LOGGER.debug("Failed job [%s] raised an exception.", job, exc_info=exc)
        # note: don't update the progress here to preserve last one that was set
        job.status = map_status(Status.FAILED)
        job.status_message = f"Failed to run {job!s}."
        errors = f"{fully_qualified_name(exc)}: {exc!s}"
        job.save_log(errors=errors, logger=task_logger)
        job = store.update_job(job)
    finally:
        # if task worker terminated, local 'job' is out of date compared to remote/background runner last update
        job = store.fetch_by_id(job.id)
        if task_terminated and map_status(job.status) == Status.FAILED:
            job.status = Status.DISMISSED
        task_success = map_status(
            job.status) not in JOB_STATUS_CATEGORIES[StatusCategory.FAILED]
        collect_statistics(task_process, settings, job, rss_start)
        if task_success:
            job.progress = JobProgress.EXECUTE_MONITOR_END
        job.status_message = f"Job {job.status}."
        job.save_log(logger=task_logger)

        if task_success:
            job.progress = JobProgress.NOTIFY
        send_job_complete_notification_email(job, task_logger, settings)

        if job.status not in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]:
            job.status = Status.SUCCEEDED
        job.status_message = f"Job {job.status}."
        job.mark_finished()
        if task_success:
            job.progress = JobProgress.DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status