예제 #1
0
    def monitor(self, monitor_reference):
        # type: (JobExecution) -> bool
        """
        Poll the remote WPS status document until the execution is reported as complete.

        After each successful status check, the ``"execution"`` entry of ``monitor_reference`` is
        replaced by the refreshed execution object and the job status/progress is updated.

        :param monitor_reference: mapping holding the WPS execution object under the ``"execution"`` key.
        :return: ``True`` when the execution succeeded, ``False`` otherwise.
        :raises Exception: when reading the status document fails too many times in a row.
        """
        wps_exec = monitor_reference["execution"]
        retry_limit = 20  # using 'wait_secs' incremental delays, this is ~3min of retry attempts
        failed_reads = 0
        poll_step = 0
        job_id = "<undefined>"
        # the status is always checked at least once, even if the execution already reports completion
        while wps_exec.isNotComplete() or poll_step == 0:
            if failed_reads >= retry_limit:
                give_up_msg = "Could not read status document after {} retries. Giving up.".format(retry_limit)
                raise Exception(give_up_msg)
            try:
                wps_exec = check_wps_status(
                    location=wps_exec.statusLocation,
                    sleep_secs=wait_secs(poll_step),
                    settings=self.settings,
                )
                monitor_reference["execution"] = wps_exec  # update reference for later stages
                job_id = wps_exec.statusLocation.split("/")[-1].replace(".xml", "")
                current_status = status.map_status(wps_exec.getStatus())
                monitor_msg = get_log_monitor_msg(
                    job_id,
                    current_status,
                    wps_exec.percentCompleted,
                    wps_exec.statusMessage,
                    wps_exec.statusLocation,
                )
                LOGGER.debug(monitor_msg)
                job_log_msg = get_job_log_msg(
                    status=current_status,
                    message=wps_exec.statusMessage,
                    progress=wps_exec.percentCompleted,
                    duration=None,  # get if available
                )
                job_progress = map_progress(
                    wps_exec.percentCompleted,
                    REMOTE_JOB_PROGRESS_MONITOR,
                    REMOTE_JOB_PROGRESS_RESULTS,
                )
                self.update_status(job_log_msg, job_progress, status.STATUS_RUNNING)
            except Exception as exc:
                # count consecutive failures only; a later successful check resets the counter
                failed_reads += 1
                LOGGER.debug("Exception raised: %r", exc)
                sleep(1)
                continue
            failed_reads = 0
            poll_step += 1

        if wps_exec.isSucceded():
            return True

        failure_msg = wps_exec.statusMessage or "Job failed."
        final_status = status.map_status(wps_exec.getStatus())
        LOGGER.debug(
            get_log_monitor_msg(
                job_id,
                final_status,
                wps_exec.percentCompleted,
                failure_msg,
                wps_exec.statusLocation,
            )
        )
        return False
예제 #2
0
def submit_job_handler(payload,             # type: JSON
                       settings,            # type: SettingsType
                       service_url,         # type: str
                       provider_id=None,    # type: Optional[str]
                       process_id=None,     # type: str
                       is_workflow=False,   # type: bool
                       is_local=True,       # type: bool
                       visibility=None,     # type: Optional[str]
                       language=None,       # type: Optional[str]
                       auth=None,           # type: Optional[HeaderCookiesType]
                       tags=None,           # type: Optional[List[str]]
                       user=None,           # type: Optional[int]
                       context=None,        # type: Optional[str]
                       ):                   # type: (...) -> JSON
    """
    Registers a new job from the execution payload and dispatches it to the Celery worker.

    All parameters except the input payload are assumed to be already fetched and validated.
    The payload is validated here against the execute schema.

    :returns: JSON body with the job ID, process ID, provider ID, mapped status and job location URL.
    :raises HTTPBadRequest: when the payload does not match the execute schema.
    """
    try:
        exec_body = sd.Execute().deserialize(payload)
    except colander.Invalid as ex:
        raise HTTPBadRequest("Invalid schema: [{}]".format(str(ex)))

    # TODO: remove when all parameter variations are supported
    # FIXME:
    #   - support 'sync' and 'Prefer' header variants (https://github.com/crim-ca/weaver/issues/247)
    #   - support 'response: raw' (https://github.com/crim-ca/weaver/issues/376)
    #   - allow omitting 'outputs' (https://github.com/crim-ca/weaver/issues/375)
    _validate_job_parameters(exec_body)

    # anything other than explicit sync (including auto) is handled as async
    run_async = exec_body["mode"] != EXECUTE_MODE_SYNC
    email = exec_body.get("notification_email")
    if email:
        email_encrypted = encrypt_email(email, settings)
    else:
        email_encrypted = None

    job_store = get_db(settings).get_store(StoreJobs)
    job = job_store.save_job(
        task_id=STATUS_ACCEPTED,
        process=process_id,
        service=provider_id,
        inputs=exec_body.get("inputs"),
        is_local=is_local,
        is_workflow=is_workflow,
        access=visibility,
        user_id=user,
        execute_async=run_async,
        custom_tags=tags,
        notification_email=email_encrypted,
        accept_language=language,
        context=context,
    )
    job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=STATUS_ACCEPTED, progress=0)
    job = job_store.update_job(job)
    task_result = execute_process.delay(job_id=job.id, wps_url=clean_ows_url(service_url), headers=auth)
    LOGGER.debug("Celery pending task [%s] for job [%s].", task_result.id, job.id)

    # local/provider process location
    if provider_id:
        provider_path = "/providers/{provider_id}".format(provider_id=provider_id)
    else:
        provider_path = ""
    job_location = "{base_url}{location_base}/processes/{process_id}/jobs/{job_id}".format(
        base_url=get_wps_restapi_base_url(settings),
        location_base=provider_path,
        process_id=process_id,
        job_id=job.id,
    )
    return {
        "jobID": job.id,
        "processID": job.process,
        "providerID": provider_id,  # dropped by validator if not applicable
        "status": map_status(STATUS_ACCEPTED),
        "location": job_location,
    }
예제 #3
0
    def json(self, container=None):     # pylint: disable=W0221,arguments-differ
        # type: (Optional[AnySettingsContainer]) -> JSON
        """Build the JSON representation of the job for a response body.

        .. note::
            Settings are needed to generate the API shortcut URLs to additional job information.
            When no container is given, empty settings are used to build the job URL.

        :param container: optional settings container used to resolve the job URL.
        :returns: job status information validated by the ``JobStatusInfo`` schema.
        """
        settings = {}
        if container:
            settings = get_settings(container)
        body = {
            "jobID": self.id,
            "status": self.status,
            "message": self.status_message,
            "duration": self.duration_str,
            "percentCompleted": self.progress,
        }
        base_url = self._job_url(settings)
        # TODO: use links (https://github.com/crim-ca/weaver/issues/58)
        if self.status in JOB_STATUS_CATEGORIES[STATUS_CATEGORY_FINISHED]:
            # finished jobs link either to their result or to their exceptions
            succeeded = map_status(self.status) == STATUS_SUCCEEDED
            link_name = "result" if succeeded else "exceptions"
            body[link_name] = "{job_url}/{res}".format(job_url=base_url, res=link_name)
        body["logs"] = "{job_url}/logs".format(job_url=base_url)
        return sd.JobStatusInfo().deserialize(body)
예제 #4
0
파일: utils.py 프로젝트: crim-ca/weaver
def dismiss_job_task(job, container):
    # type: (Job, AnySettingsContainer) -> Job
    """
    Cancels any pending or running :mod:`Celery` task and removes completed job artifacts.

    .. note::
        The :term:`Job` object itself is not deleted, only its artifacts.
        Therefore, its inputs, outputs, logs, exceptions, etc. are still available in the database,
        but corresponding files that would be exposed by ``weaver.wps_output`` configurations are removed.

    Artifact removal is best-effort: failures to delete files or directories are logged as warnings
    and do not prevent the job from being marked as dismissed.

    :param job: Job to cancel or cleanup.
    :param container: Application settings.
    :return: Updated and dismissed job.
    """
    raise_job_dismissed(job, container)
    if job.status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]:
        # signal to stop celery task. Up to it to terminate remote if any.
        LOGGER.debug("Job [%s] dismiss operation: Canceling task [%s]", job.id, job.task_id)
        celery_app.control.revoke(job.task_id, terminate=True)

    def _log_rmtree_failure(_func, path, exc_info):
        # 'shutil.rmtree' calls this with (function, path, exc_info) for each failing entry;
        # report the entry that actually failed and the raised exception instead of the whole tuple
        LOGGER.warning("Job [%s] dismiss operation: Failed to delete [%s] due to [%s]",
                       job.id, path, exc_info[1])

    wps_out_dir = get_wps_output_dir(container)
    job_out_dir = os.path.join(wps_out_dir, str(job.id))
    if os.path.isdir(job_out_dir):
        LOGGER.debug("Job [%s] dismiss operation: Removing output results.", job.id)
        shutil.rmtree(job_out_dir, onerror=_log_rmtree_failure)

    # per-job log and WPS status files are stored next to the job output directory
    for extension, description in ((".log", "logs"), (".xml", "WPS status")):
        job_out_file = os.path.join(wps_out_dir, str(job.id) + extension)
        if not os.path.isfile(job_out_file):
            continue
        LOGGER.debug("Job [%s] dismiss operation: Removing output %s.", job.id, description)
        try:
            os.remove(job_out_file)
        except OSError as exc:
            LOGGER.warning("Job [%s] dismiss operation: Failed to delete [%s] due to [%s]",
                           job.id, job_out_file, exc)

    # the job identifier must be provided, otherwise the '%s' placeholder is logged literally
    LOGGER.debug("Job [%s] dismiss operation: Updating job status.", job.id)
    store = get_db(container).get_store(StoreJobs)
    job.status_message = f"Job {Status.DISMISSED}."
    job.status = map_status(Status.DISMISSED)
    job = store.update_job(job)
    return job
예제 #5
0
    def monitor(self, monitor_reference):
        # type: (str) -> bool
        """
        Poll the remote ADES job status until it reaches a finished status.

        :param monitor_reference: URI of the remote job status.
        :return: ``True`` when the remote job succeeded.
        :raises PackageExecutionError: when the remote job finished with any status other than succeeded.
        """
        status_uri = monitor_reference
        status_data = self.get_job_status(status_uri)
        current_status = status.map_status(status_data["status"])
        job_id = status_data["jobID"]

        start_msg = "Monitoring job on remote ADES : {0}".format(status_uri)
        self.update_status(start_msg, REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

        while current_status not in status.JOB_STATUS_CATEGORIES[status.JOB_STATUS_CATEGORY_FINISHED]:
            sleep(5)
            status_data = self.get_job_status(status_uri)
            current_status = status.map_status(status_data["status"])

            monitor_msg = get_log_monitor_msg(
                job_id,
                current_status,
                status_data.get("percentCompleted", 0),
                get_any_message(status_data),
                status_data.get("statusLocation"),
            )
            LOGGER.debug(monitor_msg)

            job_log_msg = get_job_log_msg(
                status=current_status,
                message=get_any_message(status_data),
                progress=status_data.get("percentCompleted", 0),
                duration=status_data.get("duration", None),  # get if available
            )
            job_progress = map_progress(
                status_data.get("percentCompleted", 0),
                REMOTE_JOB_PROGRESS_MONITORING,
                REMOTE_JOB_PROGRESS_FETCH_OUT,
            )
            self.update_status(job_log_msg, job_progress, status.STATUS_RUNNING)

        if current_status == status.STATUS_SUCCEEDED:
            return True

        # finished without success: log the last known state and report the failure
        LOGGER.debug(
            get_log_monitor_msg(
                job_id,
                current_status,
                status_data.get("percentCompleted", 0),
                get_any_message(status_data),
                status_data.get("statusLocation"),
            )
        )
        raise PackageExecutionError(status_data)
예제 #6
0
파일: jobs.py 프로젝트: crim-ca/weaver
def dismiss_job_task(job, container):
    # type: (Job, AnySettingsContainer) -> Job
    """
    Cancels any pending or running :mod:`Celery` task and removes completed job artifacts.

    Artifact removal is best-effort: failures to delete files or directories are logged as warnings
    and do not prevent the job from being marked as dismissed.

    :param job: job to cancel or cleanup.
    :param container: settings container used to locate the WPS output directory and the database.
    :return: updated job, with its status set to dismissed.
    """
    raise_job_dismissed(job, container)
    if job.status in status.JOB_STATUS_CATEGORIES[status.JOB_STATUS_CATEGORY_RUNNING]:
        # signal to stop celery task. Up to it to terminate remote if any.
        LOGGER.debug("Job [%s] dismiss operation: Canceling task [%s]", job.id, job.task_id)
        celery_app.control.revoke(job.task_id, terminate=True)

    def _log_rmtree_failure(_func, path, exc_info):
        # 'shutil.rmtree' calls this with (function, path, exc_info) for each failing entry;
        # report the entry that actually failed and the raised exception instead of the whole tuple
        LOGGER.warning("Job [%s] dismiss operation: Failed to delete [%s] due to [%s]",
                       job.id, path, exc_info[1])

    wps_out_dir = get_wps_output_dir(container)
    job_out_dir = os.path.join(wps_out_dir, str(job.id))
    if os.path.isdir(job_out_dir):
        LOGGER.debug("Job [%s] dismiss operation: Removing output results.", job.id)
        shutil.rmtree(job_out_dir, onerror=_log_rmtree_failure)

    # per-job log and WPS status files are stored next to the job output directory
    for extension, description in ((".log", "logs"), (".xml", "WPS status")):
        job_out_file = os.path.join(wps_out_dir, str(job.id) + extension)
        if not os.path.isfile(job_out_file):
            continue
        LOGGER.debug("Job [%s] dismiss operation: Removing output %s.", job.id, description)
        try:
            os.remove(job_out_file)
        except OSError as exc:
            LOGGER.warning("Job [%s] dismiss operation: Failed to delete [%s] due to [%s]",
                           job.id, job_out_file, exc)

    # the job identifier must be provided, otherwise the '%s' placeholder is logged literally
    LOGGER.debug("Job [%s] dismiss operation: Updating job status.", job.id)
    store = get_db(container).get_store(StoreJobs)
    job.status_message = "Job {}.".format(status.STATUS_DISMISSED)
    job.status = status.map_status(status.STATUS_DISMISSED)
    job = store.update_job(job)
    return job
예제 #7
0
 def save_job(self,
              task_id,                   # type: str
              process,                   # type: str
              service=None,              # type: Optional[str]
              inputs=None,               # type: Optional[List[Any]]
              is_workflow=False,         # type: bool
              is_local=False,            # type: bool
              user_id=None,              # type: Optional[int]
              execute_async=True,        # type: bool
              custom_tags=None,          # type: Optional[List[str]]
              access=None,               # type: Optional[str]
              notification_email=None,   # type: Optional[str]
              accept_language=None,      # type: Optional[str]
              ):                         # type: (...) -> Job
     """
     Creates a new job entry in the collection and returns the job as fetched back from it.

     :raises JobRegistrationError: if creating, inserting or fetching the job fails,
         or if the inserted job cannot be retrieved.
     """
     try:
         job_tags = ["dev"]
         job_tags.extend(tag for tag in (custom_tags or []) if tag)  # remove empty tags
         job_tags.append(PROCESS_WORKFLOW if is_workflow else PROCESS_APPLICATION)
         job_tags.append(EXECUTE_MODE_ASYNC if execute_async else EXECUTE_MODE_SYNC)
         job_params = {
             "task_id": task_id,
             "user_id": user_id,
             "service": service,     # provider identifier (WPS service)
             "process": process,     # process identifier (WPS request)
             "inputs": inputs,
             "status": map_status(STATUS_ACCEPTED),
             "execute_async": execute_async,
             "is_workflow": is_workflow,
             "is_local": is_local,
             "created": now(),
             "tags": list(set(job_tags)),  # remove duplicates
             "access": access or VISIBILITY_PRIVATE,  # private unless specified otherwise
             "notification_email": notification_email,
             "accept_language": accept_language,
         }
         new_job = Job(job_params)
         self.collection.insert_one(new_job.params())
         stored_job = self.fetch_by_id(job_id=new_job.id)
     except Exception as ex:
         raise JobRegistrationError("Error occurred during job registration: [{!r}]".format(ex))
     if stored_job is None:
         raise JobRegistrationError("Failed to retrieve registered job.")
     return stored_job
예제 #8
0
 def get_job_status(self, job_status_uri, retry=True):
     # type: (JobMonitorReference, Union[bool, int]) -> JSON
     """
     Fetches the :term:`Job` status response and returns its JSON contents.

     The ``jobID`` field is filled from the last path segment of the status URI when missing,
     and the ``status`` field is replaced by its mapped value.

     :raises Exception: whatever ``raise_for_status`` raises for an error response.
     """
     resp = self.make_request(method="GET", url=job_status_uri, retry=retry)  # retry in case not yet ready
     resp.raise_for_status()
     status_body = resp.json()
     _, _, uri_job_id = job_status_uri.rpartition("/")
     if "jobID" not in status_body:
         status_body["jobID"] = uri_job_id  # provide if not implemented by ADES
     status_body["status"] = status.map_status(status_body["status"])
     return status_body
예제 #9
0
def cancel_job(request):
    """
    Dismiss a job.

    The Celery task of the job is revoked and the job is stored with a dismissed status.

    Note: Will only stop tracking this particular process (WPS 1.0 doesn't allow to stop a process)

    :returns: HTTP OK response with the job ID, status, message and progress.
    """
    job = get_job(request)
    app.control.revoke(job.task_id, terminate=True)
    job_store = get_db(request).get_store(StoreJobs)
    job.status_message = "Job dismissed."
    job.status = status.map_status(status.STATUS_DISMISSED)
    job_store.update_job(job)

    response_body = {
        "jobID": job.id,
        "status": job.status,
        "message": job.status_message,
        "percentCompleted": job.progress,
    }
    return HTTPOk(json=response_body)
예제 #10
0
    def get_job_status(self, job_status_uri, retry=True):
        """
        Fetches the job status response and returns its JSON contents.

        When ``retry`` is enabled and the response is a 404, the request is attempted one more time
        after a delay. The ``jobID`` field is filled from the last path segment of the status URI
        when missing, and the ``status`` field is replaced by its mapped value.
        """
        resp = self.make_request(
            method="GET",
            url=job_status_uri,
            retry=True,
            status_code_mock=HTTPNotFound.code,
        )
        if retry and resp.status_code == HTTPNotFound.code:
            # Retry on 404 since job may not be fully ready
            sleep(5)
            return self.get_job_status(job_status_uri, retry=False)

        resp.raise_for_status()
        status_body = resp.json()

        # TODO Remove patch for Geomatys not conforming to the status schema
        #  - jobID is missing
        #  - handled by 'map_status': status are upper cases and succeeded process are indicated as successful
        _, _, uri_job_id = job_status_uri.rpartition("/")
        if "jobID" not in status_body:
            status_body["jobID"] = uri_job_id
        status_body["status"] = status.map_status(status_body["status"])
        return status_body
예제 #11
0
파일: jobs.py 프로젝트: crim-ca/weaver
def get_job_results(request):
    # type: (Request) -> HTTPException
    """
    Retrieve the results of a job.

    :raises HTTPNotFound: with code ``ResultsNotReady`` while the job status is in the running category.
    :returns: HTTP OK response with the results validated by the ``Result`` schema.
    """
    job = get_job(request)
    raise_job_dismissed(job, request)
    mapped_status = status.map_status(job.status)
    running_statuses = status.JOB_STATUS_CATEGORIES[status.JOB_STATUS_CATEGORY_RUNNING]
    if mapped_status in running_statuses:
        not_ready = {
            "code": "ResultsNotReady",
            "description": "Job status is '{}'. Results are not yet available.".format(mapped_status),
        }
        raise HTTPNotFound(json=not_ready)
    raw_results = get_results(job, request, value_key="value", ogc_api=True)
    # note: cannot add links in this case because variable OutputID keys are directly at the root
    return HTTPOk(json=sd.Result().deserialize(raw_results))
예제 #12
0
파일: utils.py 프로젝트: crim-ca/weaver
def get_job_submission_response(body, headers, error=False):
    # type: (JSON, AnyHeadersContainer, bool) -> Union[HTTPOk, HTTPCreated]
    """
    Builds the HTTP response for the contents returned by the :term:`Job` submission process.

    When the :term:`Job` status in ``body`` is already in the finished category (for example, because it completed
    within the requested ``Prefer: wait=X`` seconds delay and the :term:`Process` ``jobControlOptions`` allow it),
    the final response is returned immediately: a completed job response, or a bad request when ``error`` is set.

    Otherwise, the created response with the location of the :term:`Job` to monitor asynchronously is returned.

    .. seealso::
        :func:`weaver.processes.execution.submit_job`
        :func:`weaver.processes.execution.submit_job_handler`
    """
    # convert headers to pass as list to avoid any duplicate Content-related headers
    # otherwise auto-added by JSON handling when provided by dict-like structure
    if hasattr(headers, "items"):
        headers = list(headers.items())
    get_header("Content-Type", headers, pop=True)
    headers.append(("Content-Type", ContentType.APP_JSON))

    job_status = map_status(body.get("status"))
    if job_status not in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]:
        # job still pending or running: respond with the location to monitor
        body["description"] = sd.CreatedLaunchJobResponse.description
        body = sd.CreatedJobStatusSchema().deserialize(body)
        return HTTPCreated(json=body, headerlist=headers)

    if error:
        response_class = HTTPBadRequest
        description = sd.FailedSyncJobResponse.description
    else:
        response_class = HTTPOk
        description = sd.CompletedJobResponse.description
        body = sd.CompletedJobStatusSchema().deserialize(body)

    body["description"] = description
    return response_class(json=body, headerlist=headers)
예제 #13
0
def test_map_status_pywps_compliant_as_int_statuses():
    """Every integer PyWPS status with a known mapping must map to a PyWPS-compliant job status."""
    for status_index in range(len(WPS_STATUS)):
        if STATUS_PYWPS_MAP[status_index] == Status.UNKNOWN:
            continue  # statuses without a known mapping are not checked
        mapped = map_status(status_index, StatusCompliant.PYWPS)
        assert mapped in JOB_STATUS_CATEGORIES[StatusCompliant.PYWPS]
예제 #14
0
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        """
        Execute the process on the remote WPS1 provider, monitor it and fetch its outputs.

        Steps, each reported through ``self.update_status``: prepare the request from the provider
        capabilities and process description, submit the execution, poll the status document until
        completion, then download each expected output into ``out_dir``.

        :param workflow_inputs: mapping of input identifier to value. Values equal to ``"null"`` are skipped,
            list values are submitted as repeated ``(id, value)`` pairs, and ``dict`` values are replaced
            by their ``"location"`` entry.
        :param out_dir: directory where the fetched output files are written.
        :param expected_outputs: mapping of output identifier to the file name to write under ``out_dir``.
        :raises Exception: any failure is logged and re-raised as a generic exception whose message
            is formatted as ``<module>.<class>: <message>`` of the original error.
        """
        self.update_status("Preparing execute request for remote WPS1 provider.",
                           REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
        LOGGER.debug("Execute process WPS request for %s", self.process)
        try:
            try:
                wps = WebProcessingService(url=self.provider, headers=self.cookies, verify=self.verify)
                raise_on_xml_exception(wps._capabilities)  # noqa: W0212
            except Exception as ex:
                raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex)))
            try:
                process = wps.describeprocess(self.process)
            except Exception as ex:
                raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex)))

            # prepare inputs
            # collect identifiers of inputs that the process description declares as complex data
            complex_inputs = []
            for process_input in process.dataInputs:
                if WPS_COMPLEX_DATA in process_input.dataType:
                    complex_inputs.append(process_input.identifier)

            # remove any 'null' input, should employ the 'default' of the remote WPS process
            inputs_provided_keys = filter(lambda i: workflow_inputs[i] != "null", workflow_inputs)

            wps_inputs = []
            for input_key in inputs_provided_keys:
                input_val = workflow_inputs[input_key]
                # in case of array inputs, must repeat (id,value)
                # in case of complex input (File), obtain location, otherwise get data value
                if not isinstance(input_val, list):
                    input_val = [input_val]

                input_values = []
                for val in input_val:
                    if isinstance(val, dict):
                        val = val["location"]

                    # owslib only accepts strings, not numbers directly
                    if isinstance(val, (int, float)):
                        val = str(val)

                    if val.startswith("file://"):
                        # we need to host file starting with file:// scheme
                        val = self.host_file(val)

                    input_values.append(val)

                # need to use ComplexDataInput structure for complex input
                # TODO: BoundingBox not supported
                for input_value in input_values:
                    if input_key in complex_inputs:
                        input_value = ComplexDataInput(input_value)

                    wps_inputs.append((input_key, input_value))

            # prepare outputs
            # only request outputs that are expected; the second tuple item flags complex data outputs
            outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs
                       if o.identifier in expected_outputs]

            self.update_status("Executing job on remote WPS1 provider.",
                               REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)

            # the execution is always submitted in async mode and then monitored below
            mode = EXECUTE_MODE_ASYNC
            execution = wps.execute(self.process, inputs=wps_inputs, output=outputs, mode=mode, lineage=True)
            if not execution.process and execution.errors:
                raise execution.errors[0]

            self.update_status("Monitoring job on remote WPS1 provider : [{0}]".format(self.provider),
                               REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

            # 'num_retries' counts consecutive failed status checks and is reset on each successful one
            max_retries = 5
            num_retries = 0
            run_step = 0
            job_id = "<undefined>"
            # the status is always checked at least once ('run_step == 0'), even if already reported complete
            while execution.isNotComplete() or run_step == 0:
                if num_retries >= max_retries:
                    raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
                try:
                    execution = check_wps_status(location=execution.statusLocation, verify=self.verify,
                                                 sleep_secs=wait_secs(run_step))
                    job_id = execution.statusLocation.replace(".xml", "").split("/")[-1]
                    LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                     execution.percentCompleted, execution.statusMessage,
                                                     execution.statusLocation))
                    self.update_status(get_job_log_msg(status=status.map_status(execution.getStatus()),
                                                       message=execution.statusMessage,
                                                       progress=execution.percentCompleted,
                                                       duration=None),  # get if available
                                       map_progress(execution.percentCompleted,
                                                    REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT),
                                       status.STATUS_RUNNING)
                except Exception as exc:
                    num_retries += 1
                    LOGGER.debug("Exception raised: %r", exc)
                    sleep(1)
                else:
                    num_retries = 0
                    run_step += 1

            if not execution.isSucceded():
                exec_msg = execution.statusMessage or "Job failed."
                LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                 execution.percentCompleted, exec_msg, execution.statusLocation))
                raise Exception(execution.statusMessage or "Job failed.")

            self.update_status("Fetching job outputs from remote WPS1 provider.",
                               REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)

            results = [ows2json_output(output, process) for output in execution.processOutputs]
            for result in results:
                result_id = get_any_id(result)
                result_val = get_any_value(result)
                if result_id in expected_outputs:
                    # This is where cwl expect the output file to be written
                    # TODO We will probably need to handle multiple output value...
                    dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]])

                    # TODO Should we handle other type than File reference?

                    # the result value is requested as a URL and its content is written as-is to the destination
                    resp = request_extra("get", result_val, allow_redirects=True, settings=self.settings)
                    LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]", result_val, dst_fn)
                    with open(dst_fn, mode="wb") as dst_fh:
                        dst_fh.write(resp.content)

        except Exception as exc:
            # any error is logged with its traceback, then re-raised as a generic exception
            # that only keeps the original exception class path and message as text
            exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
            errors = "{0}: {1!s}".format(exception_class, exc)
            LOGGER.exception(exc)
            raise Exception(errors)

        self.update_status("Execution on remote WPS1 provider completed.",
                           REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
예제 #15
0
파일: test_utils.py 프로젝트: 00mjk/weaver
def test_map_status_pywps_compliant_as_int_statuses():
    """Every integer PyWPS status with a known mapping must map to a PyWPS-compliant job status."""
    for status_index in range(len(WPS_STATUS)):
        if status.STATUS_PYWPS_MAP[status_index] == status.STATUS_UNKNOWN:
            continue  # statuses without a known mapping are not checked
        mapped = status.map_status(status_index, status.STATUS_COMPLIANT_PYWPS)
        assert mapped in status.JOB_STATUS_CATEGORIES[status.STATUS_COMPLIANT_PYWPS]
예제 #16
0
파일: test_utils.py 프로젝트: 00mjk/weaver
def test_map_status_owslib_compliant():
    """Every variation of every job status value must map to an OWSLib-compliant job status."""
    for status_value in status.JOB_STATUS_VALUES:
        for variation in get_status_variations(status_value):
            mapped = status.map_status(variation, status.STATUS_COMPLIANT_OWSLIB)
            assert mapped in status.JOB_STATUS_CATEGORIES[status.STATUS_COMPLIANT_OWSLIB]
예제 #17
0
파일: test_utils.py 프로젝트: 00mjk/weaver
def test_map_status_back_compatibility_and_special_cases():
    """The 'successful' status must map to the succeeded status for every compliance mode."""
    compliance_modes = (
        status.STATUS_COMPLIANT_OGC,
        status.STATUS_COMPLIANT_PYWPS,
        status.STATUS_COMPLIANT_OWSLIB,
    )
    for compliance in compliance_modes:
        mapped = status.map_status("successful", compliance)
        assert mapped == status.STATUS_SUCCEEDED
예제 #18
0
def execute_process(task, job_id, wps_url, headers=None):
    # type: (Task, UUID, str, Optional[HeadersType]) -> StatusType
    """
    Celery task that executes the WPS process job monitoring as status updates (local and remote).

    Fetches the job identified by ``job_id`` from the job store, marks it as started, submits the execution
    through the PyWPS worker service, then polls the WPS status location until the execution completes.
    Exceptions raised during these steps are caught and logged, and the job is set to failed.
    The last ``finally`` stage re-fetches the job, collects statistics, sends the completion notification
    and saves the finished job.

    :param task: Celery task instance; only ``task.request.id`` is read, to record the task ID on the job.
    :param job_id: identifier of the job to fetch from the job store.
    :param wps_url: WPS URL passed to ``fetch_wps_process`` and used as service URL for non-local jobs.
    :param headers: optional headers passed to ``fetch_wps_process`` and to the job execution call.
    :returns: status of the job once the completion stage has been applied.
    """
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")

    task_process = get_celery_process()
    # memory usage before execution, handed to 'collect_statistics' in the final stage
    rss_start = task_process.memory_info().rss
    registry = get_registry(
        None)  # local thread, whether locally or dispatched celery
    settings = get_settings(registry)
    db = get_db(
        registry, reset_connection=True
    )  # reset the connection because we are in a forked celery process
    store = db.get_store(StoreJobs)
    job = store.fetch_by_id(job_id)
    job.started = now()
    job.status = Status.STARTED  # will be mapped to 'RUNNING'
    job.status_message = f"Job {Status.STARTED}."  # will preserve detail of STARTED vs RUNNING
    job.save_log(message=job.status_message)

    task_logger = get_task_logger(__name__)
    job.save_log(logger=task_logger, message="Job task setup initiated.")
    load_pywps_config(settings)
    job.progress = JobProgress.SETUP
    job.task_id = task.request.id
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    # Flag to keep track if job is running in background (remote-WPS, CWL app, etc.).
    # If terminate signal is sent to worker task via API dismiss request while still running in background,
    # the raised exception within the task will switch the job to Status.FAILED, but this will not raise an
    # exception here. Since the task execution 'succeeds' without raising, it skips directly to the last 'finally'.
    # Patch it back to Status.DISMISSED in this case.
    task_terminated = True

    try:
        job.progress = JobProgress.DESCRIBE
        job.save_log(logger=task_logger,
                     message=f"Employed WPS URL: [{wps_url!s}]",
                     level=logging.DEBUG)
        job.save_log(
            logger=task_logger,
            message=f"Execute WPS request for process [{job.process!s}]")
        wps_process = fetch_wps_process(job, wps_url, headers, settings)

        # prepare inputs
        job.progress = JobProgress.GET_INPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job input definitions.")
        wps_inputs = parse_wps_inputs(wps_process, job)

        # prepare outputs
        job.progress = JobProgress.GET_OUTPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job output definitions.")
        # each output is described as a pair of (identifier, whether its data type is complex)
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA)
                       for o in wps_process.processOutputs]

        # if process refers to a remote WPS provider, pass it down to avoid unnecessary re-fetch request
        if job.is_local:
            process = None  # already got all the information needed pre-loaded in PyWPS service
        else:
            service = Service(name=job.service, url=wps_url)
            process = Process.from_ows(wps_process, service, settings)

        job.progress = JobProgress.EXECUTE_REQUEST
        job.save_log(logger=task_logger,
                     message="Starting job process execution.")
        job.save_log(
            logger=task_logger,
            message=
            "Following updates could take a while until the Application Package answers..."
        )

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job,
                                           wps_inputs=wps_inputs,
                                           wps_outputs=wps_outputs,
                                           remote_process=process,
                                           headers=headers)
        # no process on the execution result combined with reported errors: re-raise the first reported error
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(
            execution.statusLocation, settings)
        job.progress = JobProgress.EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]",
                     wps_status_path)
        # only a warning: execution continues even if the location is neither an HTTP reference nor a local file
        if not wps_status_path.startswith("http") and not os.path.isfile(
                wps_status_path):
            LOGGER.warning(
                "WPS status location not resolved to local path: [%s]",
                wps_status_path)
        job.save_log(
            logger=task_logger,
            level=logging.DEBUG,
            message=f"Updated job status location: [{wps_status_path}].")

        job.status = Status.RUNNING
        job.status_message = execution.statusMessage or f"{job!s} initiation done."
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JobProgress.EXECUTE_MONITOR_START
        job.save_log(logger=task_logger,
                     message="Starting monitoring of job execution.")
        job = store.update_job(job)

        # 'num_retries' counts consecutive failed status reads; it is reset after each successful read
        max_retries = 5
        num_retries = 0
        run_step = 0
        # 'run_step == 0' forces at least one status check, even if the execution already reports completion
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                job.save_log(errors=execution.errors, logger=task_logger)
                job = store.update_job(job)
                raise Exception(
                    f"Could not read status document after {max_retries} retries. Giving up."
                )
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                progress_min = JobProgress.EXECUTE_MONITOR_LOOP
                progress_max = JobProgress.EXECUTE_MONITOR_DONE
                job.progress = progress_min
                run_delay = wait_secs(run_step)
                execution = check_wps_status(location=wps_status_path,
                                             settings=settings,
                                             sleep_secs=run_delay)
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job_status_msg = job_msg or "n/a"
                job_percent = execution.percentCompleted
                job.status_message = f"Job execution monitoring (progress: {job_percent}%, status: {job_status_msg})."

                if execution.isComplete():
                    msg_progress = f" (status: {job_msg})" if job_msg else ""
                    if execution.isSucceded():
                        wps_package.retrieve_package_job_log(
                            execution, job, progress_min, progress_max)
                        job.status = map_status(Status.SUCCEEDED)
                        job.status_message = f"Job succeeded{msg_progress}."
                        job.progress = progress_max
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output_data(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(
                            job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        wps_package.retrieve_package_job_log(
                            execution, job, progress_min, progress_max)
                        job.status_message = f"Job failed{msg_progress}."
                        job.progress = progress_max
                        job.save_log(errors=execution.errors,
                                     logger=task_logger)
                    task_logger.debug(
                        "Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)
                    job = store.update_job(job)

            except Exception as exc:
                # any failure during the status check counts as one retry attempt
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = f"Could not read status XML document for {job!s}. Trying again..."
                job.save_log(errors=execution.errors, logger=task_logger)
                job = store.update_job(job)
                sleep(1)
            else:
                num_retries = 0
                run_step += 1
            finally:
                task_terminated = False  # reached only if WPS execution completed (worker not terminated beforehand)
                job = store.update_job(job)

    except Exception as exc:
        # if 'execute_job' finishes quickly before even reaching the 'monitoring loop'
        # consider WPS execution produced an error (therefore Celery worker not terminated)
        task_terminated = False
        LOGGER.exception("Failed running [%s]", job)
        LOGGER.debug("Failed job [%s] raised an exception.", job, exc_info=exc)
        # note: don't update the progress here to preserve last one that was set
        job.status = map_status(Status.FAILED)
        job.status_message = f"Failed to run {job!s}."
        errors = f"{fully_qualified_name(exc)}: {exc!s}"
        job.save_log(errors=errors, logger=task_logger)
        job = store.update_job(job)
    finally:
        # if task worker terminated, local 'job' is out of date compared to remote/background runner last update
        job = store.fetch_by_id(job.id)
        if task_terminated and map_status(job.status) == Status.FAILED:
            job.status = Status.DISMISSED
        # progress values are only advanced below when the job did not end in the FAILED status category
        task_success = map_status(
            job.status) not in JOB_STATUS_CATEGORIES[StatusCategory.FAILED]
        collect_statistics(task_process, settings, job, rss_start)
        if task_success:
            job.progress = JobProgress.EXECUTE_MONITOR_END
        job.status_message = f"Job {job.status}."
        job.save_log(logger=task_logger)

        if task_success:
            job.progress = JobProgress.NOTIFY
        send_job_complete_notification_email(job, task_logger, settings)

        # a status still outside the FINISHED category at this point is replaced by SUCCEEDED
        if job.status not in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]:
            job.status = Status.SUCCEEDED
        job.status_message = f"Job {job.status}."
        job.mark_finished()
        if task_success:
            job.progress = JobProgress.DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status
예제 #19
0
파일: test_utils.py 프로젝트: 00mjk/weaver
def test_map_status_pywps_compliant():
    """
    Each variation of every job status value must map to a member of the PyWPS-compliant status category.
    """
    for status_value in status.JOB_STATUS_VALUES:
        for variation in get_status_variations(status_value):
            mapped = status.map_status(variation, status.STATUS_COMPLIANT_PYWPS)
            assert mapped in status.JOB_STATUS_CATEGORIES[status.STATUS_COMPLIANT_PYWPS]
예제 #20
0
def submit_job_handler(
        payload,  # type: JSON
        settings,  # type: SettingsType
        service_url,  # type: str
        provider_id=None,  # type: Optional[str]
        process_id=None,  # type: str
        is_workflow=False,  # type: bool
        is_local=True,  # type: bool
        visibility=None,  # type: Optional[AnyVisibility]
        language=None,  # type: Optional[str]
        headers=None,  # type: Optional[HeaderCookiesType]
        tags=None,  # type: Optional[List[str]]
        user=None,  # type: Optional[int]
        context=None,  # type: Optional[str]
):  # type: (...) -> AnyResponseType
    """
    Submit a job to the Celery worker and build the response of the submission request.

    Only the input payload is validated here; all other parameters are assumed to be pre-fetched and validated.
    When synchronous execution is requested and the task completes within the allowed wait time, the job
    results (on success) or the job status (otherwise) are returned directly. In every other case, the
    response describes the accepted job to be monitored asynchronously.

    :raises HTTPBadRequest: if the payload does not respect the execution schema.
    """
    try:
        json_body = sd.Execute().deserialize(payload)
    except colander.Invalid as ex:
        raise HTTPBadRequest(f"Invalid schema: [{ex!s}]")

    db = get_db(settings)
    headers = headers or {}
    if not is_local:
        job_ctl_opts = ExecuteControlOption.values()
    else:
        process = db.get_store(StoreProcesses).fetch_by_id(process_id)
        job_ctl_opts = process.jobControlOptions

    max_wait = as_int(settings.get("weaver.exec_sync_max_wait"), default=20)
    mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts, max_wait)
    get_header("prefer", headers, pop=True)
    if applied:
        # a preference was applied, rely on the resolved mode
        # see https://datatracker.ietf.org/doc/html/rfc7240#section-2
        is_execute_async = mode != ExecuteMode.SYNC
    else:
        # whatever was returned is a default, consider 'mode' in body as alternative (auto converted to async)
        body_mode = ExecuteMode.get(json_body.get("mode"))
        is_execute_async = body_mode != ExecuteMode.SYNC
    exec_resp = json_body.get("response")

    notification_email = json_body.get("notification_email")
    encrypted_email = None
    if notification_email:
        encrypted_email = encrypt_email(notification_email, settings)

    store = db.get_store(StoreJobs)  # type: StoreJobs
    job_fields = {
        "task_id": Status.ACCEPTED,
        "process": process_id,
        "service": provider_id,
        "inputs": json_body.get("inputs"),
        "outputs": json_body.get("outputs"),
        "is_local": is_local,
        "is_workflow": is_workflow,
        "access": visibility,
        "user_id": user,
        "context": context,
        "execute_async": is_execute_async,
        "execute_response": exec_resp,
        "custom_tags": tags,
        "notification_email": encrypted_email,
        "accept_language": language,
    }
    job = store.save_job(**job_fields)
    job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=Status.ACCEPTED, progress=0)
    job = store.update_job(job)
    location_url = job.status_url(settings)
    resp_headers = {"Location": location_url}
    resp_headers.update(applied)

    wps_url = clean_ows_url(service_url)
    result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers)  # type: CeleryResult
    LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id)

    if not is_execute_async:
        LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait)
        try:
            result.wait(timeout=wait)
        except CeleryTaskTimeoutError:
            pass
        if result.ready():
            job = store.fetch_by_id(job.id)
            # when sync is successful, it must return the results direct instead of status info
            # see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response
            if job.status == Status.SUCCEEDED:
                return get_job_results_response(job, settings, headers=resp_headers)
            # otherwise return the error status
            error_body = job.json(container=settings, self_link="status")
            error_body["location"] = location_url
            return get_job_submission_response(error_body, resp_headers, error=True)

        LOGGER.debug(
            "Celery task requested as sync took too long to complete (wait=%ss). Continue in async.",
            wait)
        # sync not respected, therefore must drop it
        # since both could be provided as alternative preferences, drop only async with limited subset
        prefer = get_header("Preference-Applied", headers, pop=True)
        _, _, async_applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteMode.ASYNC])
        if async_applied:
            resp_headers.update(async_applied)

    LOGGER.debug("Celery task submitted to run async.")
    accepted_body = {
        "jobID": job.id,
        "processID": job.process,
        "providerID": provider_id,  # dropped by validator if not applicable
        "status": map_status(Status.ACCEPTED),
        "location": location_url
    }
    return get_job_submission_response(accepted_body, resp_headers)
예제 #21
0
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        """
        Run the process on the remote ADES and write its expected outputs under ``out_dir``.

        The process is deployed when it is not visible, enforced to public visibility, executed asynchronously
        and polled until its status is in the finished category. Each expected result is then downloaded.

        :param workflow_inputs: mapping of input IDs to a value or list of values. Dictionaries holding a
            ``location`` key are submitted by reference (``href``), any other value is submitted as ``data``.
        :param out_dir: directory where the fetched outputs are written.
        :param expected_outputs: mapping of output IDs to the file name to write under ``out_dir``.
        :raises Exception: if the execute request does not respond with HTTP 201, or if the remote job
            finishes with a status other than succeeded.
        """
        # TODO: test
        visible = self.is_visible()
        if not visible:  # includes private visibility and non-existing cases
            if visible is None:
                LOGGER.info("Process [%s] access is unauthorized on [%s] - deploying as admin.",
                            self.process, self.url)
            elif visible is False:
                LOGGER.info("Process [%s] is not deployed on [%s] - deploying.", self.process, self.url)
            # TODO: Maybe always redeploy? What about cases of outdated deployed process?
            try:
                self.deploy()
            except Exception as exc:
                # FIXME: support for Spacebel, avoid conflict error incorrectly handled, remove 500 when fixed
                pass_http_error(exc, [HTTPConflict, HTTPInternalServerError])

        LOGGER.info("Process [%s] enforced to public visibility.", self.process)
        try:
            self.set_visibility(visibility=VISIBILITY_PUBLIC)
        # TODO: support for Spacebel, remove when visibility route properly implemented on ADES
        except Exception as exc:
            pass_http_error(exc, HTTPNotFound)

        self.update_status("Preparing execute request for remote ADES.",
                           REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
        LOGGER.debug("Execute process WPS request for [%s]", self.process)

        id_key = "id"
        href_key = "href"
        data_key = "data"

        # flatten inputs: one entry per value, whether the input was provided as a single value or as a list
        execute_body_inputs = []
        for input_id, input_value in workflow_inputs.items():
            input_items = input_value if isinstance(input_value, list) else [input_value]
            for item in input_items:
                if isinstance(item, dict) and "location" in item:
                    execute_body_inputs.append({id_key: input_id, href_key: item["location"]})
                else:
                    execute_body_inputs.append({id_key: input_id, data_key: item})

        # adjust string references that employ the local opensearch scheme or the 'file://' scheme
        local_scheme_prefix = "{0}://".format(OPENSEARCH_LOCAL_FILE_SCHEME)
        for exec_input in execute_body_inputs:
            href = exec_input.get(href_key)
            if not isinstance(href, str):
                continue
            if href.startswith(local_scheme_prefix):
                exec_input[href_key] = "file{0}".format(href[len(OPENSEARCH_LOCAL_FILE_SCHEME):])
            elif href.startswith("file://"):
                exec_input[href_key] = self.host_file(href)
                LOGGER.debug("Hosting intermediate input [%s] : [%s]",
                             exec_input[id_key], exec_input[href_key])

        execute_body_outputs = []
        for output in expected_outputs:
            execute_body_outputs.append({
                id_key: output,
                "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE,
            })
        self.update_status("Executing job on remote ADES.",
                           REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)

        execute_body = {
            "mode": EXECUTE_MODE_ASYNC,
            "response": EXECUTE_RESPONSE_DOCUMENT,
            "inputs": execute_body_inputs,
            "outputs": execute_body_outputs,
        }
        request_url = self.url + process_jobs_uri.format(process_id=self.process)
        response = self.make_request(method="POST", url=request_url, json=execute_body, retry=True)
        if response.status_code != 201:
            raise Exception(
                "Was expecting a 201 status code from the execute request : {0}".format(request_url))

        job_status_uri = response.headers["Location"]
        job_status = self.get_job_status(job_status_uri)
        job_status_value = status.map_status(job_status["status"])

        self.update_status("Monitoring job on remote ADES : {0}".format(job_status_uri),
                           REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

        # poll the remote job status every 5 seconds until it reaches a finished status
        while job_status_value not in status.JOB_STATUS_CATEGORIES[status.STATUS_CATEGORY_FINISHED]:
            sleep(5)
            job_status = self.get_job_status(job_status_uri)
            job_status_value = status.map_status(job_status["status"])
            job_percent = job_status.get("percentCompleted", 0)
            job_message = get_any_message(job_status)

            LOGGER.debug(get_log_monitor_msg(job_status["jobID"], job_status_value, job_percent,
                                             job_message, job_status.get("statusLocation")))
            monitor_msg = get_job_log_msg(status=job_status_value,
                                          message=job_message,
                                          progress=job_percent,
                                          duration=job_status.get("duration", None))  # get if available
            monitor_progress = map_progress(job_percent,
                                            REMOTE_JOB_PROGRESS_MONITORING,
                                            REMOTE_JOB_PROGRESS_FETCH_OUT)
            self.update_status(monitor_msg, monitor_progress, status.STATUS_RUNNING)

        if job_status_value != status.STATUS_SUCCEEDED:
            LOGGER.debug(get_log_monitor_msg(job_status["jobID"], job_status_value,
                                             job_status.get("percentCompleted", 0),
                                             get_any_message(job_status),
                                             job_status.get("statusLocation")))
            raise Exception(job_status)

        self.update_status("Fetching job outputs from remote ADES.",
                           REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)
        results = self.get_job_results(job_status["jobID"])
        for result in results:
            result_id = get_any_id(result)
            if result_id not in expected_outputs:
                continue
            # This is where cwl expect the output file to be written
            # TODO We will probably need to handle multiple output value...
            dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]])

            # TODO Should we handle other type than File reference?
            result_val = get_any_value(result)
            resp = request_extra("get", result_val, allow_redirects=True, settings=self.settings)
            LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]",
                         result_val, dst_fn)
            with open(dst_fn, mode="wb") as dst_fh:
                dst_fh.write(resp.content)

        self.update_status("Execution on remote ADES completed.",
                           REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
예제 #22
0
def get_job_log_msg(status, message, progress=0, duration=None):
    # type: (str, str, Optional[Number], Optional[str]) -> str
    """
    Format a job log line as ``<duration> <progress>% <status> <message>``.

    A missing duration is rendered as an empty string and a missing progress as ``0``.
    The status is passed through ``map_status`` before being rendered.
    """
    fields = {
        "d": duration or "",
        "p": int(progress or 0),
        "s": map_status(status),
        "m": message,
    }
    return "{d} {p:3d}% {s:10} {m}".format(**fields)
예제 #23
0
def test_map_status_owslib_compliant():
    """
    Each variation of every status except the unknown one must map into the OWSLib-compliant status category.
    """
    known_statuses = set(Status.values()).difference({Status.UNKNOWN})
    for status_value in known_statuses:
        for variation in get_status_variations(status_value):
            mapped = map_status(variation, StatusCompliant.OWSLIB)
            assert mapped in JOB_STATUS_CATEGORIES[StatusCompliant.OWSLIB]
예제 #24
0
def test_map_status_back_compatibility_and_special_cases():
    """
    The legacy ``"successful"`` status must map to the succeeded status for every compliance mode.
    """
    for compliance in StatusCompliant:
        mapped = map_status("successful", compliance)
        assert mapped == Status.SUCCEEDED
예제 #25
0
    def monitor(self, monitor_reference):
        # type: (JobExecution) -> bool
        """
        Poll the remote WPS status document until the referenced execution completes.

        The refreshed execution is stored back under ``monitor_reference["execution"]`` after each successful
        status check, and the job status is updated with the reported progress.

        :param monitor_reference: mapping holding the monitored execution under the ``execution`` key.
        :returns: ``True`` if the execution succeeded, ``False`` otherwise (after reporting the failed status).
        :raises Exception: if the status document could not be read after the maximum number of retries.
        """
        execution = monitor_reference["execution"]
        max_retries = 20  # using 'wait_secs' incremental delays, this is ~3min of retry attempts
        num_retries = 0
        run_step = 0
        job_id = "<undefined>"
        log_progress = Wps1RemoteJobProgress.MONITOR

        # 'run_step == 0' guarantees that the status is checked at least once
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                raise Exception(f"Could not read status document after {max_retries} retries. Giving up.")
            try:
                poll_delay = wait_secs(run_step)
                execution = check_wps_status(location=execution.statusLocation,
                                             sleep_secs=poll_delay,
                                             settings=self.settings)
                monitor_reference["execution"] = execution  # update reference for later stages
                status_file = execution.statusLocation.split("/")[-1]
                job_id = status_file.replace(".xml", "")
                exec_status = map_status(execution.getStatus())
                monitor_msg = get_log_monitor_msg(job_id, exec_status,
                                                  execution.percentCompleted,
                                                  execution.statusMessage,
                                                  execution.statusLocation)
                LOGGER.debug(monitor_msg)
                log_msg = get_job_log_msg(status=exec_status,
                                          message=execution.statusMessage,
                                          progress=execution.percentCompleted,
                                          duration=None)  # get if available
                log_progress = map_progress(execution.percentCompleted,
                                            Wps1RemoteJobProgress.MONITOR,
                                            Wps1RemoteJobProgress.RESULTS)
                self.update_status(log_msg, log_progress, Status.RUNNING)
            except Exception as exc:
                # a failed status check counts as one retry; wait briefly before the next attempt
                num_retries += 1
                LOGGER.debug("Exception raised: %r", exc)
                sleep(1)
            else:
                num_retries = 0
                run_step += 1

        if execution.isSucceded():
            return True

        exec_msg = execution.statusMessage or "Job failed."
        exec_status = map_status(execution.getStatus())
        exec_status_url = execution.statusLocation
        failure_msg = get_log_monitor_msg(job_id, exec_status, execution.percentCompleted,
                                          exec_msg, exec_status_url)
        LOGGER.debug(failure_msg)
        # provide more details in logs of parent job process about the cause of the failing remote execution
        xml_err = bytes2str(xml_util.tostring(execution.response))
        xml_exc = get_exception_from_xml_status(execution.response)
        self.update_status(
            f"Retrieved error status response from WPS remote provider on [{exec_status_url}]:\n{xml_err}\n",
            log_progress,
            Status.FAILED,
            error=xml_exc)
        return False
예제 #26
0
def execute_process(self, job_id, url, headers=None):
    """
    Execute the WPS process of a stored job and monitor it until completion.

    The function reads ``self.request.id`` and stores it as the job's task ID, so it is
    expected to be invoked as a bound task.

    Processing steps, each reflected by an update of ``job.progress`` and a job log entry:

    1. Load the application settings and PyWPS configuration, then fetch the job from the
       job store and save the task ID on it.
    2. Create a WPS client for ``url`` and retrieve the description of ``job.process``.
    3. Convert ``job.inputs`` to a list of ``(id, value)`` tuples and list the process outputs.
    4. Submit the execution through the PyWPS worker service.
    5. Poll the resolved status location until the execution is complete, tolerating a limited
       number of consecutive polling failures.
    6. Always finalize the job (status message, optional notification email, store update).

    Any exception raised by steps 2 to 5 is caught, logged, and recorded on the job as a failed
    status instead of being propagated. Exceptions raised during the initial setup (step 1) or
    during the finalization are not handled here.

    :param job_id: identifier used to fetch the job from the job store.
    :param url: URL of the WPS service queried for capabilities and process description.
    :param headers: optional headers forwarded to ``get_cookie_headers`` to build the headers
        of the WPS client requests.
    :returns: the status of the job after finalization.
    """
    # NOTE(review): function-scope import, presumably to avoid a circular import - confirm.
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")
    settings = get_settings(app)
    task_logger = get_task_logger(__name__)
    load_pywps_config(settings)

    task_logger.debug("Job task setup.")

    # reset the connection because we are in a forked celery process
    db = get_db(app, reset_connection=True)
    store = db.get_store(StoreJobs)

    # Everything up to the 'try' below is unprotected: a failure here propagates to the caller.
    job = store.fetch_by_id(job_id)
    job.task_id = self.request.id
    job.progress = JOB_PROGRESS_SETUP
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    try:
        # Both remote lookups below convert any failure into 'OWSNoApplicableCode' with a
        # message that tells which of the two requests failed.
        try:
            job.progress = JOB_PROGRESS_DESCRIBE
            job.save_log(
                logger=task_logger,
                message="Execute WPS request for process [{!s}]".format(
                    job.process))
            ssl_verify = get_ssl_verify_option("get", url, settings=settings)
            wps = WebProcessingService(url=url,
                                       headers=get_cookie_headers(headers),
                                       verify=ssl_verify)
            set_wps_language(wps, accept_language=job.accept_language)
            # Inspects the private capabilities document held by the WPS client.
            raise_on_xml_exception(wps._capabilities)  # noqa
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS capabilities. Error: [{}].".format(
                    str(ex)))
        try:
            process = wps.describeprocess(job.process)
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS process description. Error: [{}].".
                format(str(ex)))

        # prepare inputs
        job.progress = JOB_PROGRESS_GET_INPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job input definitions.")
        # Collect the identifiers of inputs whose data type contains the complex data marker.
        complex_inputs = []
        for process_input in process.dataInputs:
            if WPS_COMPLEX_DATA in process_input.dataType:
                complex_inputs.append(process_input.identifier)

        try:
            wps_inputs = list()
            for process_input in job.inputs:
                input_id = get_any_id(process_input)
                process_value = get_any_value(process_input)
                # in case of array inputs, must repeat (id,value)
                input_values = process_value if isinstance(
                    process_value, list) else [process_value]

                # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
                # (7 is the length of the "file://" prefix being dropped)
                input_values = [
                    val[7:] if str(val).startswith("file://") else val
                    for val in input_values
                ]

                # need to use ComplexDataInput structure for complex input
                # need to use literal String for anything else than complex
                # TODO: BoundingBox not supported
                wps_inputs.extend([
                    (input_id, ComplexDataInput(input_value)
                     if input_id in complex_inputs else str(input_value))
                    for input_value in input_values
                ])
        except KeyError:
            # A KeyError on any input discards all inputs collected so far;
            # the execution then proceeds without inputs.
            wps_inputs = []

        # prepare outputs
        job.progress = JOB_PROGRESS_GET_OUTPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job output definitions.")
        # Each output is an (identifier, is-complex-data) tuple.
        # NOTE(review): the boolean presumably requests the output "as reference" - confirm.
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA)
                       for o in process.processOutputs]

        mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC
        job.progress = JOB_PROGRESS_EXECUTE_REQUEST
        job.save_log(logger=task_logger,
                     message="Starting job process execution.")
        job.save_log(
            logger=task_logger,
            message=
            "Following updates could take a while until the Application Package answers..."
        )

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job.process,
                                           wps_inputs=wps_inputs,
                                           wps_outputs=wps_outputs,
                                           mode=mode,
                                           job_uuid=job.id)
        # Without a process on the execution, surface its first reported error.
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(
            execution.statusLocation, settings)
        job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]",
                     wps_status_path)
        # Only a warning: a location that is neither HTTP nor an existing file is still polled below.
        if not wps_status_path.startswith("http") and not os.path.isfile(
                wps_status_path):
            LOGGER.warning(
                "WPS status location not resolved to local path: [%s]",
                wps_status_path)
        job.save_log(logger=task_logger,
                     level=logging.DEBUG,
                     message="Updated job status location: [{}].".format(
                         wps_status_path))

        job.status = map_status(STATUS_STARTED)
        job.status_message = execution.statusMessage or "{} initiation done.".format(
            str(job))
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START
        job.save_log(logger=task_logger,
                     message="Starting monitoring of job execution.")
        job = store.update_job(job)

        # 'num_retries' counts consecutive polling failures (reset after each successful poll).
        # 'run_step' counts successful polls and is passed to 'wait_secs' for the polling delay.
        max_retries = 5
        num_retries = 0
        run_step = 0
        # 'run_step == 0' forces at least one status check, even if the execution is already complete.
        while execution.isNotComplete() or run_step == 0:
            # Raised outside the inner 'try': it skips the retry handling and the loop's
            # 'finally', and is handled by the outer 'except' that marks the job as failed.
            if num_retries >= max_retries:
                raise Exception(
                    "Could not read status document after {} retries. Giving up."
                    .format(max_retries))
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP
                execution = check_wps_status(location=wps_status_path,
                                             settings=settings,
                                             sleep_secs=wait_secs(run_step))
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job.status_message = "Job execution monitoring (progress: {}%, status: {})."\
                                     .format(execution.percentCompleted, job_msg or "n/a")
                # job.save_log(logger=task_logger)
                # job = store.update_job(job)

                if execution.isComplete():
                    job.mark_finished()
                    job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
                    msg_progress = " (status: {})".format(
                        job_msg) if job_msg else ""
                    if execution.isSucceded():
                        job.status = map_status(STATUS_SUCCEEDED)
                        job.status_message = "Job succeeded{}.".format(
                            msg_progress)
                        # Package logs are retrieved before saving the final log entry.
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(
                            job_results, settings)
                    else:
                        # The job status stays as mapped from the execution status above.
                        task_logger.debug("Job failed.")
                        job.status_message = "Job failed{}.".format(
                            msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(errors=execution.errors,
                                     logger=task_logger)
                    task_logger.debug(
                        "Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)

            except Exception as exc:
                # Any failure in this iteration counts as one failed attempt and is retried
                # after a short pause. 'execution' is still the last successfully retrieved one.
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = "Could not read status XML document for {!s}. Trying again...".format(
                    job)
                job.save_log(errors=execution.errors, logger=task_logger)
                sleep(1)
            else:
                # job.status_message = "Update {}...".format(str(job))
                # job.save_log(logger=task_logger)
                num_retries = 0
                run_step += 1
            finally:
                # Persist the job after every polling attempt, successful or not.
                job = store.update_job(job)

    except Exception as exc:
        # Any error from the steps above marks the job as failed and logs the
        # exception as "<module>.<class>: <message>".
        LOGGER.exception("Failed running [%s]", job)
        job.status = map_status(STATUS_FAILED)
        job.status_message = "Failed to run {!s}.".format(job)
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR
        exception_class = "{}.{}".format(
            type(exc).__module__,
            type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        job.save_log(errors=errors, logger=task_logger)
    finally:
        # Runs for both success and failure; replaces the progress and status message set above.
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
        job.status_message = "Job {}.".format(job.status)
        job.save_log(logger=task_logger)

        # Send email if requested
        if job.notification_email is not None:
            job.progress = JOB_PROGRESS_NOTIFY
            try:
                notify_job_complete(job, job.notification_email, settings)
                message = "Notification email sent successfully."
                job.save_log(logger=task_logger, message=message)
            except Exception as exc:
                # A notification failure is only logged on the job; it does not change the job status.
                exception_class = "{}.{}".format(
                    type(exc).__module__,
                    type(exc).__name__)
                exception = "{0}: {1!s}".format(exception_class, exc)
                message = "Couldn't send notification email ({})".format(
                    exception)
                job.save_log(errors=message,
                             logger=task_logger,
                             message=message)

        job.progress = JOB_PROGRESS_DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status