def execute_job(self, process_id, wps_inputs, wps_outputs, mode, job_uuid):
    """
    Real execution of the process by active Celery Worker.
    """
    execution = WPSExecution(version="2.0", url="localhost")
    xml_request = execution.buildRequest(process_id, wps_inputs, wps_outputs, mode=mode, lineage=True)
    wps_request = WPSRequest()
    wps_request.identifier = process_id
    wps_request.set_version("2.0.0")
    request_parser = wps_request._post_request_parser(wps_request.WPS.Execute().tag)  # noqa: W0212
    request_parser(xml_request)
    # NOTE:
    #  Setting 'status = false' will disable async execution of 'pywps.app.Process.Process',
    #  but this is needed since this job already runs asynchronously within a Celery worker
    #  (a daemon process can't have child processes).
    #  Because of how the code in PyWPS is made, we have to re-enable creation of the status file afterwards.
    wps_request.status = "false"
    wps_response = super(WorkerService, self).execute(process_id, wps_request, job_uuid)
    wps_response.store_status_file = True
    # update execution status with actual status file and apply required references
    execution = check_wps_status(location=wps_response.process.status_location, settings=self.settings)
    execution.request = xml_request
    return execution

def execute_job(self,
                job,             # type: Job
                wps_inputs,      # type: List[WPS_InputData]
                wps_outputs,     # type: List[WPS_OutputRequested]
                remote_process,  # type: Optional[Process]
                headers,         # type: Optional[AnyHeadersContainer]
                ):               # type: (...) -> WPSExecution
    """
    Real execution of the process by active Celery Worker.
    """
    process_id = job.process
    execution = WPSExecution(version="2.0", url="localhost")
    xml_request = execution.buildRequest(process_id, wps_inputs, wps_outputs, mode=job.execution_mode, lineage=True)
    wps_request = WorkerRequest(http_headers=headers)
    wps_request.identifier = process_id
    wps_request.check_and_set_language(job.accept_language)
    wps_request.set_version("2.0.0")
    request_parser = wps_request._post_request_parser(wps_request.WPS.Execute().tag)  # noqa: W0212
    request_parser(xml_request)  # parses the submitted inputs/outputs data and request parameters
    # NOTE:
    #  Setting 'status = false' will disable async execution of 'pywps.app.Process.Process',
    #  but this is needed since this job already runs asynchronously within a Celery worker
    #  (a daemon process can't have child processes).
    wps_request.status = "false"
    # When 'execute' is called, PyWPS will in turn call 'prepare_process_for_execution',
    # which then sets up and retrieves the currently loaded 'local' processes.
    # Since only local processes were defined by 'get_pywps_service',
    # a temporary process must be added for remote provider execution.
    if not remote_process:
        worker_process_id = process_id
    else:
        worker_process_id = f"wps_package-{process_id}-{job.uuid}"
        self.dispatched_processes[worker_process_id] = remote_process
    wps_response = super(WorkerService, self).execute(worker_process_id, wps_request, job.uuid)
    # re-enable creation of the status file so we can find it, since 'status' was disabled earlier for sync execution
    wps_response.store_status_file = True
    # update execution status with actual status file and apply required references
    execution = check_wps_status(location=wps_response.process.status_location, settings=self.settings)
    execution.request = xml_request
    return execution

def monitor(self, monitor_reference):
    # type: (JobExecution) -> bool
    execution = monitor_reference["execution"]
    max_retries = 20  # using 'wait_secs' incremental delays, this is ~3min of retry attempts
    num_retries = 0
    run_step = 0
    job_id = "<undefined>"
    while execution.isNotComplete() or run_step == 0:
        if num_retries >= max_retries:
            raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
        try:
            execution = check_wps_status(location=execution.statusLocation,
                                         sleep_secs=wait_secs(run_step), settings=self.settings)
            monitor_reference["execution"] = execution  # update reference for later stages
            job_id = execution.statusLocation.split("/")[-1].replace(".xml", "")
            exec_status = status.map_status(execution.getStatus())
            LOGGER.debug(get_log_monitor_msg(job_id, exec_status,
                                             execution.percentCompleted,
                                             execution.statusMessage,
                                             execution.statusLocation))
            log_msg = get_job_log_msg(status=exec_status,
                                      message=execution.statusMessage,
                                      progress=execution.percentCompleted,
                                      duration=None)  # get if available
            log_progress = map_progress(execution.percentCompleted,
                                        REMOTE_JOB_PROGRESS_MONITOR,
                                        REMOTE_JOB_PROGRESS_RESULTS)
            self.update_status(log_msg, log_progress, status.STATUS_RUNNING)
        except Exception as exc:
            num_retries += 1
            LOGGER.debug("Exception raised: %r", exc)
            sleep(1)
        else:
            num_retries = 0
            run_step += 1

    if not execution.isSucceded():
        exec_msg = execution.statusMessage or "Job failed."
        exec_status = status.map_status(execution.getStatus())
        LOGGER.debug(get_log_monitor_msg(job_id, exec_status,
                                         execution.percentCompleted,
                                         exec_msg, execution.statusLocation))
        return False
    return True

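
# The monitoring loop above leans on two helpers not shown in this section: 'wait_secs(run_step)' for the
# incremental polling delay (hence the "~3min of retry attempts" note with 'max_retries = 20') and
# 'map_progress(percent, min, max)' to rescale the remote 0-100 progress into this job's progress sub-range.
# What follows is only a hedged sketch of plausible implementations under those assumptions, not the
# project's actual helpers; names are suffixed with '_sketch' to make that explicit.

def wait_secs_sketch(run_step):
    # grow the delay with each successful polling step, then cap it (values here are illustrative)
    delays = [2, 2, 2, 5, 5, 5, 10, 10, 10]
    return delays[run_step] if 0 <= run_step < len(delays) else 30


def map_progress_sketch(percent, range_min, range_max):
    # linearly rescale a 0-100 remote progress value into the [range_min, range_max] interval
    percent = max(0.0, min(100.0, float(percent or 0)))
    return range_min + (range_max - range_min) * percent / 100.0
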
def execute(self, workflow_inputs, out_dir, expected_outputs):
    self.update_status("Preparing execute request for remote WPS1 provider.",
                       REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
    LOGGER.debug("Execute process WPS request for %s", self.process)
    try:
        try:
            wps = WebProcessingService(url=self.provider, headers=self.cookies, verify=self.verify)
            raise_on_xml_exception(wps._capabilities)  # noqa: W0212
        except Exception as ex:
            raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex)))
        try:
            process = wps.describeprocess(self.process)
        except Exception as ex:
            raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex)))

        # prepare inputs
        complex_inputs = []
        for process_input in process.dataInputs:
            if WPS_COMPLEX_DATA in process_input.dataType:
                complex_inputs.append(process_input.identifier)

        # remove any 'null' input, should employ the 'default' of the remote WPS process
        inputs_provided_keys = filter(lambda i: workflow_inputs[i] != "null", workflow_inputs)

        wps_inputs = []
        for input_key in inputs_provided_keys:
            input_val = workflow_inputs[input_key]
            # in case of array inputs, must repeat (id, value)
            # in case of complex input (File), obtain location, otherwise get data value
            if not isinstance(input_val, list):
                input_val = [input_val]

            input_values = []
            for val in input_val:
                if isinstance(val, dict):
                    val = val["location"]

                # owslib only accepts strings, not numbers directly
                if isinstance(val, (int, float)):
                    val = str(val)

                if val.startswith("file://"):
                    # we need to host any file starting with the file:// scheme
                    val = self.host_file(val)

                input_values.append(val)

            # need to use ComplexDataInput structure for complex input
            # TODO: BoundingBox not supported
            for input_value in input_values:
                if input_key in complex_inputs:
                    input_value = ComplexDataInput(input_value)
                wps_inputs.append((input_key, input_value))

        # prepare outputs
        outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs
                   if o.identifier in expected_outputs]

        self.update_status("Executing job on remote WPS1 provider.",
                           REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)

        mode = EXECUTE_MODE_ASYNC
        execution = wps.execute(self.process, inputs=wps_inputs, output=outputs, mode=mode, lineage=True)
        if not execution.process and execution.errors:
            raise execution.errors[0]
        self.update_status("Monitoring job on remote WPS1 provider: [{0}]".format(self.provider),
                           REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

        max_retries = 5
        num_retries = 0
        run_step = 0
        job_id = "<undefined>"
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
            try:
                execution = check_wps_status(location=execution.statusLocation, verify=self.verify,
                                             sleep_secs=wait_secs(run_step))
                job_id = execution.statusLocation.replace(".xml", "").split("/")[-1]
                LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                 execution.percentCompleted,
                                                 execution.statusMessage,
                                                 execution.statusLocation))
                self.update_status(get_job_log_msg(status=status.map_status(execution.getStatus()),
                                                   message=execution.statusMessage,
                                                   progress=execution.percentCompleted,
                                                   duration=None),  # get if available
                                   map_progress(execution.percentCompleted,
                                                REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT),
                                   status.STATUS_RUNNING)
            except Exception as exc:
                num_retries += 1
                LOGGER.debug("Exception raised: %r", exc)
                sleep(1)
            else:
                num_retries = 0
                run_step += 1

        if not execution.isSucceded():
            exec_msg = execution.statusMessage or "Job failed."
            LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                             execution.percentCompleted,
                                             exec_msg, execution.statusLocation))
            raise Exception(exec_msg)

        self.update_status("Fetching job outputs from remote WPS1 provider.",
                           REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)
        results = [ows2json_output(output, process) for output in execution.processOutputs]
        for result in results:
            result_id = get_any_id(result)
            result_val = get_any_value(result)
            if result_id in expected_outputs:
                # this is where CWL expects the output file to be written
                # TODO: we will probably need to handle multiple output values...
                dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]])

                # TODO: should we handle other types than File reference?
                resp = request_extra("get", result_val, allow_redirects=True, settings=self.settings)
                LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]", result_val, dst_fn)
                with open(dst_fn, mode="wb") as dst_fh:
                    dst_fh.write(resp.content)
    except Exception as exc:
        exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        LOGGER.exception(exc)
        raise Exception(errors)

    self.update_status("Execution on remote WPS1 provider completed.",
                       REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)

def execute_process(task, job_id, wps_url, headers=None):
    # type: (Task, UUID, str, Optional[HeadersType]) -> StatusType
    """
    Celery task that executes the WPS process job monitoring as status updates (local and remote).
    """
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")

    task_process = get_celery_process()
    rss_start = task_process.memory_info().rss
    registry = get_registry(None)  # local thread, whether locally or dispatched celery
    settings = get_settings(registry)
    db = get_db(registry, reset_connection=True)  # reset the connection because we are in a forked celery process
    store = db.get_store(StoreJobs)
    job = store.fetch_by_id(job_id)
    job.started = now()
    job.status = Status.STARTED  # will be mapped to 'RUNNING'
    job.status_message = f"Job {Status.STARTED}."  # will preserve detail of STARTED vs RUNNING
    job.save_log(message=job.status_message)
    task_logger = get_task_logger(__name__)
    job.save_log(logger=task_logger, message="Job task setup initiated.")
    load_pywps_config(settings)
    job.progress = JobProgress.SETUP
    job.task_id = task.request.id
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    # Flag to keep track if job is running in background (remote-WPS, CWL app, etc.).
    # If terminate signal is sent to worker task via API dismiss request while still running in background,
    # the raised exception within the task will switch the job to Status.FAILED, but this will not raise an
    # exception here. Since the task execution 'succeeds' without raising, it skips directly to the last 'finally'.
    # Patch it back to Status.DISMISSED in this case.
    task_terminated = True

    try:
        job.progress = JobProgress.DESCRIBE
        job.save_log(logger=task_logger, message=f"Employed WPS URL: [{wps_url!s}]", level=logging.DEBUG)
        job.save_log(logger=task_logger, message=f"Execute WPS request for process [{job.process!s}]")
        wps_process = fetch_wps_process(job, wps_url, headers, settings)

        # prepare inputs
        job.progress = JobProgress.GET_INPUTS
        job.save_log(logger=task_logger, message="Fetching job input definitions.")
        wps_inputs = parse_wps_inputs(wps_process, job)

        # prepare outputs
        job.progress = JobProgress.GET_OUTPUTS
        job.save_log(logger=task_logger, message="Fetching job output definitions.")
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in wps_process.processOutputs]

        # if process refers to a remote WPS provider, pass it down to avoid unnecessary re-fetch request
        if job.is_local:
            process = None  # already got all the information needed pre-loaded in PyWPS service
        else:
            service = Service(name=job.service, url=wps_url)
            process = Process.from_ows(wps_process, service, settings)

        job.progress = JobProgress.EXECUTE_REQUEST
        job.save_log(logger=task_logger, message="Starting job process execution.")
        job.save_log(logger=task_logger,
                     message="Following updates could take a while until the Application Package answers...")

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job,
                                           wps_inputs=wps_inputs, wps_outputs=wps_outputs,
                                           remote_process=process, headers=headers)
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(execution.statusLocation, settings)
        job.progress = JobProgress.EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path)
        if not wps_status_path.startswith("http") and not os.path.isfile(wps_status_path):
            LOGGER.warning("WPS status location not resolved to local path: [%s]", wps_status_path)
        job.save_log(logger=task_logger, level=logging.DEBUG,
                     message=f"Updated job status location: [{wps_status_path}].")

        job.status = Status.RUNNING
        job.status_message = execution.statusMessage or f"{job!s} initiation done."
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JobProgress.EXECUTE_MONITOR_START
        job.save_log(logger=task_logger, message="Starting monitoring of job execution.")
        job = store.update_job(job)

        max_retries = 5
        num_retries = 0
        run_step = 0
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                job.save_log(errors=execution.errors, logger=task_logger)
                job = store.update_job(job)
                raise Exception(f"Could not read status document after {max_retries} retries. Giving up.")
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                progress_min = JobProgress.EXECUTE_MONITOR_LOOP
                progress_max = JobProgress.EXECUTE_MONITOR_DONE
                job.progress = progress_min
                run_delay = wait_secs(run_step)
                execution = check_wps_status(location=wps_status_path, settings=settings, sleep_secs=run_delay)
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job_status_msg = job_msg or "n/a"
                job_percent = execution.percentCompleted
                job.status_message = f"Job execution monitoring (progress: {job_percent}%, status: {job_status_msg})."

                if execution.isComplete():
                    msg_progress = f" (status: {job_msg})" if job_msg else ""
                    if execution.isSucceded():
                        wps_package.retrieve_package_job_log(execution, job, progress_min, progress_max)
                        job.status = map_status(Status.SUCCEEDED)
                        job.status_message = f"Job succeeded{msg_progress}."
                        job.progress = progress_max
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output_data(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        wps_package.retrieve_package_job_log(execution, job, progress_min, progress_max)
                        job.status_message = f"Job failed{msg_progress}."
                        job.progress = progress_max
                        job.save_log(errors=execution.errors, logger=task_logger)
                    task_logger.debug("Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)
                    job = store.update_job(job)

            except Exception as exc:
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = f"Could not read status XML document for {job!s}. Trying again..."
                job.save_log(errors=execution.errors, logger=task_logger)
                job = store.update_job(job)
                sleep(1)
            else:
                num_retries = 0
                run_step += 1
            finally:
                task_terminated = False  # reached only if WPS execution completed (worker not terminated beforehand)
                job = store.update_job(job)

    except Exception as exc:
        # if 'execute_job' finishes quickly before even reaching the 'monitoring loop',
        # consider that the WPS execution produced an error (therefore Celery worker not terminated)
        task_terminated = False
        LOGGER.exception("Failed running [%s]", job)
        LOGGER.debug("Failed job [%s] raised an exception.", job, exc_info=exc)
        # note: don't update the progress here to preserve the last one that was set
        job.status = map_status(Status.FAILED)
        job.status_message = f"Failed to run {job!s}."
        errors = f"{fully_qualified_name(exc)}: {exc!s}"
        job.save_log(errors=errors, logger=task_logger)
        job = store.update_job(job)
    finally:
        # if task worker terminated, local 'job' is out of date compared to remote/background runner last update
        job = store.fetch_by_id(job.id)
        if task_terminated and map_status(job.status) == Status.FAILED:
            job.status = Status.DISMISSED
        task_success = map_status(job.status) not in JOB_STATUS_CATEGORIES[StatusCategory.FAILED]
        collect_statistics(task_process, settings, job, rss_start)
        if task_success:
            job.progress = JobProgress.EXECUTE_MONITOR_END
        job.status_message = f"Job {job.status}."
        job.save_log(logger=task_logger)

        if task_success:
            job.progress = JobProgress.NOTIFY
        send_job_complete_notification_email(job, task_logger, settings)

        if job.status not in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]:
            job.status = Status.SUCCEEDED
        job.status_message = f"Job {job.status}."
        job.mark_finished()
        if task_success:
            job.progress = JobProgress.DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status

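
# A hedged sketch of how 'execute_process' above could be wired as a Celery task; the broker URL, app name
# and task name are assumptions for illustration, not the project's actual configuration. With 'bind=True',
# Celery passes the bound task instance as the first argument, matching the 'task' parameter expected above.
from celery import Celery

celery_app = Celery("worker_sketch", broker="redis://localhost:6379/0")  # hypothetical broker


@celery_app.task(bind=True, name="execute_process_sketch")
def execute_process_task_sketch(self, job_id, wps_url, headers=None):
    # delegate to the function defined above; 'self' is the bound Celery Task instance
    return execute_process(self, job_id, wps_url, headers=headers)
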
def monitor(self, monitor_reference):
    # type: (JobExecution) -> bool
    execution = monitor_reference["execution"]
    max_retries = 20  # using 'wait_secs' incremental delays, this is ~3min of retry attempts
    num_retries = 0
    run_step = 0
    job_id = "<undefined>"
    log_progress = Wps1RemoteJobProgress.MONITOR
    while execution.isNotComplete() or run_step == 0:
        if num_retries >= max_retries:
            raise Exception(f"Could not read status document after {max_retries} retries. Giving up.")
        try:
            execution = check_wps_status(location=execution.statusLocation,
                                         sleep_secs=wait_secs(run_step), settings=self.settings)
            monitor_reference["execution"] = execution  # update reference for later stages
            job_id = execution.statusLocation.split("/")[-1].replace(".xml", "")
            exec_status = map_status(execution.getStatus())
            LOGGER.debug(get_log_monitor_msg(job_id, exec_status,
                                             execution.percentCompleted,
                                             execution.statusMessage,
                                             execution.statusLocation))
            log_msg = get_job_log_msg(status=exec_status,
                                      message=execution.statusMessage,
                                      progress=execution.percentCompleted,
                                      duration=None)  # get if available
            log_progress = map_progress(execution.percentCompleted,
                                        Wps1RemoteJobProgress.MONITOR,
                                        Wps1RemoteJobProgress.RESULTS)
            self.update_status(log_msg, log_progress, Status.RUNNING)
        except Exception as exc:
            num_retries += 1
            LOGGER.debug("Exception raised: %r", exc)
            sleep(1)
        else:
            num_retries = 0
            run_step += 1

    if not execution.isSucceded():
        exec_msg = execution.statusMessage or "Job failed."
        exec_status = map_status(execution.getStatus())
        exec_status_url = execution.statusLocation
        LOGGER.debug(get_log_monitor_msg(job_id, exec_status,
                                         execution.percentCompleted,
                                         exec_msg, exec_status_url))
        # provide more details in logs of parent job process about the cause of the failing remote execution
        xml_err = bytes2str(xml_util.tostring(execution.response))
        xml_exc = get_exception_from_xml_status(execution.response)
        self.update_status(
            f"Retrieved error status response from WPS remote provider on [{exec_status_url}]:\n{xml_err}\n",
            log_progress, Status.FAILED, error=xml_exc)
        return False
    return True

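
# Hedged usage sketch of the 'monitor' method above: 'monitor_reference' is a plain mapping carrying the
# owslib WPSExecution under the "execution" key, which the method reads and rewrites for later stages.
# The 'wps1_process' object below is hypothetical and stands for whatever instance exposes this method.
def run_remote_wps1_monitoring_sketch(wps1_process, execution):
    monitor_reference = {"execution": execution}  # as produced by a prior execute/submit step
    if not wps1_process.monitor(monitor_reference):
        raise RuntimeError("Remote WPS1 job failed; see the parent job log for the XML exception details.")
    return monitor_reference["execution"]  # refreshed status document for the results stage
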
def execute_process(self, job_id, url, headers=None):
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")
    settings = get_settings(app)
    task_logger = get_task_logger(__name__)
    load_pywps_config(settings)

    task_logger.debug("Job task setup.")

    # reset the connection because we are in a forked celery process
    db = get_db(app, reset_connection=True)
    store = db.get_store(StoreJobs)

    job = store.fetch_by_id(job_id)
    job.task_id = self.request.id
    job.progress = JOB_PROGRESS_SETUP
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    try:
        try:
            job.progress = JOB_PROGRESS_DESCRIBE
            job.save_log(logger=task_logger,
                         message="Execute WPS request for process [{!s}]".format(job.process))
            ssl_verify = get_ssl_verify_option("get", url, settings=settings)
            wps = WebProcessingService(url=url, headers=get_cookie_headers(headers), verify=ssl_verify)
            set_wps_language(wps, accept_language=job.accept_language)
            raise_on_xml_exception(wps._capabilities)  # noqa
        except Exception as ex:
            raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex)))
        try:
            process = wps.describeprocess(job.process)
        except Exception as ex:
            raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex)))

        # prepare inputs
        job.progress = JOB_PROGRESS_GET_INPUTS
        job.save_log(logger=task_logger, message="Fetching job input definitions.")
        complex_inputs = []
        for process_input in process.dataInputs:
            if WPS_COMPLEX_DATA in process_input.dataType:
                complex_inputs.append(process_input.identifier)

        try:
            wps_inputs = list()
            for process_input in job.inputs:
                input_id = get_any_id(process_input)
                process_value = get_any_value(process_input)
                # in case of array inputs, must repeat (id, value)
                input_values = process_value if isinstance(process_value, list) else [process_value]

                # we need to support the file:// scheme, but PyWPS doesn't like it, so remove the scheme prefix
                input_values = [val[7:] if str(val).startswith("file://") else val for val in input_values]

                # need to use ComplexDataInput structure for complex input
                # need to use literal String for anything else than complex
                # TODO: BoundingBox not supported
                wps_inputs.extend([
                    (input_id, ComplexDataInput(input_value) if input_id in complex_inputs else str(input_value))
                    for input_value in input_values
                ])
        except KeyError:
            wps_inputs = []

        # prepare outputs
        job.progress = JOB_PROGRESS_GET_OUTPUTS
        job.save_log(logger=task_logger, message="Fetching job output definitions.")
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs]

        mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC

        job.progress = JOB_PROGRESS_EXECUTE_REQUEST
        job.save_log(logger=task_logger, message="Starting job process execution.")
        job.save_log(logger=task_logger,
                     message="Following updates could take a while until the Application Package answers...")

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job.process,
                                           wps_inputs=wps_inputs, wps_outputs=wps_outputs,
                                           mode=mode, job_uuid=job.id)
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(execution.statusLocation, settings)
        job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path)
        if not wps_status_path.startswith("http") and not os.path.isfile(wps_status_path):
            LOGGER.warning("WPS status location not resolved to local path: [%s]", wps_status_path)
        job.save_log(logger=task_logger, level=logging.DEBUG,
                     message="Updated job status location: [{}].".format(wps_status_path))

        job.status = map_status(STATUS_STARTED)
        job.status_message = execution.statusMessage or "{} initiation done.".format(str(job))
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START
        job.save_log(logger=task_logger, message="Starting monitoring of job execution.")
        job = store.update_job(job)

        max_retries = 5
        num_retries = 0
        run_step = 0
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP
                execution = check_wps_status(location=wps_status_path, settings=settings,
                                             sleep_secs=wait_secs(run_step))
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job.status_message = "Job execution monitoring (progress: {}%, status: {})."\
                    .format(execution.percentCompleted, job_msg or "n/a")
                # job.save_log(logger=task_logger)
                # job = store.update_job(job)

                if execution.isComplete():
                    job.mark_finished()
                    job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
                    msg_progress = " (status: {})".format(job_msg) if job_msg else ""
                    if execution.isSucceded():
                        job.status = map_status(STATUS_SUCCEEDED)
                        job.status_message = "Job succeeded{}.".format(msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(logger=task_logger)
                        job_results = [ows2json_output(output, process, settings)
                                       for output in execution.processOutputs]
                        job.results = make_results_relative(job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        job.status_message = "Job failed{}.".format(msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(errors=execution.errors, logger=task_logger)

                    task_logger.debug("Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)

            except Exception as exc:
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = "Could not read status XML document for {!s}. Trying again...".format(job)
                job.save_log(errors=execution.errors, logger=task_logger)
                sleep(1)
            else:
                # job.status_message = "Update {}...".format(str(job))
                # job.save_log(logger=task_logger)
                num_retries = 0
                run_step += 1
            finally:
                job = store.update_job(job)

    except Exception as exc:
        LOGGER.exception("Failed running [%s]", job)
        job.status = map_status(STATUS_FAILED)
        job.status_message = "Failed to run {!s}.".format(job)
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR
        exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        job.save_log(errors=errors, logger=task_logger)
    finally:
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
        job.status_message = "Job {}.".format(job.status)
        job.save_log(logger=task_logger)

        # send email if requested
        if job.notification_email is not None:
            job.progress = JOB_PROGRESS_NOTIFY
            try:
                notify_job_complete(job, job.notification_email, settings)
                message = "Notification email sent successfully."
                job.save_log(logger=task_logger, message=message)
            except Exception as exc:
                exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
                exception = "{0}: {1!s}".format(exception_class, exc)
                message = "Couldn't send notification email ({})".format(exception)
                job.save_log(errors=message, logger=task_logger, message=message)

        job.progress = JOB_PROGRESS_DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status
