def finish_job(self, job_state):
    """Finalize a completed Pulsar job.

    Pulls the full status from the remote Pulsar server, lets the Pulsar
    client transfer/copy outputs back (and clean up the remote job when
    configured to), optionally resolves metadata externally, and finally
    calls ``job_wrapper.finish``.  Any failure during result collection
    fails the job with a generic remote error.

    :param job_state: async job state whose ``job_wrapper`` is finished.
    :returns: ``None``.
    """
    stderr = stdout = ''
    job_wrapper = job_state.job_wrapper
    try:
        client = self.get_client_from_state(job_state)
        run_results = client.full_status()
        remote_metadata_directory = run_results.get("metadata_directory", None)
        stdout = run_results.get('stdout', '')
        stderr = run_results.get('stderr', '')
        exit_code = run_results.get('returncode', None)
        pulsar_outputs = PulsarOutputs.from_status_response(run_results)
        # Use Pulsar client code to transfer/copy files back
        # and cleanup job if needed.
        completed_normally = \
            job_wrapper.get_state() not in [model.Job.states.ERROR, model.Job.states.DELETED]
        cleanup_job = job_wrapper.cleanup_job
        client_outputs = self.__client_outputs(client, job_wrapper)
        finish_args = dict(client=client,
                           job_completed_normally=completed_normally,
                           cleanup_job=cleanup_job,
                           client_outputs=client_outputs,
                           pulsar_outputs=pulsar_outputs)
        failed = pulsar_finish_job(**finish_args)
        if failed:
            job_wrapper.fail(
                "Failed to find or download one or more job outputs from remote server.",
                exception=True)
    except Exception:
        self.fail_job(job_state, message=GENERIC_REMOTE_ERROR, exception=True)
        log.exception("failure finishing job %d", job_wrapper.job_id)
        return
    if not PulsarJobRunner.__remote_metadata(client):
        self._handle_metadata_externally(job_wrapper, resolve_requirements=True)
    # Finish the job
    try:
        job_metrics_directory = os.path.join(job_wrapper.working_directory, "metadata")
        # Following check is a hack for jobs started during 19.01 or earlier release
        # and finishing with a 19.05 code base. Eliminate the hack in 19.09 or later
        # along with hacks for legacy metadata compute strategy.
        # Use a generator expression with any() - no need to materialize a list.
        if not os.path.exists(job_metrics_directory) or not any(
            "__instrument" in f for f in os.listdir(job_metrics_directory)
        ):
            job_metrics_directory = job_wrapper.working_directory
        job_wrapper.finish(
            stdout,
            stderr,
            exit_code,
            remote_metadata_directory=remote_metadata_directory,
            job_metrics_directory=job_metrics_directory,
        )
    except Exception:
        log.exception("Job wrapper finish method failed")
        job_wrapper.fail("Unable to finish job", exception=True)
def finish_job(self, job_state):
    """Finalize a completed Pulsar job.

    Pulls the full status from the remote Pulsar server, lets the Pulsar
    client transfer/copy outputs back (and clean up the remote job when
    configured to), optionally resolves metadata externally, and finally
    calls ``job_wrapper.finish``.  Any failure during result collection
    fails the job with a generic remote error.

    :param job_state: async job state whose ``job_wrapper`` is finished.
    :returns: ``None``.
    """
    stderr = stdout = ''
    job_wrapper = job_state.job_wrapper
    try:
        client = self.get_client_from_state(job_state)
        run_results = client.full_status()
        remote_working_directory = run_results.get("working_directory", None)
        remote_metadata_directory = run_results.get("metadata_directory", None)
        stdout = run_results.get('stdout', '')
        stderr = run_results.get('stderr', '')
        exit_code = run_results.get('returncode', None)
        pulsar_outputs = PulsarOutputs.from_status_response(run_results)
        # Use Pulsar client code to transfer/copy files back
        # and cleanup job if needed.
        completed_normally = \
            job_wrapper.get_state() not in [model.Job.states.ERROR, model.Job.states.DELETED]
        cleanup_job = job_wrapper.cleanup_job
        client_outputs = self.__client_outputs(client, job_wrapper)
        finish_args = dict(client=client,
                           job_completed_normally=completed_normally,
                           cleanup_job=cleanup_job,
                           client_outputs=client_outputs,
                           pulsar_outputs=pulsar_outputs)
        failed = pulsar_finish_job(**finish_args)
        if failed:
            job_wrapper.fail(
                "Failed to find or download one or more job outputs from remote server.",
                exception=True)
    except Exception:
        message = GENERIC_REMOTE_ERROR
        job_wrapper.fail(message, exception=True)
        # Lazy %-args: let logging do the interpolation only if the record
        # is actually emitted (avoids eager string formatting).
        log.exception("failure finishing job %d", job_wrapper.job_id)
        return
    if not PulsarJobRunner.__remote_metadata(client):
        self._handle_metadata_externally(job_wrapper, resolve_requirements=True)
    # Finish the job
    try:
        job_wrapper.finish(
            stdout,
            stderr,
            exit_code,
            remote_working_directory=remote_working_directory,
            remote_metadata_directory=remote_metadata_directory,
        )
    except Exception:
        log.exception("Job wrapper finish method failed")
        job_wrapper.fail("Unable to finish job", exception=True)
def __finish(options, client, client_outputs, result_status):
    """Stage job outputs back from the Pulsar server and verify success.

    Builds ``PulsarOutputs`` from the final status response, invokes the
    Pulsar client ``finish_job`` to copy outputs and clean up, and raises
    an ``AssertionError`` describing the failure if any output could not
    be collected.

    :param options: options object; ``options.cleanup`` (default ``True``)
        controls whether the remote job is cleaned up.
    :param client: Pulsar client for the job.
    :param client_outputs: ``ClientOutputs`` describing local destinations.
    :param result_status: final status response from the Pulsar server.
    :raises AssertionError: if ``finish_job`` reports failures.
    """
    pulsar_outputs = PulsarOutputs.from_status_response(result_status)
    cleanup_job = 'always'
    if not getattr(options, 'cleanup', True):
        cleanup_job = 'never'
    finish_args = dict(
        client=client,
        job_completed_normally=True,
        cleanup_job=cleanup_job,  # Default should 'always' if overridden via options.
        client_outputs=client_outputs,
        pulsar_outputs=pulsar_outputs,
    )
    failed = finish_job(**finish_args)
    if failed:
        failed_message_template = "Failed to complete job correctly, final status %s, finish exceptions %s."
        failed_message = failed_message_template % (result_status, failed)
        # Raise explicitly instead of `assert False`: asserts are stripped
        # under `python -O`, which would silently hide the failure.
        raise AssertionError(failed_message)
def finish_job(self, job_state):
    """Finalize a completed Pulsar job.

    Pulls the full status from the remote Pulsar server, lets the Pulsar
    client transfer/copy outputs back (and clean up the remote job when
    configured to), optionally resolves metadata externally, and finally
    calls ``job_wrapper.finish``.  Any failure during result collection
    fails the job with a generic remote error.

    :param job_state: async job state whose ``job_wrapper`` is finished.
    :returns: ``None``.
    """
    stderr = stdout = ''
    job_wrapper = job_state.job_wrapper
    try:
        client = self.get_client_from_state(job_state)
        run_results = client.full_status()
        remote_working_directory = run_results.get("working_directory", None)
        remote_metadata_directory = run_results.get("metadata_directory", None)
        stdout = run_results.get('stdout', '')
        stderr = run_results.get('stderr', '')
        exit_code = run_results.get('returncode', None)
        pulsar_outputs = PulsarOutputs.from_status_response(run_results)
        # Use Pulsar client code to transfer/copy files back
        # and cleanup job if needed.
        completed_normally = \
            job_wrapper.get_state() not in [model.Job.states.ERROR, model.Job.states.DELETED]
        cleanup_job = job_wrapper.cleanup_job
        client_outputs = self.__client_outputs(client, job_wrapper)
        finish_args = dict(
            client=client,
            job_completed_normally=completed_normally,
            cleanup_job=cleanup_job,
            client_outputs=client_outputs,
            pulsar_outputs=pulsar_outputs,
        )
        failed = pulsar_finish_job(**finish_args)
        if failed:
            job_wrapper.fail(
                "Failed to find or download one or more job outputs from remote server.",
                exception=True)
    except Exception:
        message = GENERIC_REMOTE_ERROR
        job_wrapper.fail(message, exception=True)
        # Lazy %-args: let logging interpolate only when the record is
        # actually emitted (avoids eager string formatting).
        log.exception("failure finishing job %d", job_wrapper.job_id)
        return
    if not PulsarJobRunner.__remote_metadata(client):
        self._handle_metadata_externally(job_wrapper, resolve_requirements=True)
    # Finish the job
    try:
        job_wrapper.finish(
            stdout,
            stderr,
            exit_code,
            remote_working_directory=remote_working_directory,
            remote_metadata_directory=remote_metadata_directory,
        )
    except Exception:
        log.exception("Job wrapper finish method failed")
        job_wrapper.fail("Unable to finish job", exception=True)
def _run_client_for_job(args):
    """Submit a single job via the Pulsar client, wait, and stage results back.

    Builds output patterns from ``args.output_pattern`` (already regexes) and
    ``args.output`` (globs translated via ``fnmatch``), submits the command
    line described by ``args``, blocks until the job reaches a final state,
    optionally dumps the raw status JSON, and finishes the job (copying
    outputs back and cleaning up per ``args.cleanup``).

    :param args: parsed CLI arguments; ``args.job_id`` is generated as a
        UUID when not supplied.
    :returns: the ``failed`` result of ``finish_job`` (falsy on success).
    """
    if args.job_id is None:
        args.job_id = str(uuid.uuid4())
    output_patterns = []
    output_patterns.extend(args.output_pattern)
    for output in args.output:
        # Globs become regexes so they can join the pattern-based outputs.
        output_patterns.append(fnmatch.translate(output))
    client_options = extract_client_options(args)
    client, client_manager = client_info(args, client_options)
    try:
        working_directory = args.working_directory
        client_outputs = ClientOutputs(
            working_directory=working_directory,
            dynamic_outputs=output_patterns,
        )
        job_description = ClientJobDescription(
            command_line=args.command,
            working_directory=working_directory,
            client_outputs=client_outputs,
        )
        submit_job(client, job_description)
        waiter = Waiter(client, client_manager)
        result_status = waiter.wait()
        pulsar_outputs = PulsarOutputs.from_status_response(result_status)
        if args.result_json:
            # Use a context manager so the file handle is always closed.
            with open(args.result_json, "w") as result_file:
                result_file.write(json_dumps(result_status))
        finish_args = dict(
            client=client,
            job_completed_normally=True,
            cleanup_job=args.cleanup,
            client_outputs=client_outputs,
            pulsar_outputs=pulsar_outputs,
        )
        failed = finish_job(**finish_args)
        return failed
    finally:
        client_manager.shutdown()