Example #1
 def finish_job(self, job_state):
     stderr = stdout = ''
     job_wrapper = job_state.job_wrapper
     try:
         client = self.get_client_from_state(job_state)
         run_results = client.full_status()
         remote_metadata_directory = run_results.get(
             "metadata_directory", None)
         stdout = run_results.get('stdout', '')
         stderr = run_results.get('stderr', '')
         exit_code = run_results.get('returncode', None)
         pulsar_outputs = PulsarOutputs.from_status_response(run_results)
         # Use Pulsar client code to transfer/copy files back
         # and cleanup job if needed.
         completed_normally = \
             job_wrapper.get_state() not in [model.Job.states.ERROR, model.Job.states.DELETED]
         cleanup_job = job_wrapper.cleanup_job
         client_outputs = self.__client_outputs(client, job_wrapper)
         finish_args = dict(client=client,
                            job_completed_normally=completed_normally,
                            cleanup_job=cleanup_job,
                            client_outputs=client_outputs,
                            pulsar_outputs=pulsar_outputs)
         failed = pulsar_finish_job(**finish_args)
         if failed:
             job_wrapper.fail(
                 "Failed to find or download one or more job outputs from remote server.",
                 exception=True)
     except Exception:
         self.fail_job(job_state,
                       message=GENERIC_REMOTE_ERROR,
                       exception=True)
         log.exception("failure finishing job %d", job_wrapper.job_id)
         return
     if not PulsarJobRunner.__remote_metadata(client):
         self._handle_metadata_externally(job_wrapper,
                                          resolve_requirements=True)
     # Finish the job
     try:
         job_metrics_directory = os.path.join(job_wrapper.working_directory,
                                              "metadata")
         # Following check is a hack for jobs started during 19.01 or earlier release
         # and finishing with a 19.05 code base. Eliminate the hack in 19.09 or later
         # along with hacks for legacy metadata compute strategy.
         if not os.path.exists(job_metrics_directory) or not any([
                 "__instrument" in f
                 for f in os.listdir(job_metrics_directory)
         ]):
             job_metrics_directory = job_wrapper.working_directory
         job_wrapper.finish(
             stdout,
             stderr,
             exit_code,
             remote_metadata_directory=remote_metadata_directory,
             job_metrics_directory=job_metrics_directory,
         )
     except Exception:
         log.exception("Job wrapper finish method failed")
         job_wrapper.fail("Unable to finish job", exception=True)
Example #2
 def finish_job(self, job_state):
     stderr = stdout = ''
     job_wrapper = job_state.job_wrapper
     try:
         client = self.get_client_from_state(job_state)
         run_results = client.full_status()
         remote_working_directory = run_results.get("working_directory",
                                                    None)
         remote_metadata_directory = run_results.get(
             "metadata_directory", None)
         stdout = run_results.get('stdout', '')
         stderr = run_results.get('stderr', '')
         exit_code = run_results.get('returncode', None)
         pulsar_outputs = PulsarOutputs.from_status_response(run_results)
         # Use Pulsar client code to transfer/copy files back
         # and cleanup job if needed.
         completed_normally = \
             job_wrapper.get_state() not in [model.Job.states.ERROR, model.Job.states.DELETED]
         cleanup_job = job_wrapper.cleanup_job
         client_outputs = self.__client_outputs(client, job_wrapper)
         finish_args = dict(client=client,
                            job_completed_normally=completed_normally,
                            cleanup_job=cleanup_job,
                            client_outputs=client_outputs,
                            pulsar_outputs=pulsar_outputs)
         failed = pulsar_finish_job(**finish_args)
         if failed:
             job_wrapper.fail(
                 "Failed to find or download one or more job outputs from remote server.",
                 exception=True)
     except Exception:
         message = GENERIC_REMOTE_ERROR
         job_wrapper.fail(message, exception=True)
         log.exception("failure finishing job %d" % job_wrapper.job_id)
         return
     if not PulsarJobRunner.__remote_metadata(client):
         self._handle_metadata_externally(job_wrapper,
                                          resolve_requirements=True)
     # Finish the job
     try:
         job_wrapper.finish(
             stdout,
             stderr,
             exit_code,
             remote_working_directory=remote_working_directory,
             remote_metadata_directory=remote_metadata_directory,
         )
     except Exception:
         log.exception("Job wrapper finish method failed")
         job_wrapper.fail("Unable to finish job", exception=True)
Example #3
def __finish(options, client, client_outputs, result_status):
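    # Collect the outputs described by client_outputs back from the Pulsar server
    # and clean up the remote job if requested.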
    pulsar_outputs = PulsarOutputs.from_status_response(result_status)
    cleanup_job = 'always'
    if not getattr(options, 'cleanup', True):
        cleanup_job = 'never'
    finish_args = dict(
        client=client,
        job_completed_normally=True,
        cleanup_job=cleanup_job,  # Defaults to 'always' unless overridden to 'never' via options.
        client_outputs=client_outputs,
        pulsar_outputs=pulsar_outputs,
    )
    failed = finish_job(**finish_args)
    if failed:
        failed_message_template = "Failed to complete job correctly, final status %s, finish exceptions %s."
        failed_message = failed_message_template % (result_status, failed)
        assert False, failed_message
Example #4
 def finish_job( self, job_state ):
     stderr = stdout = ''
     job_wrapper = job_state.job_wrapper
     try:
         client = self.get_client_from_state(job_state)
         run_results = client.full_status()
         remote_working_directory = run_results.get("working_directory", None)
         remote_metadata_directory = run_results.get("metadata_directory", None)
         stdout = run_results.get('stdout', '')
         stderr = run_results.get('stderr', '')
         exit_code = run_results.get('returncode', None)
         pulsar_outputs = PulsarOutputs.from_status_response(run_results)
         # Use Pulsar client code to transfer/copy files back
         # and cleanup job if needed.
         completed_normally = \
             job_wrapper.get_state() not in [ model.Job.states.ERROR, model.Job.states.DELETED ]
         cleanup_job = job_wrapper.cleanup_job
         client_outputs = self.__client_outputs(client, job_wrapper)
         finish_args = dict( client=client,
                             job_completed_normally=completed_normally,
                             cleanup_job=cleanup_job,
                             client_outputs=client_outputs,
                             pulsar_outputs=pulsar_outputs )
         failed = pulsar_finish_job( **finish_args )
         if failed:
             job_wrapper.fail("Failed to find or download one or more job outputs from remote server.", exception=True)
     except Exception:
         message = GENERIC_REMOTE_ERROR
         job_wrapper.fail( message, exception=True )
         log.exception("failure finishing job %d" % job_wrapper.job_id)
         return
     if not PulsarJobRunner.__remote_metadata( client ):
         self._handle_metadata_externally( job_wrapper, resolve_requirements=True )
     # Finish the job
     try:
         job_wrapper.finish(
             stdout,
             stderr,
             exit_code,
             remote_working_directory=remote_working_directory,
             remote_metadata_directory=remote_metadata_directory,
         )
     except Exception:
         log.exception("Job wrapper finish method failed")
         job_wrapper.fail("Unable to finish job", exception=True)
Example #5
def _run_client_for_job(args):
    if args.job_id is None:
        args.job_id = str(uuid.uuid4())
    output_patterns = []
    output_patterns.extend(args.output_pattern)
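    # fnmatch.translate converts the shell-style globs in args.output into regular expressions.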
    for output in args.output:
        output_patterns.append(fnmatch.translate(output))

    client_options = extract_client_options(args)
    client, client_manager = client_info(args, client_options)
    try:
        working_directory = args.working_directory
        client_outputs = ClientOutputs(
            working_directory=working_directory,
            dynamic_outputs=output_patterns,
        )
        job_description = ClientJobDescription(
            command_line=args.command,
            working_directory=working_directory,
            client_outputs=client_outputs,
        )
        submit_job(client, job_description)
        waiter = Waiter(client, client_manager)
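        # Block until the remote Pulsar job reaches a terminal state.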
        result_status = waiter.wait()
        pulsar_outputs = PulsarOutputs.from_status_response(result_status)
        if args.result_json:
            with open(args.result_json, "w") as result_file:
                result_file.write(json_dumps(result_status))
        finish_args = dict(
            client=client,
            job_completed_normally=True,
            cleanup_job=args.cleanup,
            client_outputs=client_outputs,
            pulsar_outputs=pulsar_outputs,
        )
        failed = finish_job(**finish_args)
        return failed
    finally:
        client_manager.shutdown()
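
The five examples above share one shape: describe the job and its local outputs (ClientJobDescription, ClientOutputs), submit it, wait for a terminal status, wrap the status response with PulsarOutputs.from_status_response, and hand everything to the Pulsar client's finish_job so outputs are copied back and the remote job is cleaned up. The condensed sketch below restates that flow. It assumes these names are importable from pulsar.client (as in the modules the examples come from) and that a client and a waiter have already been constructed (for instance via client_info and Waiter in Example #5); the run_and_finish helper name and the hard-coded cleanup_job="always" are illustrative only.

from pulsar.client import (
    ClientJobDescription,
    ClientOutputs,
    PulsarOutputs,
    finish_job,
    submit_job,
)


def run_and_finish(client, waiter, working_directory, command_line):
    # Tell the client where outputs live locally so finish_job knows what to copy back.
    client_outputs = ClientOutputs(working_directory=working_directory)
    job_description = ClientJobDescription(
        command_line=command_line,
        working_directory=working_directory,
        client_outputs=client_outputs,
    )
    submit_job(client, job_description)
    # Block until the remote job reaches a terminal state.
    result_status = waiter.wait()
    pulsar_outputs = PulsarOutputs.from_status_response(result_status)
    # finish_job returns a truthy value when one or more outputs could not be collected.
    return finish_job(
        client=client,
        job_completed_normally=True,
        cleanup_job="always",
        client_outputs=client_outputs,
        pulsar_outputs=pulsar_outputs,
    )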