Example #1
    def finish_job(self, job_state):
        """
        Get the output/error for a finished job, pass to `job_wrapper.finish`
        and cleanup all the job's temporary files.
        """
        galaxy_id_tag = job_state.job_wrapper.get_id_tag()
        external_job_id = job_state.job_id

        # To ensure that files below are readable, ownership must be reclaimed first
        job_state.job_wrapper.reclaim_ownership()

        # wait for the files to appear
        which_try = 0
        collect_output_success = True
        while which_try < self.app.config.retry_job_output_collection + 1:
            try:
                with open(job_state.output_file, "rb") as stdout_file, open(job_state.error_file, 'rb') as stderr_file:
                    stdout = shrink_stream_by_size(stdout_file, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
                    stderr = shrink_stream_by_size(stderr_file, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
                break
            except Exception as e:
                if which_try == self.app.config.retry_job_output_collection:
                    stdout = ''
                    stderr = job_state.runner_states.JOB_OUTPUT_NOT_RETURNED_FROM_CLUSTER
                    log.error('(%s/%s) %s: %s' % (galaxy_id_tag, external_job_id, stderr, str(e)))
                    collect_output_success = False
                else:
                    time.sleep(1)
                which_try += 1

        if not collect_output_success:
            job_state.fail_message = stderr
            job_state.runner_state = job_state.runner_states.JOB_OUTPUT_NOT_RETURNED_FROM_CLUSTER
            self.mark_as_failed(job_state)
            return

        try:
            # This should be an 8-bit exit code, but read ahead anyway:
            with open(job_state.exit_code_file, "r") as exit_code_fh:
                exit_code_str = exit_code_fh.read(32)
        except Exception:
            # By default, the exit code is 0, which typically indicates success.
            exit_code_str = "0"

        try:
            # Decode the exit code. If it's bogus, then just use 0.
            exit_code = int(exit_code_str)
        except ValueError:
            log.warning("(%s/%s) Exit code '%s' invalid. Using 0." % (galaxy_id_tag, external_job_id, exit_code_str))
            exit_code = 0

        # clean up the job files
        cleanup_job = job_state.job_wrapper.cleanup_job
        if cleanup_job == "always" or (not stderr and cleanup_job == "onsuccess"):
            job_state.cleanup()

        try:
            self._finish_or_resubmit_job(job_state, stdout, stderr, exit_code)
        except Exception:
            log.exception("(%s/%s) Job wrapper finish method failed" % (galaxy_id_tag, external_job_id))
            job_state.job_wrapper.fail("Unable to finish job", exception=True)
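The retry loop above is the heart of the pattern: attempt to read the output files, sleep between attempts, and give up after a configured number of tries. A minimal standalone sketch of the same idea; `collect_with_retries` and its parameters are illustrative names, not part of Galaxy's API.

    import time

    def collect_with_retries(output_path, error_path, max_retries=3):
        # Try to read both files, sleeping 1s between attempts; the final
        # failed attempt re-raises so the caller can mark the job as failed.
        for attempt in range(max_retries + 1):
            try:
                with open(output_path, "rb") as out, open(error_path, "rb") as err:
                    return out.read(), err.read()
            except OSError:
                if attempt == max_retries:
                    raise
                time.sleep(1)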
Example #2
    def queue_job( self, job_wrapper ):
        # prepare the job
        if not self.prepare_job( job_wrapper ):
            return

        stderr = stdout = ''
        exit_code = 0

        # command line has been added to the wrapper by prepare_job()
        command_line = self.__command_line( job_wrapper )

        job_id = job_wrapper.get_id_tag()

        try:
            log.debug( '(%s) executing: %s' % ( job_id, command_line ) )
            stdout_file = tempfile.NamedTemporaryFile( suffix='_stdout', dir=job_wrapper.working_directory )
            stderr_file = tempfile.NamedTemporaryFile( suffix='_stderr', dir=job_wrapper.working_directory )
            proc = subprocess.Popen( args=command_line,
                                     shell=True,
                                     cwd=job_wrapper.working_directory,
                                     stdout=stdout_file,
                                     stderr=stderr_file,
                                     env=self._environ,
                                     preexec_fn=os.setpgrp )
            job_wrapper.set_job_destination(job_wrapper.job_destination, proc.pid)
            job_wrapper.change_state( model.Job.states.RUNNING )
            job_start = datetime.datetime.now()
            i = 0
            # Iterate until the process exits, periodically checking its limits
            while proc.poll() is None:
                i += 1
                if (i % 20) == 0:
                    msg = job_wrapper.check_limits(runtime=datetime.datetime.now() - job_start)
                    if msg is not None:
                        job_wrapper.fail(msg)
                        log.debug('(%s) Terminating process group' % job_id)
                        self._terminate(proc)
                        return
                else:
                    sleep(1)
            # Reap the process and get the exit code.
            exit_code = proc.wait()
            stdout_file.seek( 0 )
            stderr_file.seek( 0 )
            stdout = shrink_stream_by_size( stdout_file, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
            stderr = shrink_stream_by_size( stderr_file, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
            stdout_file.close()
            stderr_file.close()
            log.debug('execution finished: %s' % command_line)
        except Exception:
            job_wrapper.fail( "failure running job", exception=True )
            log.exception("failure running job %d" % job_wrapper.job_id)
            return
        self._handle_metadata_externally( job_wrapper, resolve_requirements=True )
        # Finish the job!
        try:
            job_wrapper.finish( stdout, stderr, exit_code )
        except Exception:
            log.exception("Job wrapper finish method failed")
            job_wrapper.fail("Unable to finish job", exception=True)
Example #3
    def finish_job( self, job_state ):
        """
        Get the output/error for a finished job, pass to `job_wrapper.finish`
        and cleanup all the job's temporary files.
        """
        galaxy_id_tag = job_state.job_wrapper.get_id_tag()
        external_job_id = job_state.job_id

        # To ensure that files below are readable, ownership must be reclaimed first
        job_state.job_wrapper.reclaim_ownership()

        # wait for the files to appear
        which_try = 0
        while which_try < (self.app.config.retry_job_output_collection + 1):
            try:
                stdout = shrink_stream_by_size( open( job_state.output_file, "r" ), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                stderr = shrink_stream_by_size( open( job_state.error_file, "r" ), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                which_try = (self.app.config.retry_job_output_collection + 1)
            except Exception as e:
                if which_try == self.app.config.retry_job_output_collection:
                    stdout = ''
                    stderr = 'Job output not returned from cluster'
                    log.error( '(%s/%s) %s: %s' % ( galaxy_id_tag, external_job_id, stderr, str( e ) ) )
                else:
                    time.sleep(1)
                which_try += 1
Example #4
    def finish_job( self, job_state ):
        """
        Get the output/error for a finished job, pass to `job_wrapper.finish`
        and cleanup all the job's temporary files.
        """
        galaxy_id_tag = job_state.job_wrapper.get_id_tag()
        external_job_id = job_state.job_id

        # To ensure that files below are readable, ownership must be reclaimed first
        job_state.job_wrapper.reclaim_ownership()

        # wait for the files to appear
        which_try = 0
        while which_try < (self.app.config.retry_job_output_collection + 1):
            try:
                stdout = shrink_stream_by_size( open( job_state.output_file, "r" ), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                stderr = shrink_stream_by_size( open( job_state.error_file, "r" ), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                which_try = (self.app.config.retry_job_output_collection + 1)
            except Exception as e:
                if which_try == self.app.config.retry_job_output_collection:
                    stdout = ''
                    stderr = 'Job output not returned from cluster'
                    log.error( '(%s/%s) %s: %s' % ( galaxy_id_tag, external_job_id, stderr, str( e ) ) )
                else:
                    time.sleep(1)
                which_try += 1
Example #5
    def finish_job(self, job_state):
        """
        Get the output/error for a finished job, pass to `job_wrapper.finish`
        and cleanup all the job's temporary files.
        """
        galaxy_id_tag = job_state.job_wrapper.get_id_tag()
        external_job_id = job_state.job_id

        # To ensure that files below are readable, ownership must be reclaimed first
        job_state.job_wrapper.reclaim_ownership()

        # wait for the files to appear
        which_try = 0
        collect_output_success = True
        while which_try < self.app.config.retry_job_output_collection + 1:
            try:
                stdout = shrink_stream_by_size(open(job_state.output_file, "r"), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
                stderr = shrink_stream_by_size(open(job_state.error_file, "r"), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
                break
            except Exception as e:
                if which_try == self.app.config.retry_job_output_collection:
                    stdout = ''
                    stderr = job_state.runner_states.JOB_OUTPUT_NOT_RETURNED_FROM_CLUSTER
                    log.error('(%s/%s) %s: %s' % (galaxy_id_tag, external_job_id, stderr, str(e)))
                    collect_output_success = False
                else:
                    time.sleep(1)
                which_try += 1

        if not collect_output_success:
            job_state.fail_message = stderr
            job_state.runner_state = job_state.runner_states.JOB_OUTPUT_NOT_RETURNED_FROM_CLUSTER
            self.mark_as_failed(job_state)
            return

        try:
            # This should be an 8-bit exit code, but read ahead anyway:
            with open(job_state.exit_code_file, "r") as exit_code_fh:
                exit_code_str = exit_code_fh.read(32)
        except Exception:
            # By default, the exit code is 0, which typically indicates success.
            exit_code_str = "0"

        try:
            # Decode the exit code. If it's bogus, then just use 0.
            exit_code = int(exit_code_str)
        except ValueError:
            log.warning("(%s/%s) Exit code '%s' invalid. Using 0." % (galaxy_id_tag, external_job_id, exit_code_str))
            exit_code = 0

        # clean up the job files
        cleanup_job = job_state.job_wrapper.cleanup_job
        if cleanup_job == "always" or (not stderr and cleanup_job == "onsuccess"):
            job_state.cleanup()

        try:
            self._finish_or_resubmit_job(job_state, stdout, stderr, exit_code)
        except Exception:
            log.exception("(%s/%s) Job wrapper finish method failed" % (galaxy_id_tag, external_job_id))
            job_state.job_wrapper.fail("Unable to finish job", exception=True)
Example #6
    def queue_job( self, job_wrapper ):
        # prepare the job
        include_metadata = asbool( job_wrapper.job_destination.params.get( "embed_metadata_in_job", DEFAULT_EMBED_METADATA_IN_JOB ) )
        if not self.prepare_job( job_wrapper, include_metadata=include_metadata ):
            return

        stderr = stdout = ''
        exit_code = 0

        # command line has been added to the wrapper by prepare_job()
        command_line, exit_code_path = self.__command_line( job_wrapper )
        job_id = job_wrapper.get_id_tag()

        try:
            stdout_file = tempfile.NamedTemporaryFile( suffix='_stdout', dir=job_wrapper.working_directory )
            stderr_file = tempfile.NamedTemporaryFile( suffix='_stderr', dir=job_wrapper.working_directory )
            log.debug( '(%s) executing job script: %s' % ( job_id, command_line ) )
            proc = subprocess.Popen( args=command_line,
                                     shell=True,
                                     cwd=job_wrapper.working_directory,
                                     stdout=stdout_file,
                                     stderr=stderr_file,
                                     env=self._environ,
                                     preexec_fn=os.setpgrp )
            job_wrapper.set_job_destination(job_wrapper.job_destination, proc.pid)
            job_wrapper.change_state( model.Job.states.RUNNING )

            terminated = self.__poll_if_needed( proc, job_wrapper, job_id )
            if terminated:
                return

            # Reap the process and get the exit code.
            exit_code = proc.wait()
            try:
                with open( exit_code_path, 'r' ) as fh:
                    exit_code = int( fh.read() )
            except Exception:
                log.warning( "Failed to read exit code from path %s" % exit_code_path )
            stdout_file.seek( 0 )
            stderr_file.seek( 0 )
            stdout = shrink_stream_by_size( stdout_file, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
            stderr = shrink_stream_by_size( stderr_file, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
            stdout_file.close()
            stderr_file.close()
            log.debug('execution finished: %s' % command_line)
        except Exception:
            log.exception("failure running job %d" % job_wrapper.job_id)
            job_wrapper.fail( "failure running job", exception=True )
            return
        external_metadata = not asbool( job_wrapper.job_destination.params.get( "embed_metadata_in_job", DEFAULT_EMBED_METADATA_IN_JOB ) )
        if external_metadata:
            self._handle_metadata_externally( job_wrapper, resolve_requirements=True )
        # Finish the job!
        try:
            job_wrapper.finish( stdout, stderr, exit_code )
        except Exception:
            log.exception("Job wrapper finish method failed")
            job_wrapper.fail("Unable to finish job", exception=True)
Example #7
    def finish_job(self, pbs_job_state):
        """
        Get the output/error for a finished job, pass to `job_wrapper.finish`
        and cleanup all the PBS temporary files.
        """
        ofile = pbs_job_state.output_file
        efile = pbs_job_state.error_file
        ecfile = pbs_job_state.exit_code_file
        job_file = pbs_job_state.job_file
        # collect the output
        try:
            ofh = open(ofile, "r")
            efh = open(efile, "r")
            ecfh = open(ecfile, "r")
            stdout = shrink_stream_by_size(ofh,
                                           DATABASE_MAX_STRING_SIZE,
                                           join_by="\n..\n",
                                           left_larger=True,
                                           beginning_on_size_error=True)
            stderr = shrink_stream_by_size(efh,
                                           DATABASE_MAX_STRING_SIZE,
                                           join_by="\n..\n",
                                           left_larger=True,
                                           beginning_on_size_error=True)
            # This should be an 8-bit exit code, but read ahead anyway:
            exit_code_str = ecfh.read(32)
        except Exception:
            stdout = ''
            stderr = 'Job output not returned by PBS: the output datasets were deleted while the job was running, the job was manually dequeued or there was a cluster error.'
            # By default, the exit code is 0, which usually indicates success
            # (although clearly some error happened).
            exit_code_str = ""

        # Translate the exit code string to an integer; use 0 on failure.
        try:
            exit_code = int(exit_code_str)
        except ValueError:
            log.warning("Exit code " + exit_code_str +
                        " was invalid. Using 0.")
            exit_code = 0

        # Call on the job wrapper to complete the call:
        try:
            pbs_job_state.job_wrapper.finish(stdout, stderr, exit_code)
        except Exception:
            log.exception("Job wrapper finish method failed")
            pbs_job_state.job_wrapper.fail("Unable to finish job",
                                           exception=True)

        # clean up the pbs files
        if self.app.config.cleanup_job == "always" or (
                not stderr and self.app.config.cleanup_job == "onsuccess"):
            self.cleanup((ofile, efile, ecfile, job_file))
Example #8
    def finish_job( self, job_state ):
        """
        Get the output/error for a finished job, pass to `job_wrapper.finish`
        and cleanup all the job's temporary files.
        """
        galaxy_id_tag = job_state.job_wrapper.get_id_tag()
        external_job_id = job_state.job_id

        # To ensure that files below are readable, ownership must be reclaimed first
        job_state.job_wrapper.reclaim_ownership()

        # wait for the files to appear
        which_try = 0
        while which_try < (self.app.config.retry_job_output_collection + 1):
            try:
                stdout = shrink_stream_by_size( open( job_state.output_file, "r" ), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                stderr = shrink_stream_by_size( open( job_state.error_file, "r" ), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                which_try = (self.app.config.retry_job_output_collection + 1)
            except Exception as e:
                if which_try == self.app.config.retry_job_output_collection:
                    stdout = ''
                    stderr = 'Job output not returned from cluster'
                    log.error( '(%s/%s) %s: %s' % ( galaxy_id_tag, external_job_id, stderr, str( e ) ) )
                else:
                    time.sleep(1)
                which_try += 1

        try:
            # This should be an 8-bit exit code, but read ahead anyway:
            with open( job_state.exit_code_file, "r" ) as fh:
                exit_code_str = fh.read(32)
        except Exception:
            # By default, the exit code is 0, which typically indicates success.
            exit_code_str = "0"

        try:
            # Decode the exit code. If it's bogus, then just use 0.
            exit_code = int(exit_code_str)
        except ValueError:
            log.warning( "(%s/%s) Exit code '%s' invalid. Using 0." % ( galaxy_id_tag, external_job_id, exit_code_str ) )
            exit_code = 0

        # clean up the job files
        cleanup_job = job_state.job_wrapper.cleanup_job
        if cleanup_job == "always" or ( not stderr and cleanup_job == "onsuccess" ):
            job_state.cleanup()

        try:
            job_state.job_wrapper.finish( stdout, stderr, exit_code )
        except Exception:
            log.exception( "(%s/%s) Job wrapper finish method failed" % ( galaxy_id_tag, external_job_id ) )
            job_state.job_wrapper.fail( "Unable to finish job", exception=True )
Example #9
    def finish_job( self, job_state ):
        """
        Get the output/error for a finished job, pass to `job_wrapper.finish`
        and cleanup all the job's temporary files.
        """
        galaxy_id_tag = job_state.job_wrapper.get_id_tag()
        external_job_id = job_state.job_id

        # To ensure that files below are readable, ownership must be reclaimed first
        job_state.job_wrapper.reclaim_ownership()

        # wait for the files to appear
        which_try = 0
        while which_try < (self.app.config.retry_job_output_collection + 1):
            try:
                stdout = shrink_stream_by_size( open( job_state.output_file, "r" ), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                stderr = shrink_stream_by_size( open( job_state.error_file, "r" ), DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                which_try = (self.app.config.retry_job_output_collection + 1)
            except Exception as e:
                if which_try == self.app.config.retry_job_output_collection:
                    stdout = ''
                    stderr = 'Job output not returned from cluster'
                    log.error( '(%s/%s) %s: %s' % ( galaxy_id_tag, external_job_id, stderr, str( e ) ) )
                else:
                    time.sleep(1)
                which_try += 1

        try:
            # This should be an 8-bit exit code, but read ahead anyway:
            with open( job_state.exit_code_file, "r" ) as fh:
                exit_code_str = fh.read(32)
        except Exception:
            # By default, the exit code is 0, which typically indicates success.
            exit_code_str = "0"

        try:
            # Decode the exit code. If it's bogus, then just use 0.
            exit_code = int(exit_code_str)
        except ValueError:
            log.warning( "(%s/%s) Exit code '%s' invalid. Using 0." % ( galaxy_id_tag, external_job_id, exit_code_str ) )
            exit_code = 0

        # clean up the job files
        if self.app.config.cleanup_job == "always" or ( not stderr and self.app.config.cleanup_job == "onsuccess" ):
            job_state.cleanup()

        try:
            job_state.job_wrapper.finish( stdout, stderr, exit_code )
        except Exception:
            log.exception( "(%s/%s) Job wrapper finish method failed" % ( galaxy_id_tag, external_job_id ) )
            job_state.job_wrapper.fail( "Unable to finish job", exception=True )
Example #10
    def finish_job(self, pbs_job_state):
        """
        Get the output/error for a finished job, pass to `job_wrapper.finish`
        and cleanup all the PBS temporary files.
        """
        ofile = pbs_job_state.output_file
        efile = pbs_job_state.error_file
        ecfile = pbs_job_state.exit_code_file
        job_file = pbs_job_state.job_file
        # collect the output
        try:
            ofh = open(ofile, "r")
            efh = open(efile, "r")
            ecfh = open(ecfile, "r")
            stdout = shrink_stream_by_size(
                ofh, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True
            )
            stderr = shrink_stream_by_size(
                efh, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True
            )
            # This should be an 8-bit exit code, but read ahead anyway:
            exit_code_str = ecfh.read(32)
        except Exception:
            stdout = ""
            stderr = "Job output not returned by PBS: the output datasets were deleted while the job was running, the job was manually dequeued or there was a cluster error."
            # By default, the exit code is 0, which usually indicates success
            # (although clearly some error happened).
            exit_code_str = ""

        # Translate the exit code string to an integer; use 0 on failure.
        try:
            exit_code = int(exit_code_str)
        except ValueError:
            log.warning("Exit code " + exit_code_str + " was invalid. Using 0.")
            exit_code = 0

        # Call on the job wrapper to complete the call:
        try:
            pbs_job_state.job_wrapper.finish(stdout, stderr, exit_code)
        except Exception:
            log.exception("Job wrapper finish method failed")
            pbs_job_state.job_wrapper.fail("Unable to finish job", exception=True)

        # clean up the pbs files
        if self.app.config.cleanup_job == "always" or (not stderr and self.app.config.cleanup_job == "onsuccess"):
            self.cleanup((ofile, efile, ecfile, job_file))
Example #11
    def _job_io_for_db(self, stream):
        return shrink_stream_by_size(stream, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
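Example #11 reduces every call above to one helper. The keyword arguments suggest head-plus-tail truncation: keep both ends of an oversized stream within a size budget, joined by `join_by`, giving the head the larger share when `left_larger` is set. A minimal sketch of those semantics for string input, an illustration only and not `galaxy.util`'s actual implementation:

    def shrink_text(text, max_size, join_by="\n..\n", left_larger=True):
        # Keep the head and tail of an oversized string within max_size,
        # separated by join_by; the head gets the larger half when
        # left_larger is set.
        if len(text) <= max_size:
            return text
        budget = max(max_size - len(join_by), 0)
        left = (budget + 1) // 2 if left_larger else budget // 2
        right = budget - left
        head = text[:left]
        tail = text[len(text) - right:] if right else ""
        return head + join_by + tail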
Example #12
    def queue_job(self, job_wrapper):
        if not self._prepare_job_local(job_wrapper):
            return

        stderr = stdout = ''
        exit_code = 0

        # command line has been added to the wrapper by prepare_job()
        command_line, exit_code_path = self.__command_line(job_wrapper)
        job_id = job_wrapper.get_id_tag()

        try:
            stdout_file = tempfile.NamedTemporaryFile(
                mode='wb+',
                suffix='_stdout',
                dir=job_wrapper.working_directory)
            stderr_file = tempfile.NamedTemporaryFile(
                mode='wb+',
                suffix='_stderr',
                dir=job_wrapper.working_directory)
            log.debug('(%s) executing job script: %s' % (job_id, command_line))
            proc = subprocess.Popen(args=command_line,
                                    shell=True,
                                    cwd=job_wrapper.working_directory,
                                    stdout=stdout_file,
                                    stderr=stderr_file,
                                    env=self._environ,
                                    preexec_fn=os.setpgrp)

            proc.terminated_by_shutdown = False
            with self._proc_lock:
                self._procs.append(proc)

            try:
                job_wrapper.set_job_destination(job_wrapper.job_destination,
                                                proc.pid)
                job_wrapper.change_state(model.Job.states.RUNNING)

                terminated = self.__poll_if_needed(proc, job_wrapper, job_id)
                if terminated:
                    return

                # Reap the process and get the exit code.
                exit_code = proc.wait()

            finally:
                with self._proc_lock:
                    self._procs.remove(proc)

            try:
                with open(exit_code_path, 'r') as fh:
                    exit_code = int(fh.read())
            except Exception:
                log.warning("Failed to read exit code from path %s" %
                            exit_code_path)

            if proc.terminated_by_shutdown:
                self._fail_job_local(job_wrapper,
                                     "job terminated by Galaxy shutdown")
                return

            stdout_file.seek(0)
            stderr_file.seek(0)
            stdout = shrink_stream_by_size(stdout_file,
                                           DATABASE_MAX_STRING_SIZE,
                                           join_by="\n..\n",
                                           left_larger=True,
                                           beginning_on_size_error=True)
            stderr = shrink_stream_by_size(stderr_file,
                                           DATABASE_MAX_STRING_SIZE,
                                           join_by="\n..\n",
                                           left_larger=True,
                                           beginning_on_size_error=True)
            stdout_file.close()
            stderr_file.close()
            log.debug('execution finished: %s' % command_line)
        except Exception:
            log.exception("failure running job %d", job_wrapper.job_id)
            self._fail_job_local(job_wrapper, "failure running job")
            return

        self._handle_metadata_if_needed(job_wrapper)

        job_destination = job_wrapper.job_destination
        job_state = JobState(job_wrapper, job_destination)
        job_state.stop_job = False
        # Finish the job!
        try:
            self._finish_or_resubmit_job(job_state, stdout, stderr, exit_code)
        except Exception:
            log.exception("Job wrapper finish method failed")
            self._fail_job_local(job_wrapper, "Unable to finish job")
Example #13
    def queue_job(self, job_wrapper):
        if not self._prepare_job_local(job_wrapper):
            return

        stderr = stdout = ''
        exit_code = 0

        # command line has been added to the wrapper by prepare_job()
        command_line, exit_code_path = self.__command_line(job_wrapper)
        job_id = job_wrapper.get_id_tag()

        try:
            stdout_file = tempfile.NamedTemporaryFile(mode='wb+', suffix='_stdout', dir=job_wrapper.working_directory)
            stderr_file = tempfile.NamedTemporaryFile(mode='wb+', suffix='_stderr', dir=job_wrapper.working_directory)
            log.debug('(%s) executing job script: %s' % (job_id, command_line))
            proc = subprocess.Popen(args=command_line,
                                    shell=True,
                                    cwd=job_wrapper.working_directory,
                                    stdout=stdout_file,
                                    stderr=stderr_file,
                                    env=self._environ,
                                    preexec_fn=os.setpgrp)

            proc.terminated_by_shutdown = False
            with self._proc_lock:
                self._procs.append(proc)

            try:
                job_wrapper.set_job_destination(job_wrapper.job_destination, proc.pid)
                job_wrapper.change_state(model.Job.states.RUNNING)

                terminated = self.__poll_if_needed(proc, job_wrapper, job_id)
                if terminated:
                    return

                # Reap the process and get the exit code.
                exit_code = proc.wait()

            finally:
                with self._proc_lock:
                    self._procs.remove(proc)

            try:
                with open(exit_code_path, 'r') as fh:
                    exit_code = int(fh.read())
            except Exception:
                log.warning("Failed to read exit code from path %s" % exit_code_path)

            if proc.terminated_by_shutdown:
                self._fail_job_local(job_wrapper, "job terminated by Galaxy shutdown")
                return

            stdout_file.seek(0)
            stderr_file.seek(0)
            stdout = shrink_stream_by_size(stdout_file, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
            stderr = shrink_stream_by_size(stderr_file, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
            stdout_file.close()
            stderr_file.close()
            log.debug('execution finished: %s' % command_line)
        except Exception:
            log.exception("failure running job %d", job_wrapper.job_id)
            self._fail_job_local(job_wrapper, "failure running job")
            return

        self._handle_metadata_if_needed(job_wrapper)

        job_destination = job_wrapper.job_destination
        job_state = JobState(job_wrapper, job_destination)
        job_state.stop_job = False
        # Finish the job!
        try:
            self._finish_or_resubmit_job(job_state, stdout, stderr, exit_code)
        except Exception:
            log.exception("Job wrapper finish method failed")
            self._fail_job_local(job_wrapper, "Unable to finish job")
Example #14
    def queue_job(self, job_wrapper):
        # prepare the job
        if not self.prepare_job(job_wrapper):
            return

        stderr = stdout = ''
        exit_code = 0

        # command line has been added to the wrapper by prepare_job()
        command_line = job_wrapper.runner_command_line

        job_id = job_wrapper.get_id_tag()

        try:
            log.debug('(%s) executing: %s' % (job_id, command_line))
            stdout_file = tempfile.NamedTemporaryFile(
                suffix='_stdout', dir=job_wrapper.working_directory)
            stderr_file = tempfile.NamedTemporaryFile(
                suffix='_stderr', dir=job_wrapper.working_directory)
            proc = subprocess.Popen(args=command_line,
                                    shell=True,
                                    cwd=job_wrapper.working_directory,
                                    stdout=stdout_file,
                                    stderr=stderr_file,
                                    env=os.environ,
                                    preexec_fn=os.setpgrp)
            job_wrapper.set_job_destination(job_wrapper.job_destination,
                                            proc.pid)
            job_wrapper.change_state(model.Job.states.RUNNING)
            job_start = datetime.datetime.now()
            i = 0
            # Iterate until the process exits, periodically checking its limits
            while proc.poll() is None:
                i += 1
                if (i % 20) == 0:
                    msg = job_wrapper.check_limits(
                        runtime=datetime.datetime.now() - job_start)
                    if msg is not None:
                        job_wrapper.fail(msg)
                        log.debug('(%s) Terminating process group' % job_id)
                        self._terminate(proc)
                        return
                else:
                    sleep(1)
            # Reap the process and get the exit code.
            exit_code = proc.wait()
            stdout_file.seek(0)
            stderr_file.seek(0)
            stdout = shrink_stream_by_size(stdout_file,
                                           DATABASE_MAX_STRING_SIZE,
                                           join_by="\n..\n",
                                           left_larger=True,
                                           beginning_on_size_error=True)
            stderr = shrink_stream_by_size(stderr_file,
                                           DATABASE_MAX_STRING_SIZE,
                                           join_by="\n..\n",
                                           left_larger=True,
                                           beginning_on_size_error=True)
            stdout_file.close()
            stderr_file.close()
            log.debug('execution finished: %s' % command_line)
        except Exception:
            job_wrapper.fail("failure running job", exception=True)
            log.exception("failure running job %d" % job_wrapper.job_id)
            return
        self._handle_metadata_externally(job_wrapper)
        # Finish the job!
        try:
            job_wrapper.finish(stdout, stderr, exit_code)
        except Exception:
            log.exception("Job wrapper finish method failed")
            job_wrapper.fail("Unable to finish job", exception=True)