示例#1
0
 def test_wait(self):
     """Check that pbs.qwait blocks until the submitted job has finished."""
     output_path = self.temp_output_filename

     # Remove any leftover output so the final existence check can only be
     # satisfied by the job submitted below.
     if os.path.exists(output_path):
         os.remove(output_path)

     job_id = pbs.qsub(self.pbs_script_filename)
     pbs.qwait(job_id)

     # Listing the directory triggers the panfs file system to make the
     # freshly written file appear.
     os.system('ls > /dev/null')

     output_exists = os.path.exists(output_path)
     assert output_exists, "pbs.qwait returned, but the expected output does not yet exist."
示例#2
0
def submit_files_until_done(filenames, wait_for_all=False, delay_check=0.5, sleep_seconds=60 * 5,
                            quiet=False,
                            fail_when_max=False,
                            retry_on_failure=True):
    """Submit every script in ``filenames``, throttled by ``max_submissions``.

    Scripts are handled in order.  Before each attempt, the number of entries
    returned by ``pbs.qstat`` for the user named in ``$USER`` is compared with
    the module-level ``max_submissions``; while that limit is reached the
    function sleeps and polls again (or raises, see ``fail_when_max``).

    Args:
        filenames: Sequence of script paths to submit.  The caller's sequence
            is not modified.
        wait_for_all: If true, call ``pbs.qwait`` on every submitted job
            before returning.
        delay_check: Seconds to sleep between ``qsub`` and the follow-up
            ``qstat`` that confirms the job is known.  A falsy value skips
            the sleep.
        sleep_seconds: Seconds to sleep when the limit is reached.  After a
            failed submission roughly half of this (at least one second) is
            slept before retrying.
        quiet: Suppress status messages.  Tracebacks of failed submissions
            are still printed.
        fail_when_max: Raise ``ReachedMax`` instead of sleeping when the
            limit is reached.
        retry_on_failure: If false, raise ``QSubFailure`` on the first failed
            submission instead of retrying the same script.

    Returns:
        List of job ids, in submission order.  Scripts whose file does not
        exist are skipped and contribute no id.

    Raises:
        ReachedMax: The limit is reached and ``fail_when_max`` is true.
        QSubFailure: A submission failed and ``retry_on_failure`` is false.
    """
    submitted_ids = []
    num_to_submit = len(filenames)

    while filenames:
        current = filenames[0]
        num_submitted = len(pbs.qstat(user=os.environ['USER']))

        if num_submitted >= max_submissions:
            if fail_when_max:
                raise ReachedMax()
            # Emit the notice as a full line and honour ``quiet`` like every
            # other status message; it used to be written without a newline,
            # so repeated notices ran into each other.
            if not quiet:
                print('Queue is currently full.')
            sys.stdout.flush()
            time.sleep(sleep_seconds)
            continue

        if not os.path.exists(current):
            if not quiet:
                print('ERROR: Cannot submit %s because it does not exist.' % current)
            sys.stderr.flush()
            sys.stdout.flush()
            # Drop the missing script so the loop can make progress.
            filenames = filenames[1:]
            continue

        try:
            job_id = pbs.qsub(current, verbose=not quiet)
            if delay_check:
                time.sleep(delay_check)
            pbs.qstat(job_id=job_id)  # If this doesn't throw, then it was submitted successfully
        except pbs.PBSUtilError:
            traceback.print_exc()
            if not quiet:
                print('Failed to submit %s at %s  (%s left to submit)'
                      % (current, time.asctime(), len(filenames) - 1))
            sys.stderr.flush()
            sys.stdout.flush()

            if not retry_on_failure:
                raise QSubFailure()

            # Maybe we saturated the queue; back off, then retry this script.
            time.sleep(max(int(round(sleep_seconds/2)), 1))
            continue

        if not quiet:
            print('Submitted %s as "%s" at %s  (%s/%s left to submit)'
                  % (current, job_id, time.asctime(), len(filenames) - 1, num_to_submit))
        filenames = filenames[1:]
        submitted_ids.append(job_id)
        num_submitted = num_submitted + 1
        if not quiet:
            print('I think submitted %d/%d' % (num_submitted, max_submissions))
        sys.stderr.flush()
        sys.stdout.flush()

    if wait_for_all:
        for job_id in submitted_ids:
            pbs.qwait(job_id)
    return submitted_ids
示例#3
0
def wait_for_jobs(job_ids):
    """Call ``pbs.qwait`` on each id in ``job_ids``, one at a time, in order."""
    for pending_id in job_ids:
        pbs.qwait(job_id=pending_id)
示例#4
0
def _build_submit_option_parser():
    """Build the parser for the options that precede the command to run."""
    parser = optparse.OptionParser()
    parser.add_option('-o', dest='output_file_name', default=None,
                      help="Destination file for the output. ")
    parser.add_option('-i', action='store', dest='input_file_name',
                      help="Write input file contents to command's stdin.")
    parser.add_option('-v', action='store_true', dest='verbose', default=False,
                      help="Display the script to be submitted.")
    parser.add_option('-q', action='store_true', dest='quiet', default=False,
                      help="Do not include information about processor nodes in the output, or the state of the job.")
    parser.add_option('-w', action='store', dest='script_name', default=None,
                      help="Save the script as this file name.")
    parser.add_option('-x', action='store_true', dest='dont_submit', default=False,
                      help="Don't submit the job.  This is really only useful when combined with '-w'.")
    parser.add_option('-W', action='store_true', dest='wait', default=False,
                      help="Wait for the job to finish, and display its output.")
    parser.add_option('-n', action='store', dest='job_name', default=None,
                      help="PBS jobname")
    parser.add_option('-p', action='store', dest='numcpu',
                      default=None,
                      help="Number of cpus to use.")
    parser.add_option('--num-nodes', action='store', dest='numnodes',
                      default=None,
                      help="Number of nodes to use.")

    # Stop option parsing at the first non-option argument.  Everything from
    # there on is the command to run, including the command's own flags.
    # This replaces a hand-written scan of argv that crashed on an empty
    # argv, mistook the value of a trailing option for the command, and did
    # not know that --num-nodes consumes a value.
    parser.disable_interspersed_args()
    return parser


def main(argv):
    """Wrap a shell command in a job script and optionally submit it.

    ``argv`` holds this tool's own options followed by the command and its
    arguments, e.g. ``['-n', 'myjob', '-W', 'ls', '-l']``.  It must not
    include the program name.

    The script is written via ``run_command_here``.  Unless ``-x`` is given
    it is submitted through ``nice_submit.submit_files_until_done``.  With
    ``-W`` the function waits for the job and then prints the job's output
    file and, if present, its ``.err`` companion.  A KeyboardInterrupt while
    submitting or waiting deletes the job (when its id is already known) and
    then still prints whatever output exists.

    Args:
        argv: List of command-line arguments without the program name.

    Returns:
        None.
    """
    parser = _build_submit_option_parser()
    (options, command_parts) = parser.parse_args(argv)
    command = ' '.join(command_parts)

    with tempfile_util.Session(local=True) as session:
        # Pick a file to output to.
        # NOTE(review): session.add_name presumably registers the file with
        # the session so it is cleaned up on exit -- confirm in tempfile_util.
        if options.output_file_name is not None:
            output_file_name = options.output_file_name
        elif options.script_name:
            output_file_name = options.script_name + '.out'
            session.add_name(output_file_name + '.err')
        else:
            output_file_name = session.temp_file_name('.submit_command.out')
            session.add_name(output_file_name + '.err')

        # Pick a script name
        if options.script_name is not None:
            script_file_name = options.script_name
        else:
            script_file_name = session.temp_file_name('.submit_command.pbs')

        # Must be bound before the try block below: the KeyboardInterrupt
        # handler inspects it to decide whether there is a job to delete.
        job_id = None

        if options.job_name is not None:
            job_name = options.job_name
        elif output_file_name is not None:
            job_name = output_file_name
        else:
            # Only reachable if the session hands back no temp file name:
            # fall back to the alphabetic characters of the command word.
            first_word = command_parts[0] if command_parts else ''
            job_name = ''.join(ch for ch in first_word if ch.isalpha())

        # Create the script
        run_command_here(script_file_name, command,
                         input_file_name=options.input_file_name,
                         output_file_name=output_file_name,
                         job_name=job_name,
                         verbose=not options.quiet,
                         numcpu=options.numcpu,
                         numnodes=options.numnodes)

        if options.verbose:
            with open(script_file_name) as script_file:
                print(script_file.read())

        if options.script_name is not None:
            print('Wrote %s.' % script_file_name)

        # Maybe submit it
        if options.dont_submit:
            return

        try:
            if not options.quiet:
                print('Submitting...')
            job_id, = nice_submit.submit_files_until_done([script_file_name], wait_for_all=False,
                                                          quiet=options.quiet)

            # Unless asked to wait, we are done once the job is submitted.
            if not options.wait:
                return

            if not options.quiet:
                print('Waiting on job  %s  to finish running.' % (job_id,))
            pbs.qwait(job_id)

        except KeyboardInterrupt:
            print('^C')
            if job_id:
                print('Killing  %s' % (job_id,))
                pbs.qdel(job_id)

        if os.path.exists(output_file_name):
            with open(output_file_name) as output_file:
                print(output_file.read())
        else:
            print("Output file doesn't seem to exist! Try:\ncat %s\n" % output_file_name)

        err_file_name = output_file_name + '.err'
        if os.path.exists(err_file_name):
            with open(err_file_name) as err_file:
                print(err_file.read())