Example #1
File: LSF.py  Project: alexagrf/rtd
import sys
import time

def lsf_run_until_done(to_run_dict, logfile, queue, bsub_flags, jobname_base,
                       num_batches, MAX_RETRY):
    """Submit unfinished commands to LSF in batches, resubmitting until all
    jobs complete or an identical job list has been retried MAX_RETRY times.
    lsf_jobs_submit and lsf_wait_for_jobs are defined elsewhere in this module.
    """
    from run_safe import unfinished_cmds
    cmds = unfinished_cmds(to_run_dict)

    retries = 0
    last_cmds = []
    while len(cmds) > 0:
        print('%s: %s cmds to run in %s batches on queue %s, logs in %s' % (
            jobname_base, len(cmds), num_batches, queue, logfile),
            file=sys.stderr)
        # halt execution on recurrent errors: if the same job list comes back
        # unchanged too many times, something is failing repeatedly
        if set(last_cmds) == set(cmds):
            if retries > MAX_RETRY:
                errstr = ('maximum number of retry attempts (%s) exceeded with '
                          'identical job lists. Check logs (%s) for recurrent '
                          'errors' % (MAX_RETRY, logfile))
                raise IOError(errstr)
            else:
                retries += 1
        last_cmds = cmds

        jobids, namedict = lsf_jobs_submit(cmds,
                                           logfile,
                                           queue,
                                           bsub_flags,
                                           jobname_base=jobname_base,
                                           num_batches=num_batches)
        time.sleep(20)
        lsf_wait_for_jobs(jobids, logfile, namedict=namedict)

        cmds = unfinished_cmds(to_run_dict)
    print('DONE\n', file=sys.stderr)
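
A minimal invocation sketch follows for context. None of these specifics come from the original project: the shape of to_run_dict (assumed here to map a .done sentinel path to the shell command that produces it), the queue name, the bsub flag string, and the command lines are all illustrative placeholders.

# Hypothetical usage; every value below is a placeholder, and the assumed
# to_run_dict layout ({done-sentinel path: shell command}) is a guess at the
# convention run_safe.add_cmd establishes.
to_run_dict = {
    'results/s1.done': 'python align.py reads_s1.fq results/s1.out',
    'results/s2.done': 'python align.py reads_s2.fq results/s2.out',
}

lsf_run_until_done(to_run_dict,
                   logfile='logs/align-lsf',
                   queue='normal',                      # placeholder LSF queue
                   bsub_flags='-R "select[mem>4000]"',  # placeholder bsub flags
                   jobname_base='align',
                   num_batches=2,
                   MAX_RETRY=3)

If consecutive rounds report the same unfinished set, the retry counter ticks up, and once it exceeds MAX_RETRY the IOError above fires instead of resubmitting forever.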
Example #2
import sys
import time

def run_until_done(to_run_dict, jobname_base, scriptdir, runtime, mem,
                   num_batches, partition='general', force_source=False,
                   MAX_RETRY=MAX_RETRY, **kwargs):
    '''Given a to-run dictionary as populated by run_safe.add_cmd (see
    run_safe.py in py_util) and scheduling parameters, submits jobs that have
    not yet completed (per run_safe .done files) until all jobs finish, or
    until identical job lists have been submitted MAX_RETRY times.
    See jobs_submit and wait_for_jobs in this module for more details.
    kwargs go to jobs_submit; see jobs_submit and slurm_script for handling
    of additional arguments. MAX_RETRY defaults to a module-level constant.
    '''
    from run_safe import unfinished_cmds
    cmds = unfinished_cmds(to_run_dict)

    retries = 0
    last_cmds = []
    while len(cmds) > 0:
        print('%s: %s cmds to run in %s batches on partition %s, logs in %s' % (
            jobname_base, len(cmds), num_batches, partition, scriptdir),
            file=sys.stderr)
        # halt execution on recurrent errors: if the same job list comes back
        # unchanged too many times, something is failing repeatedly
        if set(last_cmds) == set(cmds):
            if retries > MAX_RETRY:
                errstr = ('maximum number of retry attempts (%s) exceeded with '
                          'identical job lists. Check logs (%s) for recurrent '
                          'errors' % (MAX_RETRY, scriptdir))
                raise IOError(errstr)
            else:
                retries += 1
        last_cmds = cmds

        jobsdict = jobs_submit(cmds, jobname_base, scriptdir, runtime, mem,
                               num_batches, partition=partition,
                               force_source=force_source, **kwargs)
        time.sleep(20)
        wait_for_jobs(jobsdict, restart_partition=partition, sleeptime=20)
        time.sleep(20)

        cmds = unfinished_cmds(to_run_dict)
    print('DONE\n', file=sys.stderr)
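
Both variants depend on run_safe.unfinished_cmds and the .done-file bookkeeping the docstring mentions. run_safe.py is not included on this page, so the sketch below is only a guess at that convention; every name and behavior in it is assumed rather than taken from the source.

import os

# Assumed shape of the run_safe helpers referenced above (run_safe.py is not
# shown on this page); this illustrates the sentinel-file convention only.
def add_cmd(to_run_dict, donefile, cmd):
    # register cmd under the .done sentinel path that will mark its completion
    to_run_dict[donefile] = cmd

def unfinished_cmds(to_run_dict):
    # a command counts as finished once its sentinel file exists;
    # everything else still needs to run
    return [cmd for donefile, cmd in to_run_dict.items()
            if not os.path.exists(donefile)]

Under that reading, each submission round shrinks the unfinished list as sentinel files appear, and run_until_done loops until the list is empty or the identical-list retry limit trips; e.g. run_until_done(to_run_dict, 'align', scriptdir, runtime, mem, num_batches), with runtime and mem in whatever form jobs_submit expects.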