Exemplo n.º 1
0

# Submit the batch job, then poll it until it reports finished. While polling,
# failed jobs are resubmitted on every pass, and one additional restart that
# also covers waiting jobs is attempted until it succeeds.
# NOTE: a parenthesised single-argument print behaves the same on Python 2
# (Jython) and Python 3.
print("Submitting jobs...")
multiPartJob.submit()

# Holds the result of the one-off restart below; the restart is retried on
# every pass for as long as this stays false.
restarted = False

# now we wait for all jobs to finish. Actually, we probably should test whether the job was successful as well...
while not multiPartJob.isFinished(True):
    # printing some stats
    print(multiPartJob.getProgress())

    # restart failed jobs every time
    failedpolicy = DefaultResubmitPolicy()
    # To only resubmit failed jobs, the resubmission of waiting jobs (which is
    # enabled by default) has to be switched off explicitly; otherwise this
    # per-pass restart would also resubmit every job that is merely waiting.
    failedpolicy.setProperty(DefaultResubmitPolicy.RESTART_WAITING_JOBS, False)
    multiPartJob.restart(failedpolicy, True)

    # restart once after the job submission is finished to optimize job distribution to queues where the job actually runs
    if not restarted:

        # actually, it probably would be a good idea to refresh the job status here because otherwise the restart will just
        # restart failed jobs that were already submitted with the restart above...  not really sure...
        #multiPartJob.refresh()

        # this might not work the first few times because in the background the batchjob is still submitting...
        print("trying to restarting job...")

        policy = DefaultResubmitPolicy()
        # the next line doesn't make sense since it's the default anyway. Just to demonstrate.
        policy.setProperty(DefaultResubmitPolicy.RESTART_WAITING_JOBS, True)
        restarted = multiPartJob.restart(policy, True)
Exemplo n.º 2
0
# Add further jobs to an existing batch job and start only those new jobs.
# The job indices run over range(start, end), i.e. 30 up to and including 39.
# NOTE: a parenthesised single-argument print behaves the same on Python 2
# (Jython) and Python 3.
start = 30
end = 40

pathToInputFiles = batchJob.pathToInputFiles()

# NOTE(review): plain concatenation presumes pathToInputFiles already ends
# with a path separator -- confirm against the API.
# The first literal used to carry a stray trailing space ('inputFile1.txt ').
inputFile1relPath = pathToInputFiles + 'inputFile1.txt'
inputFile2relPath = pathToInputFiles + 'inputFile2.txt'

# The command line is the same for every job, so it is built once up front.
commandline = 'cat ' + inputFile1relPath + ' ' + inputFile2relPath

for i in range(start, end):
    # create a unique jobname for every job
    jobname = batchJobName + "_" + str(i)

    print('Creating job: ' + jobname)

    # create the single job
    job = JobObject(si)
    job.setJobname(jobname)
    # better to set the application to use explicitly because in that case we don't need to use mds (faster)
    job.setApplication('UnixCommands')
    job.setCommandline(commandline)

    job.setWalltimeInSeconds(60)
    # adding the job to the multijob
    batchJob.addJob(job)

# only start the newly added jobs and wait for the restart to finish
batchJob.restart(False, False, True, True)


# don't forget to exit properly. this cleans up possible existing threads/executors
sys.exit()
Exemplo n.º 3
0
# Log in, load an existing batch job and poll it every 5 seconds until it
# reports finished, resubmitting failed jobs whenever any are found.
# NOTE: a parenthesised single-argument print behaves the same on Python 2
# (Jython) and Python 3.
si = LoginManager.loginCommandline()

# load (but not refresh yet) batchjob, this might take a while
batchJob = BatchJobObject(si, batchJobname, False)

# The condition previously ended in "and False", which made the loop body
# unreachable, so progress was never printed and failed jobs never restarted.
while not batchJob.isFinished(True):

    print(batchJob.getProgress())

    # Read the count once so the printed number and the check below agree.
    failedJobCount = batchJob.getNumberOfFailedJobs()
    print(str(failedJobCount))

    if failedJobCount > 0:

        print(str(failedJobCount) + ' failed jobs found. restarting...')
        failedpolicy = DefaultResubmitPolicy()
        batchJob.restart(failedpolicy, True)
        print('Restart finished.')

    # pause between polls
    time.sleep(5)

jobsToRestart = []

for job in batchJob.getJobs():
    print "Job: "+job.getJobname()+", Status: "+job.getStatusString(False)

    try:
        output = job.getStdOutContent()
        index = output.find('error')
        if index != -1:
            # it doesn't actually make any sense to restart this job, since it would
            # obviously have the same result again. This is just to demonstrate how to parse