Exemplo n.º 1
0
    sys.exit()

# Print every key of the optimization result next to its value
# (presumably submission location -> assigned jobs; TODO confirm).
# The result map is fetched once so that the key iteration and the value
# lookups are guaranteed to work on the same map object.
print("Job distribution:")
optimizationResult = multiPartJob.getOptimizationResult()
for subLoc in optimizationResult.keySet():
    # NOTE(review): plain concatenation requires the value to be
    # string-compatible; kept as in the original.
    print(subLoc + " : " + optimizationResult.get(subLoc))


# Kick off the submission of the batch job.
print("Submitting jobs...")
multiPartJob.submit()

restarted = False

# now we wait for all jobs to finish. Actually, we probably should test whether the job was successful as well...
while not multiPartJob.isFinished(True):
    # printing some stats
    print multiPartJob.getProgress()
    
    # restart failed jobs every time
    failedpolicy = DefaultResubmitPolicy()
    # to only resubmit failed jobs, we have to remove the waiting jobs resubmission that is set by default
    multiPartJob.restart(failedpolicy, True)

    # restart once after the job submission is finished to optimize job distributions to queues where the job actually runs
    if not restarted:
        
        # actually, it probably would be a good idea to refresh the job status here because otherwise the restart will just 
        # restart failed jobs that were already submitted with the restart above...  not really sure...
        #multiPartJob.refresh()
        
        # this might not work the first few times because in the background the batchjob is still submitting...
        print "trying to restarting job..."
Exemplo n.º 2
0
from org.vpac.grisu.frontend.control.login import LoginManager
from org.vpac.grisu.frontend.model.job import BatchJobObject
from org.vpac.grisu.frontend.model.job import JobException
import sys
import time

# Name of the batch job to work on, taken from the first command-line argument.
batchJobname = sys.argv[1]

# Log in via the command line; the returned object is handed to BatchJobObject below.
si = LoginManager.loginCommandline()

# load (but not refresh yet) batchjob, this might take a while
batchJob = BatchJobObject(si, batchJobname, False)

# NOTE(review): the trailing "and False" makes this condition always false, so
# the loop body below never runs; isFinished(True) is still evaluated once.
# This looks like a deliberate switch to skip the monitoring phase -- confirm
# before removing it.
while not batchJob.isFinished(True) and False:

    print(batchJob.getProgress())

    # Query the failure count once per pass so the printed number, the check
    # and the message below all refer to the same value.
    failedJobCount = batchJob.getNumberOfFailedJobs()
    print(str(failedJobCount))

    if failedJobCount > 0:

        print(str(failedJobCount) + ' failed jobs found. restarting...')
        # NOTE(review): DefaultResubmitPolicy is not among the imports visible
        # at the top of this script; it must be imported before this loop is
        # re-enabled, otherwise this line raises a NameError.
        failedpolicy = DefaultResubmitPolicy()
        batchJob.restart(failedpolicy, True)
        print('Restart finished.')

    # Pause between polls so the job is not queried in a tight loop.
    time.sleep(5)

jobsToRestart = []

for job in batchJob.getJobs():