# NOTE(review): this fragment starts mid-script. The statements had been
# collapsed onto one physical line (so everything after the first '#' was
# swallowed as a comment); line breaks and nesting are reconstructed here.
# The sys.exit() below presumably closes an error branch that lies before
# the visible part of the file - confirm its indentation against that code.
sys.exit()

# Show the entries of the optimization result, one "<key> : <value>" line each.
print("Job distribution:")
for subLoc in multiPartJob.getOptimizationResult().keySet():
    print(subLoc + " : " + multiPartJob.getOptimizationResult().get(subLoc))

print("Submitting jobs...")
multiPartJob.submit()

# Flag guarding the one-off restart inside the loop below.
restarted = False

# now we wait for all jobs to finish. Actually, we probably should test
# whether the job was successful as well...
while not multiPartJob.isFinished(True):
    # printing some stats
    print(multiPartJob.getProgress())

    # restart failed jobs every time
    failedpolicy = DefaultResubmitPolicy()
    # to only resubmit failed jobs, we have to remove the waiting jobs
    # resubmission that is set by default
    # NOTE(review): no statement removing that default is visible here - the
    # policy is passed to restart() unmodified. Confirm this is intended.
    multiPartJob.restart(failedpolicy, True)

    # restart once after the job submission is finished to optimize job
    # distributions to queues where the job actually runs
    if not restarted:
        # actually, it probably would be a good idea to refresh the job status
        # here because otherwise the restart will just restart failed jobs
        # that were already submitted with the restart above... not really
        # sure...
        #multiPartJob.refresh()
        # this might not work the first few times because in the background
        # the batchjob is still submitting...
        # NOTE(review): the remainder of this branch (and of the loop) is
        # outside the visible fragment.
        print("trying to restarting job...")
from org.vpac.grisu.frontend.control.login import LoginManager from org.vpac.grisu.frontend.model.job import BatchJobObject from org.vpac.grisu.frontend.model.job import JobException import sys import time batchJobname = sys.argv[1] si = LoginManager.loginCommandline() # load (but not refresh yet) batchjob, this might take a while batchJob = BatchJobObject(si, batchJobname, False) while not batchJob.isFinished(True) and False: print batchJob.getProgress() print str(batchJob.getNumberOfFailedJobs()) if batchJob.getNumberOfFailedJobs() > 0: print str(batchJob.getNumberOfFailedJobs()) + ' failed jobs found. restarting...' failedpolicy = DefaultResubmitPolicy() batchJob.restart(failedpolicy, True) print 'Restart finished.' time.sleep(5) jobsToRestart = [] for job in batchJob.getJobs():