print "Job: "+job.getJobname()+", Error: "+error.getFailures().get(job).getLocalizedMessage() sys.exit() print "Job distribution:" for subLoc in multiPartJob.getOptimizationResult().keySet(): print subLoc + " : " +multiPartJob.getOptimizationResult().get(subLoc) print "Submitting jobs..." multiPartJob.submit() restarted = False # now we wait for all jobs to finish. Actually, we probably should test whether the job was successful as well... while not multiPartJob.isFinished(True): # printing some stats print multiPartJob.getProgress() # restart failed jobs everytime failedpolicy = DefaultResubmitPolicy() # to only resubmit failed jobs, we have to remove the waiting jobs resubmission that is set by default multiPartJob.restart(failedpolicy, True) # restart once after the jobsubmission is finished to optimize job distributions to queues where the job actually runs if not restarted: # actually, it probably would be a good idea to refresh the job status here because otherwise the restart will just # restart failed jobs that were already submitted with the restart above... not really sure... #multiPartJob.refresh()
# Restart the failed jobs of an existing Grisu batch job.
#
# Usage: <script> <batchJobname>
#
# The script logs in via the command line, loads the batch job with the given
# name and contains a polling loop that resubmits failed jobs. That loop is
# currently disabled (see the NOTE on the `while` below).

import sys
import time

from org.vpac.grisu.control import DefaultResubmitPolicy
from org.vpac.grisu.frontend.control.login import LoginManager
from org.vpac.grisu.frontend.model.job import BatchJobObject
from org.vpac.grisu.frontend.model.job import JobException

# Exit with a readable usage message instead of a bare IndexError traceback
# when the batch job name was not supplied.
if len(sys.argv) < 2:
    sys.exit('Usage: ' + sys.argv[0] + ' <batchJobname>')

batchJobname = sys.argv[1]

si = LoginManager.loginCommandline()

# load (but not refresh yet) batchjob, this might take a while
batchJob = BatchJobObject(si, batchJobname, False)

# NOTE(review): the trailing `and False` makes this condition always false, so
# the loop body never executes; isFinished(True) is still called exactly once.
# This looks like a deliberate way to switch the polling loop off -- confirm
# before removing it.
# The single-argument print(...) form used below prints the same text whether
# it is parsed as a Python 2 / Jython print statement or as a print function.
while not batchJob.isFinished(True) and False:

    print(batchJob.getProgress())

    # Read the failure count once so the printed value and the tested value
    # cannot disagree within one iteration.
    numFailed = batchJob.getNumberOfFailedJobs()
    print(str(numFailed))

    if numFailed > 0:
        print(str(numFailed) + ' failed jobs found. restarting...')
        failedpolicy = DefaultResubmitPolicy()
        # presumably the second argument makes restart() block until the
        # resubmission is done -- TODO confirm against the Grisu API
        batchJob.restart(failedpolicy, True)
        print('Restart finished.')

    # pause between polls
    time.sleep(5)

jobsToRestart = []