from grisu.frontend.control.login import LoginManager
from grisu.frontend.model.job import BatchJobObject
from grisu.frontend.model.job import JobException
import sys
import time

# Script: log in, load an existing batch job by name and (when the polling
# loop below is enabled) print its progress and restart failed jobs until the
# batch job reports that it is finished.

# Name of the batch job to load, taken from the first command-line argument.
# Starting the script without an argument raises IndexError here.
batchJobname = sys.argv[1]

# Command-line login; the returned object is handed to BatchJobObject below.
si = LoginManager.loginCommandline()

# load (but not refresh yet) batchjob, this might take a while
batchJob = BatchJobObject(si, batchJobname, False)

# NOTE(review): the trailing `and False` makes this condition always false, so
# the loop body never executes (isFinished(True) is still called once).
# Looks like a deliberate switch to skip the polling phase -- confirm.
while not batchJob.isFinished(True) and False:

    # Print the current progress and the number of failed jobs.
    print batchJob.getProgress()
    print str(batchJob.getNumberOfFailedJobs())

    if batchJob.getNumberOfFailedJobs() > 0:

        print str(batchJob.getNumberOfFailedJobs()) + ' failed jobs found. restarting...'
        # NOTE(review): DefaultResubmitPolicy is not among the imports at the
        # top of this script; this line would raise NameError if the loop ran.
        failedpolicy = DefaultResubmitPolicy()
        batchJob.restart(failedpolicy, True)
        print 'Restart finished.'

    # Pause five seconds before polling again.
    # NOTE(review): nesting (inside the loop, outside the `if`) is
    # reconstructed from a whitespace-collapsed source -- confirm.
    time.sleep(5)

jobsToRestart = []

# Iterate over every job of the batch job (presumably to fill jobsToRestart
# -- TODO confirm).
for job in batchJob.getJobs():
# NOTE(review): presumably the tail of a guard/error branch that starts
# earlier in the file -- confirm the intended nesting of this call.
sys.exit()

# Print, for every key of the optimization result, the key and its value.
# assumes the values are strings (they are concatenated with `+`) -- TODO confirm
print "Job distribution:"
for subLoc in batch_job.getOptimizationResult().keySet():
    print subLoc + " : " + batch_job.getOptimizationResult().get(subLoc)

print "Submitting jobs..."
batch_job.submit()

# Flag checked inside the polling loop so the extra restart is attempted
# only while it is still False.
restarted = False

# now we wait for all jobs to finish. Actually, we probably should test whether the job was successful as well...
while not batch_job.isFinished(True):
    # printing some stats
    print batch_job.getProgress()

    # restart failed jobs every time
    failedpolicy = DefaultResubmitPolicy()
    # to only resubmit failed jobs, we have to remove the waiting jobs resubmission that is set by default
    # NOTE(review): no statement here changes the policy; it is passed to
    # restart() exactly as constructed -- confirm whether a call is missing.
    batch_job.restart(failedpolicy, True)

    # restart once after the job submission is finished to optimize job distributions to queues where the job actually runs
    if not restarted:

        # actually, it probably would be a good idea to refresh the job status here because otherwise the restart will just
        # restart failed jobs that were already submitted with the restart above... not really sure...
        #multiPartJob.refresh()
        # this might not work the first few times because in the background the batchjob is still submitting...
        print "trying to restarting job..."
# NOTE(review): presumably the tail of a guard/error branch that starts
# earlier in the file -- confirm the intended nesting of this call.
sys.exit()

# Print, for every key of the optimization result, the key and its value.
# assumes the values are strings (they are concatenated with `+`) -- TODO confirm
print "Job distribution:"
for subLoc in batch_job.getOptimizationResult().keySet():
    print subLoc + " : " +batch_job.getOptimizationResult().get(subLoc)

print "Submitting jobs..."
batch_job.submit()

# Flag checked inside the polling loop so the extra restart is attempted
# only while it is still False.
restarted = False

# now we wait for all jobs to finish. Actually, we probably should test whether the job was successful as well...
while not batch_job.isFinished(True):
    # printing some stats
    print batch_job.getProgress()

    # restart failed jobs every time
    failedpolicy = DefaultResubmitPolicy()
    # to only resubmit failed jobs, we have to remove the waiting jobs resubmission that is set by default
    # NOTE(review): no statement here changes the policy; it is passed to
    # restart() exactly as constructed -- confirm whether a call is missing.
    batch_job.restart(failedpolicy, True)

    # restart once after the job submission is finished to optimize job distributions to queues where the job actually runs
    if not restarted:

        # actually, it probably would be a good idea to refresh the job status here because otherwise the restart will just
        # restart failed jobs that were already submitted with the restart above... not really sure...
        #multiPartJob.refresh()
        # this might not work the first few times because in the background the batchjob is still submitting...
        print "trying to restarting job..."