def master_resubmit(self, jobs):
    '''Resubmit failed Jedi jobs.

    For every distinct ``job.backend.id`` found in ``jobs`` the Jedi task
    details are fetched; if the task is in a retryable state the task is
    retried through ``Client.retryTask`` and the owning Ganga job is put
    back into ``'submitted'``.

    Arguments:
        jobs -- iterable of Ganga jobs; each must expose ``backend.id``.

    Returns:
        True when every task was retried, False as soon as one task is not
        in a retryable state or its retry request fails. Tasks handled
        before the failing one stay resubmitted.

    Raises:
        BackendError -- ``Client.getJediTaskDetails`` returned a non-zero
        status code.
    '''
    from pandatools import Client

    # Task states from which a retry is attempted.
    retryable_status = ['failed', 'killed', 'cancelled', 'aborted', 'broken', 'finished']

    # Index the Ganga jobs by Jedi task ID (duplicates collapse to one entry).
    jobIDs = {}
    for job in jobs:
        jobIDs[job.backend.id] = job

    for jID in jobIDs:
        with inject_proxy(self.credential_requirements):
            status, jediTaskDict = Client.getJediTaskDetails({'jediTaskID': jID}, False, True, verbose=False)
        if status != 0:
            logger.error("Failed to get task details for %s" % jID)
            raise BackendError('Jedi', 'Return code %d retrieving job status information.' % status)

        # The job is looked up through the ID reported back by the server.
        job = jobIDs[jediTaskDict['jediTaskID']]

        if jediTaskDict['status'] not in retryable_status:
            logger.warning("Cannot resubmit. Jedi task %s is status %s." % (jID, jediTaskDict['status']))
            return False

        # Ask the server to retry the task.
        with inject_proxy(self.credential_requirements):
            status, out = Client.retryTask(jID, verbose=False)
        if status != 0:
            logger.error(status)
            logger.error(out)
            logger.error("Failed to retry JobID=%s" % jID)
            return False

        # ``out`` is unpacked as a (success flag, diagnostic message) pair.
        tmpStat, tmpDiag = out
        if not tmpStat:
            logger.error(tmpDiag)
            logger.error("Failed to retry JobID=%s" % jID)
            return False
        logger.info(tmpDiag)

        # Reset the backend status and job spec before marking the job
        # as submitted again.
        job.backend.status = None
        job.backend.jobSpec = {}
        job.updateStatus('submitted')

    # NOTE(review): the source formatting was collapsed; the final log and
    # return are placed after the loop so that all tasks are processed --
    # confirm against the intended layout.
    logger.info('Resubmission successful')
    return True
def master_submit(self, rjobs, subjobspecs, buildjobspec):
    '''Submit the task to Jedi and record the new task ID on each subjob.

    Arguments:
        rjobs        -- subjobs to update; each gets its status changed and
                        its ``backend.id`` / ``backend.url`` filled in.
        subjobspecs  -- task parameters used when ``buildjobspec`` is falsy.
        buildjobspec -- task parameters used in preference to
                        ``subjobspecs`` when truthy.

    Returns:
        True on success. False when ``Client.insertTaskParams`` reports a
        non-zero status or its reply carries ``False`` as first element; in
        that case the subjobs are left in the ``'submitting'`` state.
    '''
    from pandatools import Client
    from pandatools import MiscUtils
    from Ganga.Core import IncompleteJobSubmissionError
    from Ganga.Utility.logging import log_user_exception

    # NOTE(review): neither of these two values is read further down; the
    # calls are retained unchanged.
    job = self.getJobObject()
    jobName = 'ganga.%s' % MiscUtils.wrappedUuidGen()

    # The build job spec wins whenever one is supplied.
    task_params = buildjobspec if buildjobspec else subjobspecs
    logger.debug(task_params)

    # Flag every subjob as being submitted before contacting the server.
    for subjob in rjobs:
        subjob.updateStatus('submitting')

    logger.info("Submitting to Jedi ...")
    # 10 is the numeric value of the standard DEBUG logging level.
    debug_enabled = logger.isEnabledFor(10)
    with inject_proxy(self.credential_requirements):
        status, reply = Client.insertTaskParams(task_params, debug_enabled)
    logger.debug(reply)

    if status != 0:
        logger.error("Task submission to Jedi failed with %s " % status)
        return False

    # reply[0] is compared against False explicitly; reply[1] carries either
    # the failure message or the new jediTaskID.
    if reply[0] == False:
        logger.error("Task submission to Jedi failed %s" % reply[1])
        return False
    logger.info("Task submission to Jedi suceeded with new jediTaskID=%s" % reply[1])

    # Every subjob shares the single task ID and monitoring URL.
    for subjob in rjobs:
        backend = subjob.backend
        backend.id = reply[1]
        backend.url = 'http://pandamon.cern.ch/jedi/taskinfo?days=20&task=%d' % reply[1]
        subjob.updateStatus('submitted')
        logger.info("Panda monitor url: %s" % subjob.backend.url)

    return True
def master_kill(self):
    '''Kill the Jedi task behind this backend.

    The kill request is only sent when ``self.id`` is set and
    ``self.status`` is one of the active task states listed below.

    Returns:
        False when ``Client.killTask`` reports a non-zero status; True
        otherwise, including the case where no kill request was sent
        because the task is not in an active state.
    '''
    from pandatools import Client

    logger.debug('Killing job %s' % self.getJobObject().getFQID('.'))

    # Task states in which a kill request is sent; None is accepted too.
    killable_states = (
        None, 'registered', 'waiting', 'defined', 'pending', 'assigning',
        'ready', 'scouting', 'running', 'holding', 'merging', 'prepared',
        'aborting', 'finishing',
    )

    # Nothing is sent for a task without an ID or outside the active states;
    # the method still reports True in that case.
    if not self.id or self.status not in killable_states:
        logger.error('Cannot kill Jedi job %s since it is not in active status', self.id)
        return True

    with inject_proxy(self.credential_requirements):
        rc, reply = Client.killTask(self.id)

    if rc:
        logger.error('Failed killing job (status = %d)', rc)
        return False

    logger.info('Killing Jedi task %s, Server returned: %s' % (self.id, reply))
    return True