def _prepareSubmit(self, task, jobNumList, queryArguments):
    """Stage the submit files and write the JDL locally; remote transfer is missing.

    The JDL is written into the parent pool's sandbox directory under a name
    built from the pool's ``wmsName`` and the md5 hex digest of this schedd's
    URI. Moving that file to the remote side is not implemented, so the
    method always ends by raising.

    :param task: handed on to ``_stageSubmitFiles`` and ``_getJDLData``
    :param jobNumList: job numbers requested for submission
    :param queryArguments: handed on to ``_getJDLData``
    :raises NotImplementedError: always, after the local JDL has been written
    """
    localJdlFilePath = os.path.join(
        self.parentPool.getSandboxPath(),
        'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI()).hexdigest()))
    # the JDL is built for the job numbers returned by the staging step
    readyJobNumList = self._stageSubmitFiles(task, jobNumList)
    utils.safeWrite(
        open(localJdlFilePath, 'w'),
        lmap(lambda line: line + '\n', self._getJDLData(task, readyJobNumList, queryArguments)))
    # No return statement follows: control never gets past this raise, and the
    # previous dead `return jdlFilePath` named a variable that does not exist here.
    raise NotImplementedError('JDL must get moved to remote')
def _prepareSubmit(self, task, jobNumList, queryArguments):
    """Write the JDL for the given jobs into the pool sandbox and return its path.

    The file name is built from the parent pool's ``wmsName`` and the md5 hex
    digest of this schedd's URI; every entry from ``_getJDLData`` is written
    with a trailing newline appended.

    :param task: handed on to ``_getJDLData``
    :param jobNumList: job numbers handed on to ``_getJDLData``
    :param queryArguments: handed on to ``_getJDLData``
    :returns: path of the JDL file
    """
    sandboxDir = self.parentPool.getSandboxPath()
    jdlName = 'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI()).hexdigest())
    jdlFilePath = os.path.join(sandboxDir, jdlName)

    jdlFile = open(jdlFilePath, 'w')
    # lazily terminate each JDL entry with a newline while it is written
    jdlLines = (entry + '\n' for entry in self._getJDLData(task, jobNumList, queryArguments))
    utils.safeWrite(jdlFile, jdlLines)
    return jdlFilePath
def _submitJob(self, jobNum, module):
    """Submit one job through the submit executable and return its result tuple.

    The JDL from ``makeJDL`` is written to a temporary ``.jdl`` file, the
    submit executable is run on it, and the last output line starting with
    ``http`` is taken as the WMS id. Both temporary files are removed before
    returning.

    :param jobNum: number of the job to submit
    :param module: handed on to ``makeJDL``
    :returns: ``(jobNum, utils.QM(wmsId, self._createId(wmsId), None), {'jdl': <joined JDL data>})``
    :raises BackendError: if the JDL file cannot be written
    """
    fd, jdl = tempfile.mkstemp('.jdl')
    try:
        data = self.makeJDL(jobNum, module)
        utils.safeWrite(os.fdopen(fd, 'w'), data)
    except Exception:
        utils.removeFiles([jdl])
        raise BackendError('Could not write jdl data to %s.' % jdl)

    # Choose the log file name before entering the try block: the finally
    # clause references it, so binding it later would turn any early failure
    # into a NameError that hides the real error and skips the jdl cleanup.
    log = tempfile.mktemp('.log')
    try:
        tmp = utils.filterDict(self._submitParams, vF = lambda v: v)
        # each (option, value) pair becomes "option value"; formatting the item
        # tuple directly avoids the Python-2-only `lambda (x, y)` syntax
        params = str.join(' ', ['%s %s' % item for item in tmp.items()])

        activity = utils.ActivityLog('submitting jobs')
        proc = utils.LoggedProcess(self._submitExec, '%s --nomsg --noint --logfile "%s" "%s"' % (params, log, jdl))

        # the last output line starting with "http" is used as the id
        wmsId = None
        for line in proc.iter():
            line = line.strip()
            if line.startswith('http'):
                wmsId = line
        retCode = proc.wait()
        del activity

        if (retCode != 0) or (wmsId is None):
            if not self.explainError(proc, retCode):
                proc.logError(self.errorLog, log = log, jdl = jdl)
    finally:
        utils.removeFiles([log, jdl])
    return (jobNum, utils.QM(wmsId, self._createId(wmsId), None), {'jdl': str.join('', data)})
def _submitJob(self, jobNum, module):
    """Submit one job with the submit executable and return its result tuple.

    The JDL from ``makeJDL`` goes into a temporary ``.jdl`` file that is
    passed as the last argument of the submit call; the last output line
    starting with ``http`` becomes the job id. The temporary file is removed
    afterwards.

    :param jobNum: number of the job to submit
    :param module: handed on to ``makeJDL``
    :returns: ``(jobNum, utils.QM(gcID, self._createId(gcID), None), {'jdl': <joined JDL data>})``
    :raises BackendError: if the JDL file cannot be written
    """
    fd, jdl = tempfile.mkstemp('.jdl')
    try:
        jdlData = self.makeJDL(jobNum, module)
        jdlFile = os.fdopen(fd, 'w')
        utils.safeWrite(jdlFile, jdlData)
    except Exception:
        utils.removeFiles([jdl])
        raise BackendError('Could not write jdl data to %s.' % jdl)

    try:
        # flatten the (option, value) pairs that pass the filter, then add the jdl path
        submitArgs = []
        activeParams = utils.filterDict(self._submitParams, vF = lambda v: v)
        for optionAndValue in activeParams.items():
            submitArgs.extend(optionAndValue)
        submitArgs.append(jdl)

        activity = Activity('submitting job %d' % jobNum)
        proc = LocalProcess(self._submitExec, '--nomsg', '--noint', '--logfile', '/dev/stderr', *submitArgs)

        # keep the last stripped output line that starts with "http"
        gcID = None
        for rawLine in proc.stdout.iter(timeout = 60):
            cleanLine = str.strip(rawLine)
            if cleanLine.startswith('http'):
                gcID = cleanLine
        retCode = proc.status(timeout = 0, terminate = True)
        activity.finish()

        submitFailed = (retCode != 0) or (gcID is None)
        if submitFailed and not self.explainError(proc, retCode):
            self._log.log_process(proc, files = {'jdl': SafeFile(jdl).read()})
    finally:
        utils.removeFiles([jdl])

    wmsId = utils.QM(gcID, self._createId(gcID), None)
    jobData = {'jdl': str.join('', jdlData)}
    return (jobNum, wmsId, jobData)
def writeWMSIds(self, ids):
    """Write the raw ids for ``ids`` to a new temporary file and return its path.

    The values from ``_getRawIDs(ids)`` are joined with newlines and written
    to a temporary file with the suffix ``.jobids``.

    :param ids: handed on to ``_getRawIDs``
    :returns: path of the temporary file
    :raises BackendError: if the file cannot be created or written
    """
    # Create the file outside the write handler: that handler formats the
    # path into its message, which is only possible once mkstemp succeeded.
    try:
        fd, jobs = tempfile.mkstemp('.jobids')
    except Exception:
        raise BackendError('Could not create temporary file for wms ids.')
    try:
        utils.safeWrite(os.fdopen(fd, 'w'), str.join('\n', self._getRawIDs(ids)))
    except Exception:
        # the caller never learns the path on failure, so clean up here
        utils.removeFiles([jobs])
        raise BackendError('Could not write wms ids to %s.' % jobs)
    return jobs
def _writeJobConfig(self, cfgPath, jobNum, task, extras):
    """Write the job's configuration as ``export`` lines to ``cfgPath``.

    The job config of ``task`` is merged with ``extras``, the stripped job
    arguments are stored under ``GC_ARGS``, and the result is formatted with
    ``utils.DictFormat`` before being written.

    :param cfgPath: path of the file to write
    :param jobNum: job number used to query ``task``
    :param task: provides ``getJobConfig`` and ``getJobArguments``
    :param extras: additional entries merged with the job config
    :raises BackendError: on any failure while building or writing the config
    """
    try:
        jobEnv = utils.mergeDicts([task.getJobConfig(jobNum), extras])
        jobArgs = task.getJobArguments(jobNum)
        jobEnv['GC_ARGS'] = jobArgs.strip()

        formatter = utils.DictFormat(escapeString = True)
        content = formatter.format(jobEnv, format = 'export %s%s%s\n')

        cfgFile = open(cfgPath, 'w')
        utils.safeWrite(cfgFile, content)
    except Exception:
        raise BackendError('Could not write job config data to %s.' % cfgPath)
def _writeJobConfig(self, cfgPath, jobNum, module, extras = None):
    """Write the job's configuration as ``export`` lines to ``cfgPath``.

    The job config of ``module`` is merged with ``extras``, the stripped job
    arguments are stored under ``GC_ARGS``, and the result is formatted with
    ``utils.DictFormat`` before being written.

    :param cfgPath: path of the file to write
    :param jobNum: job number used to query ``module``
    :param module: provides ``getJobConfig`` and ``getJobArguments``
    :param extras: optional additional entries merged with the job config;
        omitting it is the same as passing an empty dict
    :raises RethrowError: on any error while building or writing the config
    """
    # A fresh dict per call instead of a shared `{}` default, so nothing done
    # with the dict in one call can leak into a later call.
    if extras is None:
        extras = {}
    try:
        jobEnv = utils.mergeDicts([module.getJobConfig(jobNum), extras])
        jobEnv['GC_ARGS'] = module.getJobArguments(jobNum).strip()
        content = utils.DictFormat(escapeString = True).format(jobEnv, format = 'export %s%s%s\n')
        utils.safeWrite(open(cfgPath, 'w'), content)
    except Exception:
        # not a bare except: KeyboardInterrupt/SystemExit must pass through
        # untouched rather than being reported as a config write failure
        raise RethrowError('Could not write job config data to %s.' % cfgPath)
def _prepareSubmit(self, task, jobNumList, queryArguments):
    """Write the JDL for the given jobs into the pool sandbox and return its path.

    The file name is built from the parent pool's ``wmsName`` and the md5 hex
    digest of this schedd's URI; every entry from ``_getJDLData`` gets a
    trailing newline before it is written.

    :param task: handed on to ``_getJDLData``
    :param jobNumList: job numbers handed on to ``_getJDLData``
    :param queryArguments: handed on to ``_getJDLData``
    :returns: path of the JDL file
    """
    def _withNewline(line):
        return line + '\n'

    sandboxDir = self.parentPool.getSandboxPath()
    jdlName = 'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI()).hexdigest())
    jdlFilePath = os.path.join(sandboxDir, jdlName)

    jdlFile = open(jdlFilePath, 'w')
    jdlLines = lmap(_withNewline, self._getJDLData(task, jobNumList, queryArguments))
    utils.safeWrite(jdlFile, jdlLines)
    return jdlFilePath
def _writeJobConfig(self, cfgPath, jobNum, task, extras):
    """Dump the environment of one job as ``export`` statements into ``cfgPath``.

    The environment is the job config of ``task`` merged with ``extras``,
    plus a ``GC_ARGS`` entry holding the stripped job arguments.

    :param cfgPath: path of the file to write
    :param jobNum: job number used to query ``task``
    :param task: provides ``getJobConfig`` and ``getJobArguments``
    :param extras: additional entries merged with the job config
    :raises BackendError: on any failure while building or writing the config
    """
    try:
        configSources = [task.getJobConfig(jobNum), extras]
        jobEnv = utils.mergeDicts(configSources)
        jobEnv['GC_ARGS'] = task.getJobArguments(jobNum).strip()

        envFormatter = utils.DictFormat(escapeString=True)
        exportLines = envFormatter.format(jobEnv, format='export %s%s%s\n')

        target = open(cfgPath, 'w')
        utils.safeWrite(target, exportLines)
    except Exception:
        raise BackendError('Could not write job config data to %s.' % cfgPath)
def _prepareSubmit(self, task, jobNumList, queryArguments):
    """Stage the submit files and write the JDL locally; remote transfer is missing.

    The JDL lands in the parent pool's sandbox directory under a name built
    from the pool's ``wmsName`` and the md5 hex digest of this schedd's URI.
    Because moving the file to the remote side is not implemented, the
    method always raises once the local file has been written.

    :param task: handed on to ``_stageSubmitFiles`` and ``_getJDLData``
    :param jobNumList: job numbers requested for submission
    :param queryArguments: handed on to ``_getJDLData``
    :raises NotImplementedError: always, after the local JDL has been written
    """
    localJdlFilePath = os.path.join(
        self.parentPool.getSandboxPath(),
        'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName,
                                  md5(self.getURI()).hexdigest()))
    # the JDL is built for the job numbers returned by the staging step
    readyJobNumList = self._stageSubmitFiles(task, jobNumList)
    utils.safeWrite(
        open(localJdlFilePath, 'w'),
        lmap(lambda line: line + '\n',
             self._getJDLData(task, readyJobNumList, queryArguments)))
    # Deliberately the last statement: the dead `return jdlFilePath` that used
    # to follow referred to a name that is never bound in this method.
    raise NotImplementedError('JDL must get moved to remote')
def _writeJobConfig(self, cfgPath, jobNum, task, extras):
    """Write the job's configuration as ``export`` lines to ``cfgPath``.

    The job config of ``task`` is merged with ``extras`` and ``GC_ARGS`` is
    set to the stripped job arguments. If the config contains ``FILE_NAMES``
    and ``self.fileNamesEnvironment`` is false, the shell-split file names
    are written one per line to ``fileNames.job_<jobNum>.txt`` in the sandbox
    path and ``FILE_NAMES`` is replaced by the path of that file.

    :param cfgPath: path of the file to write
    :param jobNum: job number used to query ``task`` and to name the list file
    :param task: provides ``getJobConfig`` and ``getJobArguments``
    :param extras: additional entries merged with the job config
    :raises BackendError: on any failure while building or writing the config
    """
    try:
        jobEnv = utils.mergeDicts([task.getJobConfig(jobNum), extras])
        jobEnv['GC_ARGS'] = task.getJobArguments(jobNum).strip()

        # Keep $FILE_NAMES out of the environment when [wms] file names
        # environment = False - this helps to avoid too large environments.
        # TODO: send fileNames.job_%d.txt together with the sandbox
        useListFile = ("FILE_NAMES" in jobEnv) and (not self.fileNamesEnvironment)
        if useListFile:
            sandboxDir = self.config.getPath('sandbox path', self.config.getWorkPath('sandbox'))
            fileNamesList = os.path.join(sandboxDir, "fileNames.job_%d.txt" % jobNum)
            with open(fileNamesList, "w") as fileNamesListFile:
                fileNames = shlex.split(jobEnv.pop("FILE_NAMES"))
                fileNamesListFile.write("\n".join(fileNames))
            # the variable now points at the list file instead of the names
            jobEnv["FILE_NAMES"] = fileNamesList

        formatter = utils.DictFormat(escapeString = True)
        content = formatter.format(jobEnv, format = 'export %s%s%s\n')
        cfgFile = open(cfgPath, 'w')
        utils.safeWrite(cfgFile, content)
    except Exception:
        raise BackendError('Could not write job config data to %s.' % cfgPath)
def _submitJob(self, jobNum, module):
    """Run the submit executable for a single job and return its result tuple.

    A temporary ``.jdl`` file receives the data from ``makeJDL`` and is
    passed as the final argument of the submit call. The last output line
    starting with ``http`` is used as the job id. The temporary file is
    removed afterwards.

    :param jobNum: number of the job to submit
    :param module: handed on to ``makeJDL``
    :returns: ``(jobNum, utils.QM(gcID, self._createId(gcID), None), {'jdl': <joined JDL data>})``
    :raises BackendError: if the JDL file cannot be written
    """
    fd, jdl = tempfile.mkstemp('.jdl')
    try:
        jdlData = self.makeJDL(jobNum, module)
        jdlHandle = os.fdopen(fd, 'w')
        utils.safeWrite(jdlHandle, jdlData)
    except Exception:
        utils.removeFiles([jdl])
        raise BackendError('Could not write jdl data to %s.' % jdl)

    try:
        # command line: every kept (option, value) pair, then the jdl path
        submitArgs = []
        keptParams = utils.filterDict(self._submitParams, vF=lambda v: v)
        for paramPair in keptParams.items():
            submitArgs.extend(paramPair)
        submitArgs.append(jdl)

        activity = Activity('submitting job %d' % jobNum)
        proc = LocalProcess(self._submitExec, '--nomsg', '--noint',
                            '--logfile', '/dev/stderr', *submitArgs)

        # scan the output; the last line starting with "http" wins
        gcID = None
        for outputLine in proc.stdout.iter(timeout=60):
            stripped = str.strip(outputLine)
            if stripped.startswith('http'):
                gcID = stripped
        retCode = proc.status(timeout=0, terminate=True)
        activity.finish()

        if (retCode != 0) or (gcID is None):
            explained = self.explainError(proc, retCode)
            if not explained:
                self._log.log_process(proc, files={'jdl': SafeFile(jdl).read()})
    finally:
        utils.removeFiles([jdl])

    submittedId = utils.QM(gcID, self._createId(gcID), None)
    extraData = {'jdl': str.join('', jdlData)}
    return (jobNum, submittedId, extraData)
def _writeJobConfig(self, cfgPath, jobNum, task, extras):
    """Dump the environment of one job as ``export`` statements into ``cfgPath``.

    The environment is the job config of ``task`` merged with ``extras``,
    plus ``GC_ARGS`` (the stripped job arguments). When ``FILE_NAMES`` is
    present and ``self.fileNamesEnvironment`` is false, the shell-split names
    go to ``fileNames.job_<jobNum>.txt`` in the sandbox path, one per line,
    and ``FILE_NAMES`` is set to that file's path instead.

    :param cfgPath: path of the file to write
    :param jobNum: job number used to query ``task`` and to name the list file
    :param task: provides ``getJobConfig`` and ``getJobArguments``
    :param extras: additional entries merged with the job config
    :raises BackendError: on any failure while building or writing the config
    """
    try:
        configSources = [task.getJobConfig(jobNum), extras]
        jobEnv = utils.mergeDicts(configSources)
        jobEnv['GC_ARGS'] = task.getJobArguments(jobNum).strip()

        # With [wms] file names environment = False the names are stored in a
        # file, which helps to avoid too large environments.
        # TODO: send fileNames.job_%d.txt together with the sandbox
        namesGoToFile = ("FILE_NAMES" in jobEnv) and (not self.fileNamesEnvironment)
        if namesGoToFile:
            sandboxPath = self.config.getPath(
                'sandbox path', self.config.getWorkPath('sandbox'))
            fileNamesList = os.path.join(sandboxPath,
                                         "fileNames.job_%d.txt" % jobNum)
            with open(fileNamesList, "w") as fileNamesListFile:
                splitNames = shlex.split(jobEnv.pop("FILE_NAMES"))
                fileNamesListFile.write("\n".join(splitNames))
            # from here on the variable carries the path of the list file
            jobEnv["FILE_NAMES"] = fileNamesList

        envFormatter = utils.DictFormat(escapeString=True)
        exportLines = envFormatter.format(jobEnv, format='export %s%s%s\n')
        target = open(cfgPath, 'w')
        utils.safeWrite(target, exportLines)
    except Exception:
        raise BackendError('Could not write job config data to %s.' % cfgPath)
def submitJobs(self, jobNumListFull, module):
    """Submit jobs in batches of 25 and yield one result tuple per queued job.

    For every batch a temporary JDL is written, each job's config file is
    created, and - if the pool is reached via SSH or GSISSH - the sandbox
    input files, job configs, JDL and auth files are copied to the remote
    side. The batch is then passed to the submit executable and the
    ``GridControl_GCIDtoWMSID`` lines of its output are parsed into
    (job number, WMS id) pairs.

    :param jobNumListFull: job numbers to submit
    :param module: handed on to ``makeJDLdata``, ``_writeJobConfig`` and
        ``_getSandboxFilesIn``
    :returns: generator of ``(jobNum, wmsId, {})`` tuples
    :raises BackendError: if the JDL or a job config file cannot be written
    """
    submitBatch = 25

    # get the full job config path and basename
    def _getJobCFG(jobNum):
        baseName = 'job_%d.var' % jobNum
        return os.path.join(self.getSandboxPath(jobNum), baseName), baseName

    # copy one file to the remote pool; a failed copy is logged unless
    # explainError already accounts for it
    def _copyToRemote(fileSource, fileTarget):
        copyProcess = self.Pool.LoggedCopyToRemote(fileSource, fileTarget)
        retCode = copyProcess.wait()
        if (retCode != 0) and not self.explainError(copyProcess, retCode):
            copyProcess.logError(self.errorLog, brief=True)
        self.debugFlush()

    for index in range(0, len(jobNumListFull), submitBatch):
        jobNumList = jobNumListFull[index:index + submitBatch]
        self.debugOut("\nStarted submitting: %s" % jobNumList)
        self.debugPool()

        activity = utils.ActivityLog('preparing jobs')
        # construct a temporary JDL for this batch of jobs
        jdlDescriptor, jdlFilePath = tempfile.mkstemp(suffix='.jdl')
        jdlSubmitPath = jdlFilePath
        self.debugOut("Writing temporary jdl to: " + jdlSubmitPath)
        try:
            data = self.makeJDLdata(jobNumList, module)
            utils.safeWrite(os.fdopen(jdlDescriptor, 'w'), data)
        except Exception:
            utils.removeFiles([jdlFilePath])
            raise BackendError('Could not write jdl data to %s.' % jdlFilePath)

        # create the _jobconfig.sh file containing the actual data
        for jobNum in jobNumList:
            try:
                self._writeJobConfig(_getJobCFG(jobNum)[0], jobNum, module)
            except Exception:
                raise BackendError('Could not write _jobconfig data for %s.' % jobNum)

        self.debugOut("Copying to remote")
        # copy infiles to ssh/gsissh remote pool if required
        if self.remoteType == poolType.SSH or self.remoteType == poolType.GSISSH:
            activity = utils.ActivityLog('preparing remote scheduler')
            self.debugOut("Copying to sandbox")
            workdirBase = self.getWorkdirPath()
            # TODO: check whether shared remote files already exist and copy otherwise
            for _, fileSource, fileTarget in self._getSandboxFilesIn(module):
                _copyToRemote(fileSource, os.path.join(workdirBase, fileTarget))
            # copy job config files
            self.debugOut("Copying job configs")
            for jobNum in jobNumList:
                fileSource, fileTarget = _getJobCFG(jobNum)
                _copyToRemote(fileSource, os.path.join(self.getWorkdirPath(jobNum), fileTarget))
            # copy jdl - from here on the remote copy is the one submitted
            self.debugOut("Copying jdl")
            jdlSubmitPath = os.path.join(workdirBase, os.path.basename(jdlFilePath))
            _copyToRemote(jdlFilePath, jdlSubmitPath)
            # copy proxy
            for authFile in self.proxy.getAuthFiles():
                self.debugOut("Copying proxy")
                _copyToRemote(authFile, os.path.join(self.getWorkdirPath(), os.path.basename(authFile)))

        self.debugOut("Starting jobs")
        try:
            # submit all jobs simultaneously and temporarily store verbose (ClassAdd) output
            activity = utils.ActivityLog('queuing jobs at scheduler')
            proc = self.Pool.LoggedProcess(self.submitExec, ' -verbose %(JDL)s' % {"JDL": jdlSubmitPath})

            self.debugOut("AAAAA")
            # extract the Condor ID (WMS ID) of the jobs from output ClassAds
            wmsJobIdList = []
            # Job numbers already recorded. The list stores (WMS id, job
            # number) pairs, so duplicates have to be detected on the job
            # number; comparing against the WMS id column never matches.
            seenGCIDs = set()
            for line in proc.iter():
                if "GridControl_GCIDtoWMSID" in line:
                    GCWMSID = line.split('=')[1].strip(' "\n').split('@')
                    GCID, WMSID = int(GCWMSID[0]), GCWMSID[1].strip()
                    # Condor creates a default job then overwrites settings on any subsequent job - i.e. skip every second, but better be sure
                    if GCID not in seenGCIDs:
                        seenGCIDs.add(GCID)
                        wmsJobIdList.append((self._createId(WMSID), GCID))
                if "GridControl_GCtoWMSID" in line:
                    self.debugOut("o : %s" % line)
                    self.debugOut("o : %s" % wmsJobIdList)

            retCode = proc.wait()
            if (retCode != 0) or (len(wmsJobIdList) < len(jobNumList)):
                if not self.explainError(proc, retCode):
                    utils.eprint("Submitted %4d jobs of %4d expected" % (len(wmsJobIdList), len(jobNumList)))
                    proc.logError(self.errorLog, jdl = jdlFilePath)
        finally:
            utils.removeFiles([jdlFilePath])
        self.debugOut("Done Submitting")

        # yield the (jobNum, WMS ID, other data) of each job successively
        for wmsId, jobNum in wmsJobIdList:
            yield (jobNum, wmsId, {})
        self.debugOut("Yielded submitted job")
        self.debugFlush()
def commit(self, jobNum, jobObj):
    """Write the data of ``jobObj`` to ``job_<jobNum>.txt`` below ``self._dbPath``.

    :param jobNum: job number used to build the file name
    :param jobObj: object whose ``getAll()`` result is formatted and written
    """
    jobFilePath = os.path.join(self._dbPath, 'job_%d.txt' % jobNum)
    jobFile = open(jobFilePath, 'w')
    formatter = utils.DictFormat(escapeString = True)
    content = formatter.format(jobObj.getAll())
    utils.safeWrite(jobFile, content)
def submitJobs(self, jobNumListFull, module):
    """Submit jobs in batches of 25 and yield one result tuple per queued job.

    For every batch a temporary JDL is written, each job's config file is
    created, and - if the pool is reached via SSH or GSISSH - the sandbox
    input files, job configs, JDL and auth files are copied to the remote
    side. The batch is then passed to the submit executable and the
    ``GridControl_GCIDtoWMSID`` lines of its output are parsed into
    (job number, WMS id) pairs.

    :param jobNumListFull: job numbers to submit
    :param module: handed on to ``makeJDLdata``, ``_writeJobConfig`` and
        ``_getSandboxFilesIn``
    :returns: generator of ``(jobNum, wmsId, {})`` tuples
    :raises BackendError: if the JDL or a job config file cannot be written
    """
    submitBatch = 25

    # get the full job config path and basename
    def _getJobCFG(jobNum):
        baseName = 'job_%d.var' % jobNum
        return os.path.join(self.getSandboxPath(jobNum), baseName), baseName

    # copy one file to the remote pool; a failed copy is logged unless
    # explainError already accounts for it
    def _copyToRemote(fileSource, fileTarget):
        copyProcess = self.Pool.LoggedCopyToRemote(fileSource, fileTarget)
        retCode = copyProcess.wait()
        if (retCode != 0) and not self.explainError(copyProcess, retCode):
            copyProcess.logError(self.errorLog, brief=True)
        self.debugFlush()

    for index in irange(0, len(jobNumListFull), submitBatch):
        jobNumList = jobNumListFull[index:index + submitBatch]
        self.debugOut("\nStarted submitting: %s" % jobNumList)
        self.debugPool()

        activity = utils.ActivityLog('preparing jobs')
        # construct a temporary JDL for this batch of jobs
        jdlDescriptor, jdlFilePath = tempfile.mkstemp(suffix='.jdl')
        jdlSubmitPath = jdlFilePath
        self.debugOut("Writing temporary jdl to: " + jdlSubmitPath)
        try:
            data = self.makeJDLdata(jobNumList, module)
            utils.safeWrite(os.fdopen(jdlDescriptor, 'w'), data)
        except Exception:
            utils.removeFiles([jdlFilePath])
            raise BackendError('Could not write jdl data to %s.' % jdlFilePath)

        # create the _jobconfig.sh file containing the actual data
        for jobNum in jobNumList:
            try:
                self._writeJobConfig(_getJobCFG(jobNum)[0], jobNum, module, {})
            except Exception:
                raise BackendError('Could not write _jobconfig data for %s.' % jobNum)

        self.debugOut("Copying to remote")
        # copy infiles to ssh/gsissh remote pool if required
        if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
            activity = utils.ActivityLog('preparing remote scheduler')
            self.debugOut("Copying to sandbox")
            workdirBase = self.getWorkdirPath()
            # TODO: check whether shared remote files already exist and copy otherwise
            for _, fileSource, fileTarget in self._getSandboxFilesIn(module):
                _copyToRemote(fileSource, os.path.join(workdirBase, fileTarget))
            # copy job config files
            self.debugOut("Copying job configs")
            for jobNum in jobNumList:
                fileSource, fileTarget = _getJobCFG(jobNum)
                _copyToRemote(fileSource, os.path.join(self.getWorkdirPath(jobNum), fileTarget))
            # copy jdl - from here on the remote copy is the one submitted
            self.debugOut("Copying jdl")
            jdlSubmitPath = os.path.join(workdirBase, os.path.basename(jdlFilePath))
            _copyToRemote(jdlFilePath, jdlSubmitPath)
            # copy proxy
            for authFile in self._token.getAuthFiles():
                self.debugOut("Copying proxy")
                _copyToRemote(authFile, os.path.join(self.getWorkdirPath(), os.path.basename(authFile)))

        self.debugOut("Starting jobs")
        try:
            # submit all jobs simultaneously and temporarily store verbose (ClassAdd) output
            activity = utils.ActivityLog('queuing jobs at scheduler')
            proc = self.Pool.LoggedExecute(self.submitExec, ' -verbose %(JDL)s' % {"JDL": jdlSubmitPath})

            self.debugOut("AAAAA")
            # extract the Condor ID (WMS ID) of the jobs from output ClassAds
            wmsJobIdList = []
            # Job numbers already recorded. The list stores (WMS id, job
            # number) pairs, so duplicates have to be detected on the job
            # number; comparing against the WMS id column never matches.
            seenGCIDs = set()
            for line in proc.iter():
                if "GridControl_GCIDtoWMSID" in line:
                    GCWMSID = line.split('=')[1].strip(' "\n').split('@')
                    GCID, WMSID = int(GCWMSID[0]), GCWMSID[1].strip()
                    # Condor creates a default job then overwrites settings on any subsequent job - i.e. skip every second, but better be sure
                    if GCID not in seenGCIDs:
                        seenGCIDs.add(GCID)
                        wmsJobIdList.append((self._createId(WMSID), GCID))
                if "GridControl_GCtoWMSID" in line:
                    self.debugOut("o : %s" % line)
                    self.debugOut("o : %s" % wmsJobIdList)

            retCode = proc.wait()
            activity.finish()
            if (retCode != 0) or (len(wmsJobIdList) < len(jobNumList)):
                if not self.explainError(proc, retCode):
                    utils.eprint("Submitted %4d jobs of %4d expected" % (len(wmsJobIdList), len(jobNumList)))
                    proc.logError(self.errorLog, jdl=jdlFilePath)
        finally:
            utils.removeFiles([jdlFilePath])
        self.debugOut("Done Submitting")

        # yield the (jobNum, WMS ID, other data) of each job successively
        for wmsId, jobNum in wmsJobIdList:
            yield (jobNum, wmsId, {})
        self.debugOut("Yielded submitted job")
        self.debugFlush()