def __init__(self, remoteType="", **kwargs):
	"""Set up a remote process handler of the requested connection type.

	remoteType -- connection type name, resolved via self.RPHType.str2enum
	kwargs     -- 'host' ([user@]host; required for SSH/GSISSH) and optional
	              'cmdargs', 'cpargs', 'args' default argument strings
	Raises ConfigError for an unknown type or a missing host, and
	CondorProcessError if the test connection ('exit') fails.
	"""
	self._log = logging.getLogger('backend.condor')
	self.cmd = False
	# pick requested remote connection: load the command/copy/path/argFormat
	# templates registered for this type; any lookup failure (unknown enum,
	# missing template key) is reported as an unknown type
	try:
		self.remoteType = self.RPHType.str2enum(remoteType)
		self.cmd = self.RPHTemplate[self.remoteType]["command"]
		self.copy = self.RPHTemplate[self.remoteType]["copy"]
		self.path = self.RPHTemplate[self.remoteType]["path"]
		self.argFormat = self.RPHTemplate[self.remoteType]["argFormat"]
	except Exception:
		raise ConfigError("Request to initialize RemoteProcessHandler of unknown type: %s" % remoteType)
	# destination should be of type: [user@]host
	# NOTE(review): templates are filled in stages — 'rhost' here, then
	# 'cmdargs'/'cpargs'/'args' below, finally 'cmd' per call; later
	# placeholders presumably are %%-escaped in the templates — confirm
	# against RPHTemplate definitions
	if self.remoteType == self.RPHType.SSH or self.remoteType == self.RPHType.GSISSH:
		try:
			self.cmd = self.cmd % {"rhost": kwargs["host"]}
			self.copy = self.copy % {"rhost": kwargs["host"]}
			self.host = kwargs["host"]
		except Exception:
			raise ConfigError("Request to initialize RemoteProcessHandler of type %s without remote host." % self.RPHType.enum2str(self.remoteType))
	# add default arguments for all commands
	self.cmd = self.cmd % {"cmdargs": kwargs.get("cmdargs", ""), "args": kwargs.get("args", "")}
	self.copy = self.copy % {"cpargs": kwargs.get("cpargs", ""), "args": kwargs.get("args", "")}
	# test connection once: run a remote 'exit' and require return code 0
	proc = LoggedProcess(self.cmd % {"cmd": "exit"})
	ret = proc.getAll()[0]
	if ret != 0:
		raise CondorProcessError('Validation of remote connection failed!', proc)
	self._log.log(logging.INFO2, 'Remote interface initialized:\n\tCmd: %s\n\tCp : %s', self.cmd, self.copy)
def logged_copy_to_remote(self, source, dest):
	"""Start a logged copy of local *source* to *dest* on the remote side."""
	copy_cmdline = ' '.join([self._copy_cmd, self._get_ssh_link(), source, self._remote_path(dest)])
	return LoggedProcess(copy_cmdline)
def logged_execute(self, cmd, args=''):
	"""Run *cmd* (with *args*) on the remote host as a logged process.

	BUG FIX: the command-line pieces were joined with '' (empty string),
	fusing shell command, ssh-link argument, host and remote command into
	one unseparated token; every sibling method (logged_copy_to_remote,
	_create_socket) joins with ' '.
	"""
	return LoggedProcess(str.join(' ', [
		self._shell_cmd, self._get_ssh_link(), self._remote_host,
		_format_args_ssh(cmd + ' ' + args)]))
def LoggedDelete(self, target):
	"""Delete *target* via the configured delete wrapper (no shell)."""
	return LoggedProcess(
		self._delete.cmd,
		self._delete.args({"target": target}),
		shell=False,
		niceCmd=self._delete.niceCmd(),
		niceArgs=self._delete.niceArg({"target": target}),
	)
def LoggedExecute(self, command, args='', nice_cmd=None, nice_args=None):
	"""Run *command* through the execution wrapper; nice_cmd/nice_args
	override what is shown in logs (defaulting to the real values)."""
	shown_cmd = nice_cmd or command
	shown_args = nice_args or args
	# NOTE(review): wrapper exposes both nice_cmd() and niceArg() — mixed
	# naming mirrors the wrapper API as used elsewhere in this file
	return LoggedProcess(
		self._exeWrapper.cmd,
		args=self._exeWrapper.args(command=command, args=args),
		nice_cmd=self._exeWrapper.nice_cmd(command=shown_cmd),
		nice_args=self._exeWrapper.niceArg(args=shown_args),
		shell=False,
	)
def LoggedSocket(self, command="", args='', niceCmd=None, niceArgs=None):
	"""Run *command* through the socket wrapper as a logged process.

	niceCmd/niceArgs override the command/arguments shown in logs.
	BUG FIX: the nice argument string was built via self._exeWrapper
	although every other part of this call uses self._socketWrapper —
	an apparent copy-paste from LoggedExecute.
	"""
	return LoggedProcess(
		self._socketWrapper.cmd,
		args=self._socketWrapper.args(command=command, args=args),
		niceCmd=self._socketWrapper.niceCmd(command=(niceCmd or command)),
		niceArgs=self._socketWrapper.niceArg(args=(niceArgs or args)),
		shell=False,
	)
def LoggedPut(self, source, destination):
	"""Copy *source* to the globally resolved *destination* via the copy wrapper."""
	resolved_dest = self.getGlobalAbsPath(destination)
	return LoggedProcess(
		self._copy.cmd,
		self._copy.args(source=source, destination=resolved_dest),
		niceCmd=self._copy.niceCmd(),
		niceArgs=self._copy.niceArg(source=source, destination=resolved_dest),
		shell=False,
	)
def _CreateSocket(self, duration = 60):
	"""Spawn a background ssh ControlMaster and wait for its socket file.

	duration -- lifetime (seconds) of the remote 'sleep' keeping the master alive
	Returns False if the socket has not appeared after 10 seconds, None otherwise.
	"""
	master_cmdline = " ".join([
		self.cmd, self.defaultArgs, "-o ControlMaster=yes",
		self.socketArgsDef, self.remoteHost,
		self._argFormat("sleep %d" % duration)])
	self.__ControlMaster = LoggedProcess(master_cmdline)
	# poll for the control socket in half-second steps
	half_seconds_waited = 0
	while not os.path.exists(self.sshLink):
		time.sleep(0.5)
		half_seconds_waited += 1
		if half_seconds_waited == 10:  # 5 seconds elapsed: warn once
			self._log.log(logging.INFO1, 'SSH socket still not available after 5 seconds...\n%s', self.sshLink)
			self._log.log(logging.INFO2, 'Socket process: %s', self.__ControlMaster.cmd)
		if half_seconds_waited == 20:  # 10 seconds elapsed: give up
			return False
def _create_socket(self, ssh_link_arg, duration=60):
	"""Spawn a background ssh ControlMaster process and wait for its socket.

	ssh_link_arg -- preformatted ssh option string selecting the control socket
	duration     -- lifetime (seconds) of the remote 'sleep' keeping the master alive
	Returns False if the socket file has not appeared after 10 seconds;
	returns None (implicitly) once it exists.
	"""
	# Socket creation and cleanup
	args = [
		self._shell_cmd, '-o ControlMaster=yes', ssh_link_arg,
		self._remote_host, _format_args_ssh('sleep %d' % duration)
	]
	self._ssh_link_master_proc = LoggedProcess(str.join(' ', args))
	# poll in 0.5s steps until the control socket shows up on disk
	# (0.5 is exactly representable, so the == comparisons below are safe)
	timeout = 0
	while not os.path.exists(self._ssh_link):
		time.sleep(0.5)
		timeout += 0.5
		if timeout == 5:  # warn once after 5 seconds
			self._log.log(
				logging.INFO1,
				'SSH socket still not available after 5 seconds...\n%s',
				self._ssh_link)
			self._log.log(logging.INFO2, 'Socket process: %s', self._ssh_link_master_proc.cmd)
		if timeout == 10:  # give up after 10 seconds
			return False
def _submitJob(self, jobNum, module):
	"""Submit a single job to the local batch system.

	jobNum -- job number within the task; module -- task module providing
	requirements, description and job config.
	Returns (jobNum, gcID or None, {'sandbox': sandbox}).
	Raises BackendError when the sandbox directory cannot be created.
	"""
	activity = Activity('submitting job %d' % jobNum)
	# create a per-job sandbox directory below the shared sandbox path
	try:
		sandbox = tempfile.mkdtemp('', '%s.%04d.' % (module.taskID, jobNum),
			self._sandbox_helper.get_path())
	except Exception:
		# BUG FIX: the old handler formatted 'sandbox', which is unbound
		# when mkdtemp itself raises (NameError instead of BackendError)
		raise BackendError('Unable to create sandbox directory in "%s"!' % self._sandbox_helper.get_path())
	sbPrefix = sandbox.replace(self._sandbox_helper.get_path(), '').lstrip('/')
	def translateTarget(d, s, t):
		# rewrite the transfer target so input files land inside the sandbox
		return (d, s, os.path.join(sbPrefix, t))
	self.smSBIn.doTransfer(ismap(translateTarget, self._getSandboxFilesIn(module)))
	self._writeJobConfig(os.path.join(sandbox, '_jobconfig.sh'), jobNum, module, {
		'GC_SANDBOX': sandbox, 'GC_SCRATCH_SEARCH': str.join(' ', self.scratchPath)})
	# collect requirements from the site/queue brokers
	reqs = self.brokerSite.brokerAdd(module.getRequirements(jobNum), WMS.SITES)
	reqs = dict(self.brokerQueue.brokerAdd(reqs, WMS.QUEUES))
	if (self.memory > 0) and (reqs.get(WMS.MEMORY, 0) < self.memory):
		reqs[WMS.MEMORY] = self.memory  # local jobs need higher (more realistic) memory requirements
	(stdout, stderr) = (os.path.join(sandbox, 'gc.stdout'), os.path.join(sandbox, 'gc.stderr'))
	jobName = module.getDescription(jobNum).jobName
	proc = LoggedProcess(self.submitExec, '%s %s "%s" %s' % (self.submitOpts,
		self.getSubmitArguments(jobNum, jobName, reqs, sandbox, stdout, stderr),
		utils.pathShare('gc-local.sh'), self.getJobArguments(jobNum, sandbox)))
	retCode = proc.wait()
	gcIDText = proc.getOutput().strip().strip('\n')
	try:
		gcID = self.parseSubmitOutput(gcIDText)
	except Exception:
		gcID = None  # submission output did not contain a parsable job id
	activity.finish()
	if retCode != 0:
		self._log.warning('%s failed:', self.submitExec)
	elif gcID is None:
		self._log.warning('%s did not yield job id:\n%s', self.submitExec, gcIDText)
	if gcID:
		gcID = self._createId(gcID)
		# touch a marker file named after the id;
		# BUG FIX: close the handle instead of leaking it
		open(os.path.join(sandbox, gcID), 'w').close()
	else:
		proc.logError(self.errorLog)
	return (jobNum, utils.QM(gcID, gcID, None), {'sandbox': sandbox})
def LoggedCopyFromRemote(self, source, dest, **kwargs):
	"""Copy *source* from the remote side to local *dest* with the copy command."""
	return LoggedProcess(self.cpy, '%s %s' % (source, dest))
def LoggedCopy(self, source, dest, remoteKey="<remote>"):
	"""Copy *source* to *dest*; paths starting with *remoteKey* are rewritten
	through the remote path template before the copy command is built."""
	def _resolve(path):
		# translate '<remote>…' into the host-specific remote path
		if path.startswith(remoteKey):
			return self.path % {"host": self.host, "path": path[len(remoteKey):]}
		return path
	source = _resolve(source)
	dest = _resolve(dest)
	return LoggedProcess(self.copy % {"source": "%s:%s" % (self.host, source), "dest": dest})
def LoggedExecute(self, cmd, args = '', **kwargs):
	"""Run *cmd* directly as a logged process; extra keywords are ignored."""
	local_proc = LoggedProcess(cmd, args)
	return local_proc
def LoggedExecute(self, cmd, args = '', argFormat=defaultArg):
	"""Run *cmd* through the handler's command template.

	argFormat -- optional formatter; the sentinel default selects the
	handler's configured self.argFormat.
	"""
	if argFormat is defaultArg:
		argFormat = self.argFormat  # fall back to the configured formatter
	full_command = "%s %s" % (cmd, args)
	return LoggedProcess(self.cmd % {"cmd": argFormat(self, full_command)})
def LoggedCopyFromRemote(self, source, dest):
	"""Copy *source* (a path on the remote host) to the local *dest*."""
	remote_source = self.path % {"host": self.host, "path": source}
	return LoggedProcess(self.copy % {"source": remote_source, "dest": dest})
def LoggedExecute(self, command, args='', niceCmd=None, niceArgs=None):
	"""Run *command* as a logged process; niceCmd/niceArgs override what is
	shown in logs (defaulting to the real command/arguments)."""
	display_cmd = niceCmd or command
	display_args = niceArgs or args
	return LoggedProcess(command, args=args, niceCmd=display_cmd, niceArgs=display_args)
def _getJobsOutput(self, allIds):
	"""Retrieve job output sandboxes in chunks of self._nJobsPerChunk.

	Generator yielding (jobNum, outputDir); yields (None, dir) for recovered
	directories after an error and (jobNum, None) for unretrievable jobs.
	Raises BackendError if the temporary output path cannot be created.
	"""
	if len(allIds) == 0:
		return  # BUG FIX (PEP 479): 'raise StopIteration' in a generator is a RuntimeError on py3.7+
	basePath = os.path.join(self._outputPath, 'tmp')
	try:
		if len(allIds) == 1:  # For single jobs create single subdir
			basePath = os.path.join(basePath, md5(allIds[0][0]).hexdigest())
		utils.ensureDirExists(basePath)
	except Exception:
		raise BackendError('Temporary path "%s" could not be created.' % basePath, BackendError)
	activity = Activity('retrieving %d job outputs' % len(allIds))
	for ids in imap(lambda x: allIds[x:x + self._nJobsPerChunk],
			irange(0, len(allIds), self._nJobsPerChunk)):
		jobNumMap = dict(ids)
		jobs = ' '.join(self._getRawIDs(ids))
		log = tempfile.mktemp('.log')
		proc = LoggedProcess(self._outputExec,
			'--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs))
		# yield output dirs
		# BUG FIX: materialize the values view — py3 dict views have no remove()
		todo = list(jobNumMap.values())
		done = []
		currentJobNum = None
		for line in imap(str.strip, proc.iter()):
			match = re.match(self._outputRegex, line)
			if match:
				currentJobNum = jobNumMap.get(self._createId(match.groupdict()['rawId']))
				todo.remove(currentJobNum)
				done.append(match.groupdict()['rawId'])
				outputDir = match.groupdict()['outputDir']
				if os.path.exists(outputDir):
					if 'GC_WC.tar.gz' in os.listdir(outputDir):
						# unpack the wildcard tarball in place
						wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
						try:
							tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
							os.unlink(wildcardTar)
						except Exception:
							self._log.error('Can\'t unpack output files contained in %s', wildcardTar)
				yield (currentJobNum, outputDir)
				currentJobNum = None
		retCode = proc.wait()
		if retCode != 0:
			if 'Keyboard interrupt raised by user' in proc.getError():
				utils.removeFiles([log, basePath])
				return  # BUG FIX (PEP 479): see above
			else:
				proc.logError(self.errorLog, log=log)
			self._log.error('Trying to recover from error ...')
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))
	activity.finish()
	# return unretrievable jobs
	for jobNum in todo:
		yield (jobNum, None)
	# purge the retrieved jobs from the CE
	purgeLog = tempfile.mktemp('.log')
	purgeProc = LoggedProcess(utils.resolveInstallPath('glite-ce-job-purge'),
		'--noint --logfile "%s" %s' % (purgeLog, str.join(' ', done)))
	retCode = purgeProc.wait()
	if retCode != 0:
		if self.explainError(purgeProc, retCode):
			pass
		else:
			proc.logError(self.errorLog, log=purgeLog, jobs=done)
	utils.removeFiles([log, purgeLog, basePath])
def LoggedExecute(self, cmd, args = '', **kwargs):
	"""Run *cmd* on the remote host over the established SSH control socket."""
	self._socketHandler()  # make sure the control socket is available
	remote_command = self._argFormat(cmd + " " + args)
	cmdline_parts = [self.cmd, self.defaultArgs, self.socketArgs, self.remoteHost, remote_command]
	return LoggedProcess(" ".join(cmdline_parts))
def LoggedCopyFromRemote(self, source, dest, **kwargs):
	"""Copy remote *source* to local *dest* over the SSH control socket."""
	self._socketHandler()  # make sure the control socket is available
	copy_parts = [self.cpy, self.defaultArgs, self.socketArgs, self._remotePath(source), dest]
	return LoggedProcess(" ".join(copy_parts))
def logged_execute(self, cmd, args=''):
	"""Run *cmd* locally as a logged process."""
	local_proc = LoggedProcess(cmd, args)
	return local_proc
def logged_copy_to_remote(self, source, dest):
	"""'Remote' copy for the local handler: a plain recursive cp."""
	copy_args = '%s %s' % (source, dest)
	return LoggedProcess('cp -r', copy_args)
def _getJobsOutput(self, allIds):
	"""Retrieve job output sandboxes in chunks of self._nJobsPerChunk.

	Generator yielding (jobNum, outputDir); yields (None, dir) for recovered
	directories after an error and (jobNum, None) for unretrievable jobs.
	Raises BackendError if the temporary output path cannot be created.
	"""
	if len(allIds) == 0:
		return  # BUG FIX (PEP 479): 'raise StopIteration' in a generator is a RuntimeError on py3.7+
	basePath = os.path.join(self._outputPath, 'tmp')
	try:
		if len(allIds) == 1:  # For single jobs create single subdir
			basePath = os.path.join(basePath, md5(allIds[0][0]).hexdigest())
		utils.ensureDirExists(basePath)
	except Exception:
		raise BackendError(
			'Temporary path "%s" could not be created.' % basePath, BackendError)
	activity = Activity('retrieving %d job outputs' % len(allIds))
	for ids in imap(lambda x: allIds[x:x + self._nJobsPerChunk],
			irange(0, len(allIds), self._nJobsPerChunk)):
		jobNumMap = dict(ids)
		jobs = ' '.join(self._getRawIDs(ids))
		log = tempfile.mktemp('.log')
		proc = LoggedProcess(
			self._outputExec,
			'--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs))
		# yield output dirs
		# BUG FIX: materialize the values view — py3 dict views have no remove()
		todo = list(jobNumMap.values())
		done = []
		currentJobNum = None
		for line in imap(str.strip, proc.iter()):
			match = re.match(self._outputRegex, line)
			if match:
				currentJobNum = jobNumMap.get(
					self._createId(match.groupdict()['rawId']))
				todo.remove(currentJobNum)
				done.append(match.groupdict()['rawId'])
				outputDir = match.groupdict()['outputDir']
				if os.path.exists(outputDir):
					if 'GC_WC.tar.gz' in os.listdir(outputDir):
						# unpack the wildcard tarball in place
						wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
						try:
							tarfile.TarFile.open(
								wildcardTar, 'r:gz').extractall(outputDir)
							os.unlink(wildcardTar)
						except Exception:
							self._log.error(
								'Can\'t unpack output files contained in %s', wildcardTar)
				yield (currentJobNum, outputDir)
				currentJobNum = None
		retCode = proc.wait()
		if retCode != 0:
			if 'Keyboard interrupt raised by user' in proc.getError():
				utils.removeFiles([log, basePath])
				return  # BUG FIX (PEP 479): see above
			else:
				proc.logError(self.errorLog, log=log)
			self._log.error('Trying to recover from error ...')
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))
	activity.finish()
	# return unretrievable jobs
	for jobNum in todo:
		yield (jobNum, None)
	# purge the retrieved jobs from the CE
	purgeLog = tempfile.mktemp('.log')
	purgeProc = LoggedProcess(
		utils.resolveInstallPath('glite-ce-job-purge'),
		'--noint --logfile "%s" %s' % (purgeLog, str.join(' ', done)))
	retCode = purgeProc.wait()
	if retCode != 0:
		if self.explainError(purgeProc, retCode):
			pass
		else:
			proc.logError(self.errorLog, log=purgeLog, jobs=done)
	utils.removeFiles([log, purgeLog, basePath])
def LoggedGet(self, source, destination):
	"""Fetch *source* to *destination* using the copy wrapper."""
	copy_tool = self._copy
	return LoggedProcess(copy_tool.cmd,
		copy_tool.args(source=source, destination=destination),
		niceCmd=copy_tool.niceCmd())
def LoggedDelete(self, target):
	"""Delete *target* using the delete wrapper."""
	delete_tool = self._delete
	return LoggedProcess(delete_tool.cmd,
		delete_tool.args(target=target),
		niceCmd=delete_tool.niceCmd())