def _resolveInputSandbox(self, inputSandbox):
    """ Internal function. Resolves wildcards and directories for input sandbox files.

        Plain (non-wildcard, non-directory) entries are passed through first and
        unchanged. Wildcard entries are then expanded, and every directory (given
        explicitly or matched by a wildcard) is packed with ``tar`` into
        ``<basename>.tar.gz`` in the current working directory.

        :param list inputSandbox: file names, directory names and wildcard patterns
        :return: list of resolved names; directories are replaced by their tarball
    """
    import glob  # local import: only needed for the wildcard expansion below

    def packDirectory(path):
        """ Tar ``path`` into ``<basename>.tar.gz``; return the tarball name, or None on failure """
        if path.endswith('/'):  # users can specify e.g. /my/dir/lib/
            path = path[:-1]
        tarName = os.path.basename(path)
        directory = os.path.dirname(path)  # if just the directory this is empty
        # Build argv directly so that paths containing whitespace are not split apart
        cmd = ['tar', 'cfz', tarName + '.tar.gz']
        if directory:
            cmd += ['-C', directory]
        cmd.append(tarName)
        output = systemCall(60, cmd)
        if not output['OK']:
            self.log.error('Could not perform: %s' % ' '.join(cmd))
            return None
        if output['Value'][0]:
            # tar ran but reported a failure: do not advertise a tarball that may be missing
            self.log.error('Could not perform: %s' % ' '.join(cmd), output['Value'][2])
            return None
        return tarName + '.tar.gz'

    # Plain files first, in the order they were given
    resolvedIS = [entry for entry in inputSandbox
                  if '*' not in entry and not os.path.isdir(entry)]

    for name in inputSandbox:
        if '*' in name:
            # The pattern is expanded here: an external "ls" started without a shell
            # receives the star literally and never matches anything.
            matches = sorted(glob.glob(name))
            if not matches:
                self.log.error(" Failed getting the files ", name)
            for check in matches:
                if os.path.isfile(check):
                    self.log.verbose('Found file ' + check + ' appending to Input Sandbox')
                    resolvedIS.append(check)
                if os.path.isdir(check):
                    tarball = packDirectory(check)
                    if tarball:
                        resolvedIS.append(tarball)
                        self.log.verbose('Found directory ' + check + ', appending ' +
                                         check + '.tar.gz to Input Sandbox')

        if os.path.isdir(name):
            self.log.verbose('Found specified directory ' + name + ', appending ' +
                             name + '.tar.gz to Input Sandbox')
            tarball = packDirectory(name)
            if tarball:
                resolvedIS.append(tarball)

    return resolvedIS
def _resolveInputSandbox(self, inputSandbox):
    """ Internal function. Resolves wildcards and directories for input sandbox files.

        Plain (non-wildcard, non-directory) entries are passed through first and
        unchanged. Wildcard entries are then expanded, and every directory (given
        explicitly or matched by a wildcard) is packed with ``tar`` into
        ``<basename>.tar.gz`` in the current working directory.

        :param list inputSandbox: file names, directory names and wildcard patterns
        :return: list of resolved names; directories are replaced by their tarball
    """
    import glob  # local import: only needed for the wildcard expansion below

    def packDirectory(path):
        """ Tar ``path`` into ``<basename>.tar.gz``; return the tarball name, or None on failure """
        if path.endswith('/'):  # users can specify e.g. /my/dir/lib/
            path = path[:-1]
        tarName = os.path.basename(path)
        directory = os.path.dirname(path)  # if just the directory this is empty
        # Build argv directly so that paths containing whitespace are not split apart
        cmd = ['tar', 'cfz', tarName + '.tar.gz']
        if directory:
            cmd += ['-C', directory]
        cmd.append(tarName)
        output = systemCall(60, cmd)
        if not output['OK']:
            self.log.error('Could not perform: %s' % ' '.join(cmd))
            return None
        if output['Value'][0]:
            # tar ran but reported a failure: do not advertise a tarball that may be missing
            self.log.error('Could not perform: %s' % ' '.join(cmd), output['Value'][2])
            return None
        return tarName + '.tar.gz'

    # Plain files first, in the order they were given
    resolvedIS = [entry for entry in inputSandbox
                  if '*' not in entry and not os.path.isdir(entry)]

    for name in inputSandbox:
        if '*' in name:
            # The pattern is expanded here: an external "ls" started without a shell
            # receives the star literally and never matches anything.
            matches = sorted(glob.glob(name))
            if not matches:
                self.log.error(" Failed getting the files ", name)
            for check in matches:
                if os.path.isfile(check):
                    self.log.verbose('Found file ' + check + ' appending to Input Sandbox')
                    resolvedIS.append(check)
                if os.path.isdir(check):
                    tarball = packDirectory(check)
                    if tarball:
                        resolvedIS.append(tarball)
                        self.log.verbose('Found directory ' + check + ', appending ' +
                                         check + '.tar.gz to Input Sandbox')

        if os.path.isdir(name):
            self.log.verbose('Found specified directory ' + name + ', appending ' +
                             name + '.tar.gz to Input Sandbox')
            tarball = packDirectory(name)
            if tarball:
                resolvedIS.append(tarball)

    return resolvedIS
def test_decodingCommandOutput():
    """An undecodable byte on stdout or stderr must come back as U+FFFD."""
    # stdout, without and with the trailing newline echo adds
    echoCases = (
        (["echo", "-e", "-n", r"\xdf"], u"\ufffd"),
        (["echo", "-e", r"\xdf"], u"\ufffd\n"),
    )
    for argv, expectedStdout in echoCases:
        outcome = systemCall(10, argv)
        assert outcome["OK"]
        assert outcome["Value"] == (0, expectedStdout, "")

    # stderr, written as raw bytes by a child python process
    runner = Subprocess()
    outcome = runner.systemCall(r"""python -c 'import os; os.fdopen(2, "wb").write(b"\xdf")'""", shell=True)
    assert outcome["OK"]
    assert outcome["Value"] == (0, "", u"\ufffd")
def _executeCommand(self):
    """ Execute self.command (uses systemCall) and write its output to the application log.

        stdout (and stderr, when not empty) are written to ``self.applicationLog`` in
        the current working directory, replacing any existing file of that name.

        :raises RuntimeError: if the command could not be run at all, or if it
                              exited with a non-zero status
    """
    outputDict = systemCall(timeout=0,
                            cmdSeq=shlex.split(self.command),
                            env=self.environment,
                            callbackFunction=self.callbackFunction,
                            bufferLimit=self.bufferLimit)
    if not outputDict['OK']:
        self.log.error('System call execution failed:', '\n' + str(outputDict['Message']))
        # A failed call carries no 'Value', so unpacking it below would raise a
        # KeyError. Fail with the documented exception type instead; 1000 stands
        # in for the exit status that does not exist in this case.
        raise RuntimeError("'%s' System call execution failed: %s"
                           % (os.path.basename(self.executable).split('_')[0], outputDict['Message']),
                           1000)

    status, stdout, stderr = outputDict['Value'][0:3]
    failed = bool(status)
    if failed:
        self.log.error("Non-zero status while executing", "%s: %s" % (status, self.command))
    else:
        self.log.info("%s execution completed with status %s" % (self.executable, status))

    self.log.verbose(stdout)
    self.log.verbose(stderr)
    if os.path.exists(self.applicationLog):
        self.log.verbose('Removing existing %s' % self.applicationLog)
        os.remove(self.applicationLog)
    with open('%s/%s' % (os.getcwd(), self.applicationLog), 'w') as fopen:
        fopen.write("<<<<<<<<<< %s Standard Output >>>>>>>>>>\n\n%s " % (self.executable, stdout))
        if stderr:
            fopen.write("<<<<<<<<<< %s Standard Error >>>>>>>>>>\n\n%s " % (self.executable, stderr))
    self.log.info("Output written to %s, execution complete." % (self.applicationLog))

    if failed:
        raise RuntimeError("'%s' Exited With Status %s"
                           % (os.path.basename(self.executable).split('_')[0], status),
                           status)
def _prepareHost(self):
    """ Prepare directories and copy control script

        Creates the shared, executable, info, batch output/error and work areas
        with ``mkdir -p``, then copies the control script into the shared area
        and makes it executable (mode 0755).

        :return: S_OK() on success, an S_ERROR structure otherwise
    """
    from DIRAC import S_OK  # local import: the success return below needs it

    # Make remote directories
    dirTuple = uniqueElements([
        self.sharedArea, self.executableArea, self.infoArea, self.batchOutput,
        self.batchError, self.workArea
    ])
    cmdTuple = ['mkdir', '-p'] + dirTuple
    self.log.verbose('Creating working directories')
    result = systemCall(30, cmdTuple)
    if not result['OK']:
        # 'Message' is a plain string: log all of it, not a single character of it
        self.log.warn('Failed creating working directories: %s' % result['Message'])
        return result
    status, output, error = result['Value']
    if status != 0:
        # mkdir reports its problems on stderr; fall back to stdout if that is empty
        reason = error or output
        self.log.warn('Failed to create directories: %s' % reason)
        return S_ERROR('Failed to create directories: %s' % reason)

    # copy the control script now
    localScript = os.path.join(rootPath, "DIRAC", "Resources", "Computing",
                               "remote_scripts", self.controlScript)
    self.log.verbose('Copying %s script' % self.controlScript)
    try:
        shutil.copy(localScript, self.sharedArea)
        # Chmod the control scripts
        self.finalScript = os.path.join(self.sharedArea, self.controlScript)
        os.chmod(self.finalScript, 0o755)
    except Exception as x:
        self.log.warn('Failed copying control script', x)
        return S_ERROR(x)

    # Success returns the same kind of structure as the failure paths
    return S_OK()
def _prepareHost(self):
    """ Prepare directories and copy control script

        Creates the shared, executable, info, batch output/error and work areas
        with ``mkdir -p``, then copies the control script into the shared area
        and makes it executable (mode 0755).

        :return: S_OK() on success, an S_ERROR structure otherwise
    """
    from DIRAC import S_OK  # local import: the success return below needs it

    # Make remote directories
    dirTuple = uniqueElements([
        self.sharedArea, self.executableArea, self.infoArea, self.batchOutput,
        self.batchError, self.workArea
    ])
    cmdTuple = ['mkdir', '-p'] + dirTuple
    self.log.verbose('Creating working directories')
    result = systemCall(30, cmdTuple)
    if not result['OK']:
        # 'Message' is a plain string: log all of it, not a single character of it
        self.log.warn('Failed creating working directories: %s' % result['Message'])
        return result
    status, output, error = result['Value']
    if status != 0:
        # mkdir reports its problems on stderr; fall back to stdout if that is empty
        reason = error or output
        self.log.warn('Failed to create directories: %s' % reason)
        return S_ERROR('Failed to create directories: %s' % reason)

    # copy the control script now
    localScript = os.path.join(rootPath, "DIRAC", "Resources", "Computing",
                               "remote_scripts", self.controlScript)
    self.log.verbose('Copying %s script' % self.controlScript)
    try:
        shutil.copy(localScript, self.sharedArea)
        # Chmod the control scripts
        self.finalScript = os.path.join(self.sharedArea, self.controlScript)
        os.chmod(self.finalScript, 0o755)
    except Exception as x:
        self.log.warn('Failed copying control script', x)
        return S_ERROR(x)

    # Success returns the same kind of structure as the failure paths
    return S_OK()
def main():
    """ Run corsika for the run number found in the job JDL and rename its output.

        Positional arguments: the input card and, optionally, a prefix for the
        name of the output file.

        :return: DIRAC.S_OK() on success, otherwise the failing result structure
    """
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()
    from DIRAC.Interfaces.API.Dirac import Dirac

    # get arguments
    input_card = args[0]
    outprefix = ""
    if len(args) == 2:
        outprefix = args[1]

    # get Parameter and set run_number
    if 'JOBID' not in os.environ:
        # Without a job id there is no JDL to read the run number from
        return DIRAC.S_ERROR('JOBID is not set in the environment: cannot get the run number')
    jobID = os.environ['JOBID']
    dirac = Dirac()
    res = dirac.getJobJDL(jobID)
    if not res['OK']:
        return res
    run_number = res['Value']['Parameter.run']

    # run corsika
    os.system('chmod +x dirac_prod3_corsika_only')
    cmdTuple = ['./dirac_prod3_corsika_only', '--run', run_number, input_card]
    DIRAC.gLogger.notice('Executing command tuple:', cmdTuple)
    res = systemCall(0, cmdTuple, sendOutput)
    if not res['OK']:
        return res

    # rename output file
    outfile = '%srun%s.corsika.gz' % (outprefix, run_number)
    # the wildcards need a shell, hence os.system
    cmd = 'mv Data/corsika/*/*corsika.gz %s' % outfile
    if os.system(cmd):
        DIRAC.exit(-1)

    return DIRAC.S_OK()
def execute(self):
    """ The method called by the Workflow framework

        Checks the inputs, fetches the environment of the software package and
        runs evndisp in it.

        :return: the evndisp exit status when it could be started, otherwise an
                 error structure
    """
    from DIRAC.Core.Utilities.Subprocess import systemCall
    from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron

    inputCheck = self.__checkInputs()
    if not inputCheck['OK']:
        return inputCheck

    environResult = getSoftwareEnviron(self.softwarePackage)
    if not environResult['OK']:
        error = environResult['Message']
        self.log.error(error, self.softwarePackage)
        return DIRAC.S_ERROR(' '.join([error, str(self.softwarePackage)]))
    envdispEnviron = environResult['Value']

    # executable first, then its arguments
    cmdTuple = [self.edExe]
    cmdTuple += self.edArguments

    self.log.notice('Executing command tuple:', cmdTuple)
    callResult = systemCall(0, cmdTuple, self.sendOutput, env=envdispEnviron)
    if not callResult['OK']:
        self.log.error('Failed to execute evndisp:', callResult['Message'])
        return DIRAC.S_ERROR('Can not execute evndisp')

    status, _stdout, _stderr = callResult['Value']
    self.log.notice('evndisp status is:', status)
    return status
def _executeCommand(self):
    """ Execute self.command (uses systemCall) and write its output to the application log.

        stdout (and stderr, when not empty) are written to ``self.applicationLog`` in
        the current working directory, replacing any existing file of that name.
        Failures are reported through ``self._exitWithError``.
    """
    outputDict = systemCall(timeout=0,
                            cmdSeq=shlex.split(self.command),
                            env=self.environment,
                            callbackFunction=self.callbackFunction,
                            bufferLimit=self.bufferLimit)
    if not outputDict['OK']:
        self.log.error('System call execution failed:', '\n' + str(outputDict['Message']))
        # A failed call carries no 'Value', so unpacking it below would raise a
        # KeyError. Report the failure now; 1000 stands in for the missing exit status.
        self._exitWithError(1000)
        return  # only reached if _exitWithError comes back

    status, stdout, stderr = outputDict['Value'][0:3]
    failed = bool(status)
    if failed:
        self.log.error("Non-zero status while executing", "%s: %s" % (status, self.command))
    else:
        self.log.info("%s execution completed with status %s" % (self.command, status))

    self.log.verbose(stdout)
    self.log.verbose(stderr)
    if os.path.exists(self.applicationLog):
        self.log.verbose('Removing existing %s' % self.applicationLog)
        os.remove(self.applicationLog)
    with open('%s/%s' % (os.getcwd(), self.applicationLog), 'w') as fopen:
        fopen.write("<<<<<<<<<< %s Standard Output >>>>>>>>>>\n\n%s " % (self.executable, stdout))
        if stderr:
            fopen.write("<<<<<<<<<< %s Standard Error >>>>>>>>>>\n\n%s " % (self.executable, stderr))
    self.log.info("Output written to %s, execution complete." % (self.applicationLog))

    if failed:
        self._exitWithError(status)
def getJobStatus(self, jobIDList):
    """ Get the status information for the given list of jobs

        :param list jobIDList: job identifiers; the part before the first '.' is
                               used to find the job in the qstat output
        :return: S_OK with a dict {jobID: 'Unknown' | 'Done' | 'Running' | 'Waiting'},
                 or the failing systemCall result
    """
    # Empty identifiers are ignored everywhere, including on the command line
    validJobs = [job for job in jobIDList if job]
    jobDict = {}
    for job in validJobs:
        jobDict[job.split('.')[0]] = job

    result = systemCall(10, ['qstat'] + validJobs)
    if not result['OK']:
        return result

    lines = result['Value'][1].replace('\r', '').split('\n')
    resultDict = {}
    for jobNumber, job in jobDict.items():
        resultDict[job] = 'Unknown'
        for line in lines:
            # NOTE(review): substring match, so job number "12" also matches a
            # line about "123" - confirm against the real qstat output format
            if jobNumber not in line:
                continue
            if 'Unknown' in line:
                resultDict[job] = 'Unknown'
                continue
            fields = line.split()
            if len(fields) < 5:
                # not a status row (too few columns): nothing to read from it
                continue
            torqueStatus = fields[4]
            if torqueStatus in ['E', 'C']:
                resultDict[job] = 'Done'
            elif torqueStatus in ['R']:
                resultDict[job] = 'Running'
            elif torqueStatus in ['S', 'W', 'Q', 'H', 'T']:
                resultDict[job] = 'Waiting'

    return S_OK(resultDict)
def testTimeouts(self):
    """ test timeouts: each call flavour must report the timeout error structure """
    timedOut = {'Message': 'Timed out after 3 seconds', 'OK': False}

    def pyfunc(name):
        time.sleep(10)
        return name

    # systemCall
    outcome = systemCall(timeout=self.timeout, cmdSeq=self.cmd)
    self.assertEqual(outcome, timedOut)

    # shellCall
    outcome = shellCall(timeout=self.timeout, cmdSeq=" ".join(self.cmd))
    self.assertEqual(outcome, timedOut)

    # pythonCall
    outcome = pythonCall(self.timeout, pyfunc, "Krzysztof")
    self.assertEqual(outcome, timedOut)
def __executeInProcess(self, executableFile):
    """ Run executableFile with the payload proxy exported in the environment.

        :param str executableFile: path of the file to execute
        :return: the failing systemCall result, or whatever the exit code analysis returns
    """
    # the child inherits os.environ, so the proxy location is exported there
    os.environ['X509_USER_PROXY'] = self.__payloadProxyLocation
    self.__addperm(executableFile, stat.S_IRWXU)
    outcome = systemCall(0, [executableFile], callbackFunction=self.sendOutput)
    if outcome['OK']:
        return self.__analyzeExitCode(outcome['Value'])
    return outcome
def test_calls(timeout, expected):
    """systemCall, shellCall and pythonCall must all report the expected 'OK' flag."""
    attempts = (
        lambda: systemCall(timeout, cmdSeq=cmd),
        lambda: shellCall(timeout, cmdSeq=" ".join(cmd)),
        lambda: pythonCall(timeout, pyfunc, "something"),
    )
    # run them one after the other, checking each before starting the next
    for attempt in attempts:
        outcome = attempt()
        assert outcome["OK"] == expected
def __getDirectorySize(path):
    """Get the total size of the given directory in bytes

    :param str path: directory to measure
    :return: size in bytes as reported by ``du -sb``, or 0 when it cannot be determined
    """
    # The path is passed as its own argument so whitespace or quotes in it survive intact
    result = systemCall(10, ["du", "-sb", path])
    if not result["OK"] or result["Value"][0]:
        return 0
    try:
        # du prints "<size>\t<path>"
        return int(result["Value"][1].split()[0])
    except (IndexError, ValueError):
        # empty or unexpected output
        return 0
def _getJobStatus(self, jobIDList):
    """ Get the status information for the given list of jobs

        :param list jobIDList: job references; the 'FileName' part returned by
                               pfnparse is used as the job stamp
        :return: S_OK with a dict {job reference: status string}, or S_ERROR
    """
    resultDict = {}
    jobDict = {}
    for job in jobIDList:
        result = pfnparse(job)
        if not result['OK']:
            self.log.error('Invalid job id', job)
            continue
        jobDict[result['Value']['FileName']] = job

    cmdTuple = [self.finalScript, 'job_status', '#'.join(jobDict), self.infoArea, self.userName]
    result = systemCall(10, cmdTuple)
    if not result['OK']:
        return result

    status = result['Value'][0]
    stdout = result['Value'][1]
    stderr = result['Value'][2]

    # Examine results of the job status
    if status != 0:
        return S_ERROR('\n'.join([stdout, stderr]))

    outputLines = stdout.strip().replace('\r', '').split('\n')
    marker = '============= Start output ==============='
    if marker not in outputLines:
        return S_ERROR("Invalid output from CE get status: %s" % outputLines[0])
    outputLines = outputLines[outputLines.index(marker) + 1:]
    if not outputLines:
        # nothing after the marker: there is no status line to report or parse
        return S_ERROR("Failed to get CE status: no output after the start marker")
    try:
        status = int(outputLines[0])
    except ValueError:
        return S_ERROR("Failed to get CE status: %s" % outputLines[0])
    if status != 0:
        message = "Unknown reason"
        if len(outputLines) > 1:
            message = outputLines[1]
        return S_ERROR('Failed to get CE status, reason: %s' % message)

    for line in outputLines[1:]:
        if ':::' in line:
            jbundle = line.split(':::')
            # ignore stamps that were not asked for instead of failing on them
            if len(jbundle) == 2 and jbundle[0] in jobDict:
                resultDict[jobDict[jbundle[0]]] = jbundle[1]

    return S_OK(resultDict)
def doTest(self, elementDict):
    """ Test upload and download for specified SE.

        :param dict elementDict: must provide 'ElementName' and 'VO'
        :return: S_OK with {'Result': {...}, 'Finish': True}, where 'Result' holds
                 Status, Log, SubmissionTime, CompletionTime and ApplicationTime;
                 the failing result if no proxy could be obtained
    """
    elementName = elementDict['ElementName']
    vo = elementDict['VO']

    testFilePath = self.__localPath + self.__testFile
    # isfile() is also False when the path does not exist at all
    if not os.path.isfile(testFilePath):
        with open(testFilePath, 'w') as f:
            f.write('hello')

    status = 'OK'
    log = ''
    lfnPath = self.__lfnPath.format(vo=vo) + elementName + '-' + self.__testFile
    submissionTime = datetime.utcnow().replace(microsecond=0)

    # NOTE(review): the user name is hard-coded - confirm this is intended
    proxyPath = BESUtils.getProxyByVO('zhangxm', vo)
    if not proxyPath['OK']:
        gLogger.error('Can not get proxy for VO %s' % vo)
        return proxyPath
    proxyPath = proxyPath['Value']

    env_test = os.environ.copy()
    env_test['X509_USER_PROXY'] = proxyPath
    cmd = [os.path.join(self.__scriptPath, self.__scriptName), '-o',
           '/DIRAC/Security/UseServerCertificate=no', lfnPath, testFilePath, elementName]
    result = systemCall(300, cmd, env=env_test)
    # was a bare print statement: keep the information, but in the log
    gLogger.debug('%s result: %s' % (self.__scriptName, result))

    if not result['OK']:
        status = 'Bad'
        log += 'Call %s failed: %s' % (self.__scriptName, result['Message'])
    elif result['Value'][0] != 0:
        status = 'Bad'
        log += '%s exit with error %s:\n%s' % (self.__scriptName, result['Value'][0], result['Value'][1])
    else:
        log += '%s exit successfully:\n%s' % (self.__scriptName, result['Value'][1])

    completionTime = datetime.utcnow().replace(microsecond=0)
    applicationTime = (completionTime - submissionTime).total_seconds()

    result = {'Result': {'Status': status,
                         'Log': log,
                         'SubmissionTime': submissionTime,
                         'CompletionTime': completionTime,
                         'ApplicationTime': applicationTime},
              'Finish': True}

    # remove the per-element local copy if one is there
    localFile = self.__localPath + elementName + '-' + self.__testFile
    if os.path.isfile(localFile):
        os.remove(localFile)

    return S_OK(result)
def getRootFileGUID(fileName):
    """ Function to retrieve a file GUID using Root.

        :param str fileName: file handed to getROOTFileGUID.py
        :return: S_OK(stdout of the script), S_ERROR(stderr) on a non-zero exit
                 status, or the failing systemCall result
    """
    # The file name is its own argument, so whitespace or quotes in it survive intact
    res = systemCall(timeout=0, cmdSeq=["getROOTFileGUID.py", fileName])
    if not res['OK']:
        return res
    if res['Value'][0]:
        return S_ERROR(res['Value'][2])
    return S_OK(res['Value'][1])
def listAreaDirectory(area):
    """ Log the output of ``ls -al`` for the given area, or the error if it fails """
    log = DIRAC.gLogger
    log.info("Listing content of area %s :" % (area))
    listing = systemCall(5, ['ls', '-al', area])
    if not listing['OK']:
        log.error('Failed to list the area directory', listing['Message'])
        return
    if listing['Value'][0]:
        log.error('Failed to list the area directory', listing['Value'][2])
        return
    # no timeout and exit code is 0
    log.info(listing['Value'][1])
def __getDirectorySize(path):
    """ Get the total size of the given directory in bytes

        :param str path: directory to measure
        :return: size in bytes as reported by ``du -sb``, or 0 when it cannot be determined
    """
    # The path is passed as its own argument so whitespace or quotes in it survive intact
    result = systemCall(10, ['du', '-sb', path])
    if not result['OK'] or result['Value'][0]:
        return 0
    try:
        # du prints "<size>\t<path>"
        return int(result['Value'][1].split()[0])
    except (IndexError, ValueError):
        # empty or unexpected output
        return 0
def execute(self):
    """ The method called by the Workflow framework

        Copies rootlogon.C from $HESSROOT into the current directory and runs
        the Hap root macro in batch mode inside the software environment.

        :return: DIRAC.S_OK() when root could be started, S_ERROR otherwise
    """
    import shutil  # local import: only used for the rootlogon.C copy

    from DIRAC.Core.Utilities.Subprocess import systemCall
    from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron

    ret = self.__checkInputs()
    if not ret['OK']:
        return ret

    ret = getSoftwareEnviron(self.softwarePackage)
    if not ret['OK']:
        error = ret['Message']
        self.log.error(error, self.softwarePackage)
        return DIRAC.S_ERROR(' '.join([error, str(self.softwarePackage)]))

    hapEnviron = ret['Value']
    hessroot = hapEnviron['HESSROOT']

    # Best effort, as before: a failed copy is reported but does not stop the job
    try:
        shutil.copy(hessroot + '/rootlogon.C', '.')
    except (IOError, OSError) as err:
        self.log.warn('Could not copy rootlogon.C:', str(err))

    macro = hessroot + self.rootMacro
    # If the macro has to be compiled there is an extra "+" at the end of its
    # name; the file on disk does not carry it
    macroFile = macro[:-1] if macro[-1] == '+' else macro
    if not os.path.isfile(macroFile):
        error = 'Hap Root macro file does not exist:'
        self.log.error(error, macroFile)
        return DIRAC.S_ERROR(' '.join([error, macroFile]))

    cmdTuple = ['root', '-b', '-q']
    cmdTuple += ['%s( %s )' % (macro, ', '.join(self.rootArguments).replace("'", '"'))]

    self.log.notice('Executing command tuple:', cmdTuple)
    ret = systemCall(0, cmdTuple, self.sendOutput, env=hapEnviron)
    if not ret['OK']:
        self.log.error('Failed to execute Root:', ret['Message'])
        return DIRAC.S_ERROR('Can not execute Hap root macro')

    return DIRAC.S_OK()
def _runApp(self, command, env=None):
    """ Safe system call of a command

        :param command basestring: the command to run
        :param env dict: environment where to run -- maybe the LHCb environment from LbLogin
        :return: the systemCall result structure
    """
    # Really printing here as we want to see and maybe cut/paste.
    # Call form with a single argument: same output on Python 2 and 3.
    print('Command called: \n%s' % command)

    return systemCall(timeout=0,
                      cmdSeq=shlex.split(command),
                      callbackFunction=self.__redirectLogOutput,
                      env=env)
def submitJob(self, executableFile, proxy, dummy=None):
    """ Method to submit job, should be overridden in sub-class.

        Runs executableFile in this process' context, with the payload proxy (if
        one is given) written to a file and exported as X509_USER_PROXY.

        :param str executableFile: file to execute; made executable if needed
        :param proxy: payload proxy, written out through self.writeProxyToFile
        :param dummy: unused
        :return: S_OK(), carrying 'PayloadFailed' when the payload exited with a
                 positive status; S_ERROR on a negative exit status or when the
                 proxy could not be written
    """
    # FIXME: when not running on a WN this will not work
    ret = getProxyInfo()
    pilotProxy = ret['Value']['path'] if ret['OK'] else None
    # single-argument call form: same output on Python 2 and 3
    print('pilotProxy %s' % pilotProxy)

    payloadEnv = dict(os.environ)
    payloadProxy = ''
    if proxy:
        self.log.verbose('Setting up proxy for payload')
        result = self.writeProxyToFile(proxy)
        if not result['OK']:
            return result
        payloadProxy = result['Value']
        payloadEnv['X509_USER_PROXY'] = payloadProxy

    # NOTE(review): assumed to run whether or not a payload proxy was given - confirm
    self.log.verbose('Starting process for monitoring payload proxy')
    gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod, self.monitorProxy,
                                     taskArgs=(pilotProxy, payloadProxy),
                                     executions=0, elapsedTime=0)

    # readable and executable (R_OK | X_OK is the former literal 5)
    if not os.access(executableFile, os.R_OK | os.X_OK):
        os.chmod(executableFile, 0o755)
    cmd = os.path.abspath(executableFile)
    self.log.verbose('CE submission command: %s' % (cmd))
    result = systemCall(0, cmd, callbackFunction=self.sendOutput, env=payloadEnv)
    if payloadProxy:
        os.unlink(payloadProxy)

    ret = S_OK()
    if not result['OK']:
        self.log.error('Fail to run InProcess', result['Message'])
    elif result['Value'][0] < 0:
        self.log.error('InProcess Job Execution Failed')
        self.log.info('Exit status:', result['Value'][0])
        return S_ERROR('InProcess Job Execution Failed')
    elif result['Value'][0] > 0:
        self.log.error('Fail in payload execution')
        self.log.info('Exit status:', result['Value'][0])
        ret['PayloadFailed'] = result['Value'][0]
    else:
        self.log.debug('InProcess CE result OK')

    self.submittedJobs += 1
    return ret
def _getJobStatus(self, jobIDList):
    """ Get the status information for the given list of jobs

        :param list jobIDList: job references; the 'FileName' part returned by
                               pfnparse is used as the job stamp
        :return: S_OK with a dict {job reference: status string}, or S_ERROR
    """
    resultDict = {}
    jobDict = {}
    for job in jobIDList:
        result = pfnparse(job)
        if not result['OK']:
            self.log.error('Invalid job id', job)
            continue
        jobDict[result['Value']['FileName']] = job

    cmdTuple = [self.finalScript, 'job_status', '#'.join(jobDict), self.infoArea, self.userName]
    result = systemCall(10, cmdTuple)
    if not result['OK']:
        return result

    status = result['Value'][0]
    stdout = result['Value'][1]
    stderr = result['Value'][2]

    # Examine results of the job status
    if status != 0:
        return S_ERROR('\n'.join([stdout, stderr]))

    outputLines = stdout.strip().replace('\r', '').split('\n')
    marker = '============= Start output ==============='
    if marker not in outputLines:
        return S_ERROR("Invalid output from CE get status: %s" % outputLines[0])
    outputLines = outputLines[outputLines.index(marker) + 1:]
    if not outputLines:
        # nothing after the marker: there is no status line to report or parse
        return S_ERROR("Failed to get CE status: no output after the start marker")
    try:
        status = int(outputLines[0])
    except ValueError:
        return S_ERROR("Failed to get CE status: %s" % outputLines[0])
    if status != 0:
        message = "Unknown reason"
        if len(outputLines) > 1:
            message = outputLines[1]
        return S_ERROR('Failed to get CE status, reason: %s' % message)

    for line in outputLines[1:]:
        if ':::' in line:
            jbundle = line.split(':::')
            # ignore stamps that were not asked for instead of failing on them
            if len(jbundle) == 2 and jbundle[0] in jobDict:
                resultDict[jobDict[jbundle[0]]] = jbundle[1]

    return S_OK(resultDict)
def listAreaDirectory(area):
    """ Log the output of ``ls -al`` for the given area, or the error if it fails """
    log = DIRAC.gLogger
    log.info("Listing content of area %s :" % (area))
    listing = systemCall(5, ['ls', '-al', area])
    if not listing['OK']:
        log.error('Failed to list the area directory', listing['Message'])
        return
    if listing['Value'][0]:
        log.error('Failed to list the area directory', listing['Value'][2])
        return
    # no timeout and exit code is 0
    log.info(listing['Value'][1])
def execute(self):
    """ The method called by the Workflow framework

        Runs the root macro in batch mode inside the environment of the
        software package.

        :return: DIRAC.S_OK() if root exits with status 0, S_ERROR otherwise
    """
    from DIRAC.Core.Utilities.Subprocess import systemCall
    from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron

    inputCheck = self.__checkInputs()
    if not inputCheck['OK']:
        return inputCheck

    environResult = getSoftwareEnviron(self.softwarePackage)
    if not environResult['OK']:
        error = environResult['Message']
        self.log.error(error, self.softwarePackage)
        return DIRAC.S_ERROR(' '.join([error, str(self.softwarePackage)]))
    rootEnviron = environResult['Value']

    # If the macro has to be compiled there is an extra "+" at the end of its
    # name, which the file on disk does not carry
    if self.rootMacro[-1] == '+':
        fileName = self.rootMacro[:-1]
    else:
        fileName = self.rootMacro
    if not os.path.isfile(fileName):
        error = 'Root macro file does not exist:'
        self.log.error(error, fileName)
        return DIRAC.S_ERROR(' '.join([error, fileName]))

    macroCall = '%s( %s )' % (self.rootMacro, ', '.join(self.rootArguments).replace("'", '"'))
    cmdTuple = ['root', '-b', '-q', macroCall]

    self.log.info('Executing command tuple:', cmdTuple)
    callResult = systemCall(0, cmdTuple, env=rootEnviron)
    if not callResult['OK']:
        self.log.error('Failed to execute Root:', callResult['Message'])
        return DIRAC.S_ERROR('Can not execute root')

    status, stdout, stderr = callResult['Value']
    if status:
        self.log.error('Root execution reports Error:', status)
        self.log.error(stdout)
        self.log.error(stderr)
        return DIRAC.S_ERROR('Failed root Execution')

    self.log.info('Root stdout:')
    self.log.info(stdout)
    return DIRAC.S_OK()
def _killJobs(self, jobIDList, host=None):
    """ Kill the jobs for the given list of job IDs

        :param list jobIDList: job references; the 'FileName' part returned by
                               pfnparse is used as the job stamp
        :param host: unused here
        :return: S_OK() when the control script exits with status 0, S_ERROR otherwise
    """
    jobDict = {}
    for job in jobIDList:
        result = pfnparse(job)
        if not result['OK']:
            self.log.error('Invalid job id', job)
            continue
        jobDict[result['Value']['FileName']] = job

    cmdTuple = [self.finalScript, 'kill_job', '#'.join(jobDict), self.infoArea]
    result = systemCall(10, cmdTuple)
    if not result['OK']:
        return result

    status = result['Value'][0]
    stdout = result['Value'][1]
    stderr = result['Value'][2]

    if status == 0:
        return S_OK()

    # Examine results of the job submission.
    # NOTE(review): the script output is only inspected on a non-zero exit
    # status - confirm this is the intended condition.
    outputLines = stdout.strip().replace('\r', '').split('\n')
    marker = '============= Start output ==============='
    if marker not in outputLines:
        return S_ERROR("Invalid output from kill Job: %s" % outputLines[0])
    outputLines = outputLines[outputLines.index(marker) + 1:]
    if not outputLines:
        # nothing after the marker: there is no status line to report or parse
        return S_ERROR("Failed to kill Job: no output after the start marker")
    try:
        status = int(outputLines[0])
    except ValueError:
        return S_ERROR("Failed to kill Job: %s" % outputLines[0])
    if status != 0:
        message = "Unknown reason"
        if len(outputLines) > 1:
            message = outputLines[1]
        return S_ERROR('Failed to kill Job, reason: %s' % message)
    return S_ERROR('\n'.join([stdout, stderr]))
def install_CorsikaSimtelPack(version, build_dir):
    """ Make the corsika/simtel package available in the current directory.

        A package found in the shared area is copied from there. Otherwise it is
        installed in the working area and compiled with a generated
        run_compile.sh. Any failure ends the process through DIRAC.exit(-1).

        :param str version: version sub-directory of the package in the shared area
        :param build_dir: passed on to installSoftwareEnviron
        :return: DIRAC.S_OK()
    """
    from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage
    from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage
    from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron
    from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea
    from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea
    from DIRAC.Core.Utilities.Subprocess import systemCall

    packs = [CorsikaSimtelPack]

    for package in packs:
        DIRAC.gLogger.notice('Checking:', package)
        if sharedArea:
            if checkSoftwarePackage(package, sharedArea())['OK']:
                DIRAC.gLogger.notice('Package found in Shared Area:', package)
                installSoftwareEnviron(package, workingArea(), build_dir)
                packageTuple = package.split('/')
                corsika_subdir = os.path.join(sharedArea(), packageTuple[0], version)
                # the wildcard needs a shell, hence os.system
                cmd = 'cp -u -r ' + corsika_subdir + '/* .'
                if os.system(cmd):
                    DIRAC.exit(-1)
                continue
        if workingArea:
            # single-argument call form: same output on Python 2 and 3
            print('workingArea is %s ' % workingArea())
            if installSoftwarePackage(package, workingArea(), extract=False)['OK']:
                with open('run_compile.sh', 'w') as fd:
                    fd.write("""#! /bin/sh
current_dir=%s
mkdir sim sim-sc3
(cd sim && tar zxvf ${current_dir}/corsika_simhessarray.tar.gz && ./build_all prod2 qgs2)
(cd sim-sc3 && tar zxvf ${current_dir}/corsika_simhessarray.tar.gz && ./build_all sc3 qgs2)""" % (workingArea()))
                os.system('chmod u+x run_compile.sh')
                cmdTuple = ['./run_compile.sh']
                ret = systemCall(0, cmdTuple, sendOutput)
                if not ret['OK']:
                    DIRAC.gLogger.error('Failed to compile')
                    DIRAC.exit(-1)
                installSoftwareEnviron(package, workingArea(), build_dir)
                continue

        DIRAC.gLogger.error('Software package not correctly installed')
        DIRAC.exit(-1)

    # call S_OK: returning the bare function would hand callers something
    # they cannot index for 'OK'
    return DIRAC.S_OK()
def main():
    """ Install the requested software package if needed and run simtel on the
        corsika files found in the current directory.

        Positional arguments used: args[1] package, args[2] version,
        args[3] simtel configuration name (without '.cfg'), args[4] simtel options.

        :return: DIRAC.S_OK() on success, otherwise the failing result structure
    """
    Script.parseCommandLine(ignoreErrors=True)
    from CTADIRAC.Core.Utilities.Prod3SoftwareManager import Prod3SoftwareManager

    args = Script.getPositionalArgs()
    # single-argument call form: same output on Python 2 and 3
    print(args)

    # get arguments
    package = args[1]
    version = args[2]
    arch = "sl6-gcc44"
    simtelcfg = args[3] + '.cfg'
    simtelopts = args[4]

    # install software
    prod3swm = Prod3SoftwareManager()
    # check where package is installed
    res = prod3swm.checkSoftwarePackage(package, version, arch)
    if not res['OK']:
        res = prod3swm.installSoftwarePackage(package, version, arch)
        if not res['OK']:
            return res
        package_dir = res['Value']
        prod3swm.dumpSetupScriptPath(package_dir)
    else:
        # dump the SetupScriptPath to be sourced by DIRAC scripts
        # and copy DIRAC scripts in the current directory
        package_dir = res['Value']
        prod3swm.dumpSetupScriptPath(package_dir)
        res = prod3swm.installDIRACScripts(package_dir)
        if not res['OK']:
            return res

    # get input files: one string, each name preceded by a blank, handed over
    # as a single argument
    inputfilestr = ''.join(' ' + corsikafile for corsikafile in glob.glob('./*corsika.gz'))

    # run simtel_array
    cmdTuple = ['./dirac_prod3_simtel_only', simtelcfg, simtelopts, inputfilestr]
    DIRAC.gLogger.notice('Executing command tuple:', cmdTuple)
    res = systemCall(0, cmdTuple, sendOutput)
    if not res['OK']:
        return res

    return DIRAC.S_OK()
def __execute(self, executableList):
    """Run glexec with checking of the exit status code.
       With no executable it will renew the glexec proxy

       :param list executableList: executable followed by its arguments; may be empty
       :return: the failing systemCall result, or whatever the exit code analysis returns
    """
    glCmd = [self.__gl]
    if executableList:
        # Just in case: make the executable runnable and readable by group and others
        try:
            os.chmod(executableList[0],
                     os.stat(executableList[0])[0] | stat.S_IEXEC | stat.S_IRGRP |
                     stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
        except (OSError, TypeError) as err:
            # still best effort, but no longer silent
            self.log.warn('Could not change permissions of %s: %s' % (executableList[0], err))
        glCmd.extend(executableList)

    self.log.info('CE submission command is: %s' % glCmd)
    result = systemCall(0, glCmd, callbackFunction=self.sendOutput)
    if not result['OK']:
        return result
    return self.__analyzeExitCode(result['Value'])
def _executeCommand(self):
    """Run self.command, through RemoteRunner when it reports remote execution
    and through systemCall otherwise, and write the output to the application log."""
    runner = RemoteRunner()
    if runner.is_remote_execution():
        outcome = runner.execute(self.command)
    else:
        outcome = systemCall(
            timeout=0,
            cmdSeq=shlex.split(self.command),
            env=self.environment,
            callbackFunction=self.callbackFunction,
            bufferLimit=self.bufferLimit,
        )

    failed = not outcome["OK"]
    if failed:
        self.log.error("System call execution failed:", "\n" + str(outcome["Message"]))
        self._exitWithError(1000)

    status, stdout, stderr = outcome["Value"][0:3]
    if status:
        failed = True
        self.log.error("Non-zero status while executing", "%s exited with status %s" % (self.command, status))
    else:
        self.log.info("%s execution completed with status %s" % (self.command, status))

    for stream in (stdout, stderr):
        self.log.verbose(stream)

    # start from a fresh log file
    if os.path.exists(self.applicationLog):
        self.log.verbose("Removing existing %s" % self.applicationLog)
        os.remove(self.applicationLog)
    logPath = "%s/%s" % (os.getcwd(), self.applicationLog)
    with io.open(logPath, "wt") as logFile:
        logFile.write("<<<<<<<<<< %s Standard Output >>>>>>>>>>\n\n%s " % (self.executable, stdout))
        if stderr:
            logFile.write("<<<<<<<<<< %s Standard Error >>>>>>>>>>\n\n%s " % (self.executable, stderr))
    self.log.info("Output written to %s, execution complete." % (self.applicationLog))

    if failed:
        self._exitWithError(status)
def runCommand(cmd, timeout=120):
    """Wrapper around systemCall to return S_OK(stdout) or S_ERROR(message)

    On a non-zero exit status the error message is stdout if there is any,
    else stderr, else a generic sentence naming the status and the command.
    """
    outcome = systemCall(timeout=timeout, cmdSeq=shlex.split(cmd))
    if not outcome["OK"]:
        return outcome

    status, stdout, stderr = outcome["Value"][0:3]
    if not status:
        return S_OK(str(stdout))

    fallback = "Status %s while executing %s" % (status, cmd)
    gLogger.warn(fallback)
    gLogger.warn(stderr)
    return S_ERROR(stdout or stderr or fallback)
def __getPIDs(self, agentName):
    """return PID for agentName

    :param str agentName: name matched as ' RequestManagement/<agentName> '
    :return: S_OK with the list of PIDs as ints (empty when nothing matches),
             or the failing systemCall result
    """
    # Whitespaces around third argument are mandatory to only match the given agentName
    pidRes = systemCall(10, ['pgrep', '-f', ' RequestManagement/%s ' % agentName])
    if not pidRes['OK']:
        return pidRes

    pids = []
    for line in pidRes['Value'][1].splitlines():
        pi = line.strip()
        if not pi:
            # no match gives empty output: that is not a malformed PID
            continue
        try:
            pids.append(int(pi))
        except ValueError as e:
            self.log.error("Could not create int from PID: ", "PID %s: %s" % (pi, e))
    return S_OK(pids)
def _killJobs(self, jobIDList, host=None):
    """ Kill the jobs for the given list of job IDs

        :param list jobIDList: job references; the 'FileName' part returned by
                               pfnparse is used as the job stamp
        :param host: unused here
        :return: S_OK() when the control script exits with status 0, S_ERROR otherwise
    """
    jobDict = {}
    for job in jobIDList:
        result = pfnparse(job)
        if not result['OK']:
            self.log.error('Invalid job id', job)
            continue
        jobDict[result['Value']['FileName']] = job

    cmdTuple = [self.finalScript, 'kill_job', '#'.join(jobDict), self.infoArea]
    result = systemCall(10, cmdTuple)
    if not result['OK']:
        return result

    status = result['Value'][0]
    stdout = result['Value'][1]
    stderr = result['Value'][2]

    if status == 0:
        return S_OK()

    # Examine results of the job submission.
    # NOTE(review): the script output is only inspected on a non-zero exit
    # status - confirm this is the intended condition.
    outputLines = stdout.strip().replace('\r', '').split('\n')
    marker = '============= Start output ==============='
    if marker not in outputLines:
        return S_ERROR("Invalid output from kill Job: %s" % outputLines[0])
    outputLines = outputLines[outputLines.index(marker) + 1:]
    if not outputLines:
        # nothing after the marker: there is no status line to report or parse
        return S_ERROR("Failed to kill Job: no output after the start marker")
    try:
        status = int(outputLines[0])
    except ValueError:
        return S_ERROR("Failed to kill Job: %s" % outputLines[0])
    if status != 0:
        message = "Unknown reason"
        if len(outputLines) > 1:
            message = outputLines[1]
        return S_ERROR('Failed to kill Job, reason: %s' % message)
    return S_ERROR('\n'.join([stdout, stderr]))
def testNoTimeouts(self):
    """ systemCall, shellCall and pythonCall all complete when no timeout is set """

    def pyfunc(name):
        time.sleep(10)
        return name

    cleanRun = {"OK": True, "Value": (0, "", "")}

    # systemCall
    self.assertEqual(systemCall(timeout=False, cmdSeq=self.cmd), cleanRun)

    # shellCall
    self.assertEqual(shellCall(timeout=False, cmdSeq=" ".join(self.cmd)), cleanRun)

    # pythonCall
    self.assertEqual(pythonCall(0, pyfunc, "Krzysztof"), {"OK": True, "Value": "Krzysztof"})
def testTimeouts(self):
    """ systemCall, shellCall and pythonCall all report failure once self.timeout is hit """

    def pyfunc(name):
        time.sleep(10)
        return name

    # systemCall
    outcome = systemCall(timeout=self.timeout, cmdSeq=self.cmd)
    self.assertFalse(outcome["OK"])

    # shellCall
    outcome = shellCall(timeout=self.timeout, cmdSeq=" ".join(self.cmd))
    self.assertFalse(outcome["OK"])

    # pythonCall
    outcome = pythonCall(self.timeout, pyfunc, "Krzysztof")
    self.assertFalse(outcome["OK"])
def testTimeouts(self):
    """ each of the three call wrappers must return a not-OK result on timeout """

    def pyfunc(name):
        time.sleep(10)
        return name

    shellCommand = " ".join(self.cmd)

    # systemCall
    self.assertFalse(systemCall(timeout=self.timeout, cmdSeq=self.cmd)['OK'])

    # shellCall
    self.assertFalse(shellCall(timeout=self.timeout, cmdSeq=shellCommand)['OK'])

    # pythonCall
    self.assertFalse(pythonCall(self.timeout, pyfunc, "Krzysztof")['OK'])
def testTimeouts(self):
    """ the three call wrappers must return the exact timeout error structure """

    def pyfunc(name):
        time.sleep(10)
        return name

    timedOut = {'Message': 'Timed out after 3 seconds', 'OK': False}

    # systemCall
    self.assertEqual(systemCall(timeout=self.timeout, cmdSeq=self.cmd), timedOut)

    # shellCall
    self.assertEqual(shellCall(timeout=self.timeout, cmdSeq=" ".join(self.cmd)), timedOut)

    # pythonCall
    self.assertEqual(pythonCall(self.timeout, pyfunc, "Krzysztof"), timedOut)
def testNoTimeouts(self):
    """ with the timeout disabled every call wrapper must run to completion """

    def pyfunc(name):
        time.sleep(10)
        return name

    # systemCall
    outcome = systemCall(timeout=False, cmdSeq=self.cmd)
    self.assertEqual(outcome, {'OK': True, 'Value': (0, '', '')})

    # shellCall
    outcome = shellCall(timeout=False, cmdSeq=" ".join(self.cmd))
    self.assertEqual(outcome, {'OK': True, 'Value': (0, '', '')})

    # pythonCall
    outcome = pythonCall(0, pyfunc, "Krzysztof")
    self.assertEqual(outcome, {'OK': True, 'Value': 'Krzysztof'})
def executeGridCommand(proxy, cmd, gridEnvScript=None):
    """Run cmd through systemCall with an environment built from the GridEnv script.

    :param proxy: falsy to use the path from getProxyInfo(), a string path to an
                  existing proxy file, or an object dumped via gProxyManager.dumpProxyToFile
    :param cmd: command sequence passed to systemCall
    :param gridEnvScript: script to source; defaults to Local.gridEnv()
    :return: the systemCall result, or an error structure from one of the preparation steps
    """
    currentEnv = dict(os.environ)

    # If not passed as argument, use the default from the CS helpers
    envScript = gridEnvScript or Local.gridEnv()

    if envScript:
        sourced = sourceEnv(10, envScript.split())
        if not sourced["OK"]:
            return S_ERROR("Failed sourcing GridEnv: %s" % sourced["Message"])
        gridEnv = sourced["outputEnv"]
        # Preserve some current settings if they are there
        for varName in ("X509_VOMS_DIR", "X509_CERT_DIR"):
            if varName in currentEnv:
                gridEnv[varName] = currentEnv[varName]
    else:
        gridEnv = currentEnv

    if not proxy:
        proxyInfo = getProxyInfo()
        if not proxyInfo["OK"]:
            return proxyInfo
        gridEnv["X509_USER_PROXY"] = proxyInfo["Value"]["path"]
    elif isinstance(proxy, str):
        if not os.path.exists(proxy):
            return S_ERROR("Can not treat proxy passed as a string")
        gridEnv["X509_USER_PROXY"] = proxy
    else:
        dumped = gProxyManager.dumpProxyToFile(proxy)
        if not dumped["OK"]:
            return dumped
        gridEnv["X509_USER_PROXY"] = dumped["Value"]

    return systemCall(120, cmd, env=gridEnv)
def _getStatus(self):
    """Get the number of jobs per status from the control script.

    Runs ``<finalScript> status_info ...`` and parses the lines following the
    '============= Start output ===============' marker: the first one is an integer
    status, the following ``<jobStatus>:::<nJobs>`` lines are the counters.

    :return: S_OK({jobStatus: nJobs}), the systemCall error structure if the call
             failed, or S_ERROR when the script fails or its output cannot be parsed
    """
    cmdTuple = [self.finalScript, 'status_info', self.infoArea,
                self.workArea, self.userName, self.execQueue]

    result = systemCall(10, cmdTuple)
    if not result['OK']:
        return result

    status = result['Value'][0]
    stdout = result['Value'][1]
    stderr = result['Value'][2]

    if status != 0:
        return S_ERROR('\n'.join([stdout, stderr]))

    # Examine results of the job status
    outputLines = stdout.strip().replace('\r', '').split('\n')
    try:
        index = outputLines.index('============= Start output ===============')
    except ValueError:
        return S_ERROR("Invalid output from CE get status: %s" % outputLines[0])
    outputLines = outputLines[index + 1:]

    try:
        status = int(outputLines[0])
    except (IndexError, ValueError):
        # Nothing (or no integer) after the marker line
        return S_ERROR("Failed to get CE status: %s" % (outputLines[0] if outputLines else ''))

    if status != 0:
        message = "Unknown reason"
        if len(outputLines) > 1:
            message = outputLines[1]
        return S_ERROR('Failed to get CE status, reason: %s' % message)

    resultDict = {}
    for line in outputLines[1:]:
        if ':::' in line:
            jobStatus, nJobs = line.split(':::')
            resultDict[jobStatus] = int(nJobs)

    return S_OK(resultDict)
def executeGridCommand(proxy, cmd, gridEnvScript=None):
    """Execute cmd tuple after sourcing GridEnv.

    :param proxy: falsy to use the path from getProxyInfo(), a string path to an
                  existing proxy file, or an object dumped via gProxyManager.dumpProxyToFile
    :param cmd: command sequence passed to systemCall
    :param gridEnvScript: script to source; defaults to Local.gridEnv()
    :return: the systemCall result, or an error structure from one of the preparation steps
    """
    currentEnv = dict(os.environ)

    if not gridEnvScript:
        # if not passed as argument, use default from CS Helpers
        gridEnvScript = Local.gridEnv()

    if gridEnvScript:
        command = gridEnvScript.split()
        ret = sourceEnv(10, command)
        if not ret['OK']:
            return S_ERROR('Failed sourcing GridEnv: %s' % ret['Message'])
        gridEnv = ret['outputEnv']
        #
        # Preserve some current settings if they are there
        #
        for varName in ('X509_VOMS_DIR', 'X509_CERT_DIR'):
            if varName in currentEnv:
                gridEnv[varName] = currentEnv[varName]
    else:
        gridEnv = currentEnv

    if not proxy:
        res = getProxyInfo()
        if not res['OK']:
            return res
        gridEnv['X509_USER_PROXY'] = res['Value']['path']
    elif isinstance(proxy, types.StringTypes):
        # isinstance also accepts subclasses of the string types
        if os.path.exists(proxy):
            gridEnv['X509_USER_PROXY'] = proxy
        else:
            return S_ERROR('Can not treat proxy passed as a string')
    else:
        ret = gProxyManager.dumpProxyToFile(proxy)
        if not ret['OK']:
            return ret
        gridEnv['X509_USER_PROXY'] = ret['Value']

    return systemCall(120, cmd, env=gridEnv)
def executeGridCommand(proxy, cmd, gridEnvScript=None):
    """Execute cmd tuple after sourcing GridEnv.

    :param proxy: falsy to use the path from getProxyInfo(), a string path to an
                  existing proxy file, or an object dumped via gProxyManager.dumpProxyToFile
    :param cmd: command sequence passed to systemCall
    :param gridEnvScript: script to source; defaults to Local.gridEnv()
    :return: the systemCall result, or an error structure from one of the preparation steps
    """
    currentEnv = dict(os.environ)

    if not gridEnvScript:
        # if not passed as argument, use default from CS Helpers
        gridEnvScript = Local.gridEnv()

    if gridEnvScript:
        command = gridEnvScript.split()
        ret = sourceEnv(10, command)
        if not ret['OK']:
            return S_ERROR('Failed sourcing GridEnv: %s' % ret['Message'])
        gridEnv = ret['outputEnv']
        #
        # Preserve some current settings if they are there
        #
        if 'X509_VOMS_DIR' in currentEnv:
            gridEnv['X509_VOMS_DIR'] = currentEnv['X509_VOMS_DIR']
        if 'X509_CERT_DIR' in currentEnv:
            gridEnv['X509_CERT_DIR'] = currentEnv['X509_CERT_DIR']
    else:
        gridEnv = currentEnv

    if not proxy:
        res = getProxyInfo()
        if not res['OK']:
            return res
        gridEnv['X509_USER_PROXY'] = res['Value']['path']
    elif isinstance(proxy, basestring):
        if os.path.exists(proxy):
            gridEnv['X509_USER_PROXY'] = proxy
        else:
            return S_ERROR('Can not treat proxy passed as a string')
    else:
        ret = gProxyManager.dumpProxyToFile(proxy)
        if not ret['OK']:
            return ret
        gridEnv['X509_USER_PROXY'] = ret['Value']

    result = systemCall(120, cmd, env=gridEnv)
    return result
def install_CorsikaSimtelPack(version):
    """Make the corsika_simhessarray package for the given version available locally.

    The package is first looked up in the shared area and, when found, copied into the
    current directory. Otherwise it is installed in the working area and built with
    ./build_all. If neither works the process is terminated via DIRAC.exit(-1).

    :param str version: package version; a version containing 'sc3' is built with the
                        'sc3' option, anything else with 'prod2'
    """
    from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage
    from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage
    from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron
    from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea
    from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea
    from DIRAC.Core.Utilities.Subprocess import systemCall

    package = os.path.join('corsika_simhessarray', version, 'corsika_simhessarray')
    DIRAC.gLogger.notice('Checking:', package)

    # NOTE(review): sharedArea / workingArea are tested as function objects here,
    # not called, so both conditions are always true - confirm this is intended.
    if sharedArea and checkSoftwarePackage(package, sharedArea())['OK']:
        DIRAC.gLogger.notice('Package found in Shared Area:', package)
        installSoftwareEnviron(package, workingArea())
        topDir = package.split('/')[0]
        corsika_subdir = sharedArea() + '/' + topDir + '/' + version
        # Copy the shared installation into the current directory
        os.system('cp -u -r ' + corsika_subdir + '/* .')
        return

    if workingArea and installSoftwarePackage(package, workingArea())['OK']:
        # Compile the freshly installed package
        compilation_opt = 'sc3' if 'sc3' in version else 'prod2'
        DIRAC.gLogger.notice('Compiling with option:', compilation_opt)
        buildResult = systemCall(0, ['./build_all', compilation_opt, 'qgs2'], sendOutput)
        if not buildResult['OK']:
            DIRAC.gLogger.error('Failed to execute build')
            DIRAC.exit(-1)
        return

    DIRAC.gLogger.error('Check Failed for software package:', package)
    DIRAC.gLogger.error('Software package not available')
    DIRAC.exit(-1)
def __getPIDs(self, agentName):
    """Return the PIDs reported by pgrep for agentName.

    :param str agentName: agent name, looked up as ' RequestManagement/<agentName> '
    :return: the systemCall error structure if the call failed, otherwise S_OK with a
             list of int PIDs (empty when pgrep printed nothing)
    """
    # Whitespaces around the third argument are mandatory to only match the given agentName
    pidRes = systemCall(10, ['pgrep', '-f', ' RequestManagement/%s ' % agentName])
    if not pidRes['OK']:
        return pidRes

    pids = []
    for line in pidRes['Value'][1].splitlines():
        line = line.strip()
        if not line:
            # No output (or a blank line) is not an error: there is simply no PID to report
            continue
        try:
            pids.append(int(line))
        except ValueError as e:
            self.log.error("Could not create int from PID: ", "PID %s: %s" % (line, e))
    return S_OK(pids)
def _getStatus(self):
    """Get the number of jobs per status from the control script.

    Runs ``<finalScript> status_info ...`` and parses the lines following the
    '============= Start output ===============' marker: the first one is an integer
    status, the following ``<jobStatus>:::<nJobs>`` lines are the counters.

    :return: S_OK({jobStatus: nJobs}), the systemCall error structure if the call
             failed, or S_ERROR when the script fails or its output cannot be parsed
    """
    cmdTuple = [self.finalScript, 'status_info', self.infoArea,
                self.workArea, self.userName, self.execQueue]

    result = systemCall(10, cmdTuple)
    if not result['OK']:
        return result

    status = result['Value'][0]
    stdout = result['Value'][1]
    stderr = result['Value'][2]

    if status != 0:
        return S_ERROR('\n'.join([stdout, stderr]))

    # Examine results of the job status
    outputLines = stdout.strip().replace('\r', '').split('\n')
    try:
        index = outputLines.index('============= Start output ===============')
    except ValueError:
        return S_ERROR("Invalid output from CE get status: %s" % outputLines[0])
    outputLines = outputLines[index + 1:]

    try:
        status = int(outputLines[0])
    except (IndexError, ValueError):
        # Nothing (or no integer) after the marker line
        return S_ERROR("Failed to get CE status: %s" % (outputLines[0] if outputLines else ''))

    if status != 0:
        message = "Unknown reason"
        if len(outputLines) > 1:
            message = outputLines[1]
        return S_ERROR('Failed to get CE status, reason: %s' % message)

    resultDict = {}
    for line in outputLines[1:]:
        if ':::' in line:
            jobStatus, nJobs = line.split(':::')
            resultDict[jobStatus] = int(nJobs)

    return S_OK(resultDict)
def _prepareHost(self):
    """Create the working directories used by this CE with a single ``mkdir -p`` call.

    :return: S_OK() on success, the systemCall error structure if the call failed,
             S_ERROR if mkdir exits with a non-zero status
    """
    # Each directory is passed once, even when several areas point to the same path
    dirTuple = uniqueElements([self.sharedArea,
                               self.executableArea,
                               self.infoArea,
                               self.batchOutput,
                               self.batchError,
                               self.workArea])
    cmdTuple = ['mkdir', '-p'] + dirTuple
    self.log.verbose('Creating working directories')
    result = systemCall(30, cmdTuple)
    if not result['OK']:
        # 'Message' is the error string itself; indexing it would log a single character
        self.log.warn('Failed creating working directories: %s' % result['Message'])
        return result

    status, output, _error = result['Value']
    if status != 0:
        self.log.warn('Failed to create directories: %s' % output)
        return S_ERROR('Failed to create directories: %s' % output)

    return S_OK()
self.log.warn( 'Failed copying executable', x ) return S_ERROR( x ) jobStamps = [] for i in range( numberOfJobs ): jobStamps.append( makeGuid()[:8] ) jobStamp = '#'.join( jobStamps ) subOptions = urllib.quote( self.submitOptions ) cmdTuple = [ self.finalScript, 'submit_job', executable, self.batchOutput, self.batchError, self.workArea, str( numberOfJobs ), self.infoArea, jobStamp, self.execQueue, subOptions ] self.log.verbose( 'CE submission command: %s' % ' '.join( cmdTuple ) ) result = systemCall( 120, cmdTuple ) if not result['OK']: self.log.error( '%s CE job submission failed' % self.ceType, result['Message'] ) return result status = result['Value'][0] stdout = result['Value'][1] stderr = result['Value'][2] # Examine results of the job submission if status == 0: outputLines = stdout.strip().replace( '\r', '' ).split( '\n' ) try: index = outputLines.index( '============= Start output ===============' ) outputLines = outputLines[index + 1:]
def getCEStatus(self):
    """Return information on running and pending jobs, as reported by qstat.

    With self.userName set, ``qstat -u <user> <queue>`` is parsed line by line;
    otherwise the summary of ``qstat -Q <queue>`` is matched with a regular expression.

    :return: S_OK structure carrying 'SubmittedJobs', 'WaitingJobs' and 'RunningJobs',
             the systemCall error structure if the call failed, or S_ERROR when qstat
             fails or its output cannot be parsed
    """
    result = S_OK()
    result['SubmittedJobs'] = self.submittedJobs

    cmd = ["qstat", "-Q", self.execQueue]
    if self.userName:
        cmd = ["qstat", "-u", self.userName, self.execQueue]

    ret = systemCall(10, cmd)
    if not ret['OK']:
        self.log.error('Timeout', ret['Message'])
        return ret

    status = ret['Value'][0]
    stdout = ret['Value'][1]
    stderr = ret['Value'][2]

    self.log.debug("status:", status)
    self.log.debug("stdout:", stdout)
    self.log.debug("stderr:", stderr)

    if status:
        self.log.error('Failed qstat execution:', stderr)
        return S_ERROR(stderr)

    if self.userName:
        # Parse qstat -u userName queueName
        runningJobs = 0
        waitingJobs = 0
        for line in stdout.replace('\r', '').split('\n'):
            if not line:
                continue
            if line.find(self.userName) != -1:
                if 'R' == line.split(' ')[-2]:
                    runningJobs += 1
                else:
                    # every other status to assimilate to Waiting
                    waitingJobs += 1
    else:
        # Parse qstat -Q queueName
        # NOTE(review): the pattern is built from self.queue while the command uses
        # self.execQueue - confirm both refer to the same queue name.
        matched = re.search(self.queue + r"\D+(\d+)\D+(\d+)\W+(\w+)\W+(\w+)\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\W+(\w+)", stdout)
        if matched is None:
            # re.search returns None when the summary line is not found
            return S_ERROR("Error retrieving information from qstat:" + stdout + stderr)
        try:
            waitingJobs = int(matched.group(5))
            runningJobs = int(matched.group(6))
        except ValueError:
            return S_ERROR("Error retrieving information from qstat:" + stdout + stderr)

    result['WaitingJobs'] = waitingJobs
    result['RunningJobs'] = runningJobs

    self.log.verbose('Waiting Jobs: ', waitingJobs)
    self.log.verbose('Running Jobs: ', runningJobs)

    return result
def submitJob(self, executableFile, proxy, jobDesc, log, logLevel, **kwargs):
    """Start a container for a job.

    executableFile is ignored. A new wrapper suitable for running in a container is
    created from jobDesc.

    :param executableFile: ignored
    :param proxy: passed to the work-area creation
    :param jobDesc: job description the in-container wrapper is created from
    :param log: passed to the work-area creation
    :param logLevel: passed to the work-area creation
    :return: the result of the internal result check, or an error structure; errors
             raised because singularity is missing or failed to run carry
             'ReschedulePayload' = True
    """
    rootImage = self.__root

    # Check that singularity is available
    if not self.__hasSingularity():
        self.log.error('Singularity is not installed on PATH.')
        result = S_ERROR("Failed to find singularity ")
        result['ReschedulePayload'] = True
        return result

    self.log.info('Creating singularity container')

    # Start by making the directory for the container
    ret = self.__createWorkArea(proxy, jobDesc, log, logLevel)
    if not ret['OK']:
        return ret
    baseDir = ret['baseDir']
    tmpDir = ret['tmpDir']
    proxyLoc = ret['proxyLocation']

    # Now we have to set-up proxy renewal for the container
    # This is fairly easy as it remains visible on the host filesystem
    ret = getProxyInfo()
    if not ret['OK']:
        pilotProxy = None
    else:
        pilotProxy = ret['Value']['path']
    result = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod, self._monitorProxy,
                                              taskArgs=(pilotProxy, proxyLoc),
                                              executions=0, elapsedTime=0)
    renewTask = None
    if result['OK']:
        renewTask = result['Value']
    else:
        self.log.warn('Failed to start proxy renewal task')

    # Very simple accounting
    self.__submittedJobs += 1
    self.__runningJobs += 1

    # Now prepare start singularity
    # Mount /cvmfs in if it exists on the host
    withCVMFS = os.path.isdir("/cvmfs")
    innerCmd = os.path.join(self.__innerdir, "dirac_container.sh")
    cmd = [self.__singularityBin, "exec"]
    cmd.extend(["-c", "-i", "-p"])
    cmd.extend(["-W", baseDir])
    if withCVMFS:
        cmd.extend(["-B", "/cvmfs"])
    if 'ContainerBind' in self.ceParameters:
        for bindPath in self.ceParameters['ContainerBind'].split(','):
            cmd.extend(["-B", bindPath.strip()])
    if 'ContainerOptions' in self.ceParameters:
        for opt in self.ceParameters['ContainerOptions'].split(','):
            cmd.extend([opt.strip()])
    cmd.extend([rootImage, innerCmd])

    self.log.debug('Execute singularity command: %s' % cmd)
    self.log.debug('Execute singularity env: %s' % self.__getEnv())

    result = systemCall(0, cmd, callbackFunction=self.sendOutput, env=self.__getEnv())

    self.__runningJobs -= 1

    # The payload is over whatever the outcome: always stop the periodic proxy
    # renewal, otherwise the task keeps running after a successful job
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    if not result["OK"]:
        result = S_ERROR("Error running singularity command")
        result['ReschedulePayload'] = True
        return result

    return self.__checkResult(tmpDir)
def export_updateSoftware(self, version, rootPath="", gridVersion=""):
    """Update the local DIRAC software installation to the given version.

    :param str version: release passed to ``dirac-install -r``
    :param str rootPath: optional installation path (``-P``); it must exist
    :param str gridVersion: optional grid middleware version (``-g``)
    :return: S_OK() on success, S_ERROR or the failing systemCall result otherwise
    """
    # Check that we have a sane local configuration
    localSection = gConfig.getOptionsDict("/LocalInstallation")
    if not localSection["OK"]:
        return S_ERROR("Invalid installation - missing /LocalInstallation section in the configuration")
    if not localSection["Value"]:
        return S_ERROR("Invalid installation - empty /LocalInstallation section in the configuration")

    if rootPath and not os.path.exists(rootPath):
        return S_ERROR('Path "%s" does not exists' % rootPath)

    # For LHCb we need to check Oracle client
    oracleFlag = gConfig.getValue("/LocalInstallation/InstallOracleClient", "unknown").lower()
    installOracleClient = oracleFlag in ["yes", "true", "1"]
    if not installOracleClient and oracleFlag == "unknown":
        # No explicit setting: install the client only if cx_Oracle is importable
        probe = systemCall(30, ["python", "-c", "import cx_Oracle"])
        if probe["OK"] and probe["Value"][0] == 0:
            installOracleClient = True

    cmdList = ["dirac-install", "-r", version, "-t", "server"]
    if rootPath:
        cmdList.extend(["-P", rootPath])

    # Check if there are extensions
    extensionList = getCSExtensions()
    if gConfig.getValue("/LocalInstallation/WebPortal", False):
        extensionList.append("Web")
    if extensionList:
        cmdList += ["-e", ",".join(extensionList)]

    # Are grid middleware bindings required ?
    if gridVersion:
        cmdList.extend(["-g", gridVersion])

    targetPath = gConfig.getValue(
        "/LocalInstallation/TargetPath", gConfig.getValue("/LocalInstallation/RootPath", "")
    )
    if not (targetPath and os.path.exists(targetPath + "/etc/dirac.cfg")):
        return S_ERROR("Local configuration not found")
    cmdList.append(targetPath + "/etc/dirac.cfg")

    installResult = systemCall(240, cmdList)
    if not installResult["OK"]:
        return installResult
    if installResult["Value"][0] != 0:
        # Report the stdout lines mentioning an error, if any
        stripped = [line.strip() for line in installResult["Value"][1].split("\n")]
        errorLines = [line for line in stripped if "error" in line.lower()]
        if errorLines:
            return S_ERROR("\n".join(errorLines))
        return S_ERROR("Failed to update software to %s" % version)

    # Check if there is a MySQL installation and fix the server scripts if necessary
    if os.path.exists(InstallTools.mysqlDir):
        startupScript = os.path.join(InstallTools.instancePath, "mysql", "share", "mysql", "mysql.server")
        if not os.path.exists(startupScript):
            startupScript = os.path.join(
                InstallTools.instancePath, "pro", "mysql", "share", "mysql", "mysql.server"
            )
        if os.path.exists(startupScript):
            InstallTools.fixMySQLScripts(startupScript)

    # For LHCb we need to check Oracle client
    if installOracleClient:
        oracleResult = systemCall(30, "install_oracle-client.sh")
        if not oracleResult["OK"]:
            return oracleResult
        if oracleResult["Value"][0] != 0:
            # Report everything the installer printed
            report = oracleResult["Value"][1].split("\n")
            report.extend(oracleResult["Value"][2].split("\n"))
            report.append("Failed to install Oracle client module")
            return S_ERROR("\n".join(report))

    return S_OK()
def export_updateSoftware(self, version, rootPath="", gridVersion=""):
    """Update the local DIRAC software installation to the given version.

    :param str version: release passed to ``dirac-install -r``
    :param str rootPath: optional installation path (``-P``); it must exist
    :param str gridVersion: optional grid middleware version (``-g``)
    :return: S_OK() on success, S_ERROR or the failing systemCall result otherwise
    """
    # Check that we have a sane local configuration
    result = gConfig.getOptionsDict('/LocalInstallation')
    if not result['OK']:
        return S_ERROR('Invalid installation - missing /LocalInstallation section in the configuration')
    elif not result['Value']:
        return S_ERROR('Invalid installation - empty /LocalInstallation section in the configuration')

    if rootPath and not os.path.exists(rootPath):
        return S_ERROR('Path "%s" does not exists' % rootPath)

    # For LHCb we need to check Oracle client
    installOracleClient = False
    oracleFlag = gConfig.getValue('/LocalInstallation/InstallOracleClient', 'unknown')
    if oracleFlag.lower() in ['yes', 'true', '1']:
        installOracleClient = True
    elif oracleFlag.lower() == "unknown":
        # No explicit setting: install the client only if cx_Oracle is importable
        result = systemCall(30, ['python', '-c', 'import cx_Oracle'])
        if result['OK'] and result['Value'][0] == 0:
            installOracleClient = True

    cmdList = ['dirac-install', '-r', version, '-t', 'server']
    if rootPath:
        cmdList.extend(['-P', rootPath])

    # Check if there are extensions
    extensionList = getCSExtensions()
    if "WebApp" in extensionList:
        extensionList.remove("WebApp")

    webPortal = gConfig.getValue('/LocalInstallation/WebApp', False)  # this is the new portal
    if webPortal:
        if "WebAppDIRAC" not in extensionList:
            extensionList.append('WebAppDIRAC')

    # The extension option is given exactly once, with the final list
    if extensionList:
        cmdList += ['-e', ','.join(extensionList)]

    # Are grid middleware bindings required ?
    if gridVersion:
        cmdList.extend(['-g', gridVersion])

    targetPath = gConfig.getValue('/LocalInstallation/TargetPath',
                                  gConfig.getValue('/LocalInstallation/RootPath', ''))
    if targetPath and os.path.exists(targetPath + '/etc/dirac.cfg'):
        cmdList.append(targetPath + '/etc/dirac.cfg')
    else:
        return S_ERROR('Local configuration not found')

    result = systemCall(240, cmdList)
    if not result['OK']:
        return result
    status = result['Value'][0]
    if status != 0:
        # Get error messages: the stdout lines mentioning an error, if any
        error = []
        for line in result['Value'][1].split('\n'):
            line = line.strip()
            if 'error' in line.lower():
                error.append(line)
        if error:
            message = '\n'.join(error)
        else:
            message = "Failed to update software to %s" % version
        return S_ERROR(message)

    # Check if there is a MySQL installation and fix the server scripts if necessary
    if os.path.exists(InstallTools.mysqlDir):
        startupScript = os.path.join(InstallTools.instancePath,
                                     'mysql', 'share', 'mysql', 'mysql.server')
        if not os.path.exists(startupScript):
            startupScript = os.path.join(InstallTools.instancePath, 'pro',
                                         'mysql', 'share', 'mysql', 'mysql.server')
        if os.path.exists(startupScript):
            InstallTools.fixMySQLScripts(startupScript)

    # For LHCb we need to check Oracle client
    if installOracleClient:
        result = systemCall(30, 'install_oracle-client.sh')
        if not result['OK']:
            return result
        status = result['Value'][0]
        if status != 0:
            # Get error messages
            error = result['Value'][1].split('\n')
            error.extend(result['Value'][2].split('\n'))
            error.append('Failed to install Oracle client module')
            return S_ERROR('\n'.join(error))

    if webPortal:
        # we have to compile the new web portal...
        webappCompileScript = os.path.join(InstallTools.instancePath, 'pro',
                                           "WebAppDIRAC/scripts", "dirac-webapp-compile.py")
        outfile = "%s.out" % webappCompileScript
        err = "%s.err" % webappCompileScript
        # NOTE(review): ' > ' and ' 2> ' are handed over as plain arguments here; if
        # systemCall does not go through a shell they do not redirect anything - confirm.
        result = systemCall(False, ['dirac-webapp-compile', ' > ', outfile, ' 2> ', err])
        if not result['OK']:
            return result
        if result['Value'][0] != 0:
            error = result['Value'][1].split('\n')
            error.extend(result['Value'][2].split('\n'))
            error.append('Failed to compile the java script!')
            return S_ERROR('\n'.join(error))

    return S_OK()
def submitJob(self, executableFile, proxy, **kwargs):
    """ Method to submit job (overriding base method).

    :param executableFile: file to execute via systemCall. Normally the JobWrapperTemplate
                           when invoked by the JobAgent.
    :type executableFile: string
    :param proxy: the proxy used for running the job (the payload). It will be dumped to a file.
    :type proxy: string
    :return: S_OK structure (with 'PayloadFailed' set to the exit status when the payload
             exits with a status between 1 and 128), or S_ERROR carrying the signed exit
             status in 'Value' when the exit status is above 128
    """
    proxyInfo = getProxyInfo()
    pilotProxy = proxyInfo['Value']['path'] if proxyInfo['OK'] else None
    self.log.notice('Pilot Proxy:', pilotProxy)

    payloadEnv = dict(os.environ)
    payloadProxy = ''
    renewTask = None
    if proxy:
        self.log.verbose('Setting up proxy for payload')
        dumped = self.writeProxyToFile(proxy)
        if not dumped['OK']:
            return dumped

        payloadProxy = dumped['Value']  # proxy file location
        payloadEnv['X509_USER_PROXY'] = payloadProxy

        self.log.verbose('Starting process for monitoring payload proxy')
        scheduled = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod, self.monitorProxy,
                                                     taskArgs=(pilotProxy, payloadProxy),
                                                     executions=0, elapsedTime=0)
        if scheduled['OK']:
            renewTask = scheduled['Value']

    # Mode 5: make sure the file is readable and executable before running it
    if not os.access(executableFile, 5):
        os.chmod(executableFile,
                 stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)

    cmd = os.path.abspath(executableFile)
    self.log.verbose('CE submission command: %s' % (cmd))
    result = systemCall(0, cmd, callbackFunction=self.sendOutput, env=payloadEnv)

    # Clean up the payload proxy file and the renewal task
    if payloadProxy:
        os.unlink(payloadProxy)
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    ret = S_OK()

    if not result['OK']:
        self.log.error('Fail to run InProcess', result['Message'])
    else:
        exitStatus = result['Value'][0]
        if exitStatus > 128:
            # negative exit values are returned as 256 - exit
            signedStatus = exitStatus - 256
            self.log.warn('InProcess Job Execution Failed')
            self.log.info('Exit status:', signedStatus)
            knownErrors = {-2: 'JobWrapper initialization error',
                           -1: 'JobWrapper execution error'}
            failure = S_ERROR(knownErrors.get(signedStatus, 'InProcess Job Execution Failed'))
            failure['Value'] = signedStatus
            return failure
        if exitStatus > 0:
            self.log.warn('Fail in payload execution')
            self.log.info('Exit status:', exitStatus)
            ret['PayloadFailed'] = exitStatus
        else:
            self.log.debug('InProcess CE result OK')

    self.submittedJobs += 1
    return ret
def checkUnusedCEs():
    """Interactively add to the CS the computing elements found in the BDII only.

    Reads the module globals vo, dry, hostURL and glue2 and sets ceBdiiDict. The CEs
    returned by getGridCEs (the known ones being blacklisted) are listed, then, site by
    site, the user is asked for a DIRAC site name (when the site is not in the CS yet)
    and for the CEs to add; each confirmed site is added with the dirac-admin-add-site
    command unless dry is set. Exits through DIRACExit on errors or on user request.
    """
    global vo, dry, ceBdiiDict, hostURL, glue2

    def reportSitesAdded(added):
        """Print the (site, diracSite) pairs added so far, if any."""
        if added:
            gLogger.notice('CEs were added at the following sites:')
            for addedSite, addedDiracSite in added:
                gLogger.notice("%s\t%s" % (addedSite, addedDiracSite))

    gLogger.notice('looking for new computing resources in the BDII database...')

    result = getCEsFromCS()
    if not result['OK']:
        gLogger.error('ERROR: failed to get CEs from CS', result['Message'])
        DIRACExit(-1)
    knownCEs = result['Value']

    result = getGridCEs(vo, ceBlackList=knownCEs, hostURL=hostURL, glue2=glue2)
    if not result['OK']:
        gLogger.error('ERROR: failed to get CEs from BDII', result['Message'])
        DIRACExit(-1)
    ceBdiiDict = result['BdiiInfo']

    siteDict = result['Value']
    if siteDict:
        gLogger.notice('New resources available:\n')
        for site in siteDict:
            diracSite = 'Unknown'
            result = getDIRACSiteName(site)
            if result['OK']:
                diracSite = ','.join(result['Value'])
            ces = siteDict[site].keys()  # pylint: disable=no-member
            if ces:
                gLogger.notice(" %s, DIRAC site %s" % (site, diracSite))
                for ce in ces:
                    gLogger.notice(' ' * 4 + ce)
                    gLogger.notice(' %s, %s' % (siteDict[site][ce]['CEType'],
                                                '%s_%s_%s' % siteDict[site][ce]['System']))
    else:
        gLogger.notice('No new resources available, exiting')
        DIRACExit(0)

    inp = raw_input("\nDo you want to add sites ? [default=yes] [yes|no]: ")
    inp = inp.strip()
    # An empty answer means the default (yes); only an explicit "n..." stops here
    if inp.lower().startswith('n'):
        gLogger.notice('Nothing else to be done, exiting')
        DIRACExit(0)

    gLogger.notice('\nAdding new sites/CEs interactively\n')

    sitesAdded = []

    for site in siteDict:
        # Get the country code: last component of a CE host name
        country = ''
        ces = siteDict[site].keys()  # pylint: disable=no-member
        for ce in ces:
            country = ce.strip().split('.')[-1].lower()
            if len(country) == 2:
                break
            if country == 'gov':
                country = 'us'
                break
        if not country or len(country) != 2:
            country = 'xx'

        result = getDIRACSiteName(site)
        if not result['OK']:
            gLogger.notice('\nThe site %s is not yet in the CS, give it a name' % site)
            diracSite = raw_input('[help|skip|<domain>.<name>.%s]: ' % country)
            if diracSite.lower() == "skip":
                continue
            if diracSite.lower() == "help":
                gLogger.notice('%s site details:' % site)
                for k, v in ceBdiiDict[site].items():
                    if k != "CEs":
                        gLogger.notice('%s\t%s' % (k, v))
                gLogger.notice('\nEnter DIRAC site name in the form <domain>.<name>.%s\n' % country)
                diracSite = raw_input('[<domain>.<name>.%s]: ' % country)
            try:
                # The name must have exactly three dot-separated components
                _, _, _ = diracSite.split('.')
            except ValueError:
                gLogger.error('ERROR: DIRAC site name does not follow convention: %s' % diracSite)
                continue
            diracSites = [diracSite]
        else:
            diracSites = result['Value']

        if len(diracSites) > 1:
            gLogger.notice('Attention! GOC site %s corresponds to more than one DIRAC sites:' % site)
            gLogger.notice(str(diracSites))
            gLogger.notice('Please, pay attention which DIRAC site the new CEs will join\n')

        newCEs = {}
        addedCEs = []
        for ce in ces:
            ceType = siteDict[site][ce]['CEType']
            for diracSite in diracSites:
                if ce in addedCEs:
                    continue
                yn = raw_input("Add CE %s of type %s to %s? [default yes] [yes|no]: " % (ce, ceType, diracSite))
                if yn == '' or yn.lower() == 'y':
                    newCEs.setdefault(diracSite, [])
                    newCEs[diracSite].append(ce)
                    addedCEs.append(ce)

        for diracSite in diracSites:
            if diracSite not in newCEs:
                continue
            cmd = "dirac-admin-add-site %s %s %s" % (diracSite, site, ' '.join(newCEs[diracSite]))
            gLogger.notice("\nNew site/CEs will be added with command:\n%s" % cmd)
            yn = raw_input("Add it ? [default yes] [yes|no]: ")
            if not (yn == '' or yn.lower() == 'y'):
                continue

            if dry:
                gLogger.notice("Command is skipped in the dry run")
                continue

            result = systemCall(0, shlex.split(cmd))
            if not result['OK']:
                gLogger.error('Error while executing dirac-admin-add-site command')
                yn = raw_input("Do you want to continue ? [default no] [yes|no]: ")
                if yn == '' or yn.lower().startswith('n'):
                    reportSitesAdded(sitesAdded)
                    DIRACExit(0)
            else:
                exitStatus, stdData, errData = result['Value']
                if exitStatus:
                    gLogger.error('Error while executing dirac-admin-add-site command\n',
                                  '\n'.join([stdData, errData]))
                    yn = raw_input("Do you want to continue ? [default no] [yes|no]: ")
                    if yn == '' or yn.lower().startswith('n'):
                        reportSitesAdded(sitesAdded)
                        DIRACExit(0)
                else:
                    sitesAdded.append((site, diracSite))
                    gLogger.notice(stdData)

    if sitesAdded:
        reportSitesAdded(sitesAdded)
    else:
        gLogger.notice('No new CEs were added this time')
def submitJob(self, executableFile, proxy, dummy=None):
    """ Method to submit job, should be overridden in sub-class.

    :param executableFile: file run through systemCall
    :param proxy: payload proxy; when given it is written to a file that is exported
                  as X509_USER_PROXY to the payload
    :param dummy: not used
    :return: S_OK structure (with 'PayloadFailed' set to the exit status when the payload
             exits with a status between 1 and 128), or S_ERROR carrying the signed exit
             status in 'Value' when the exit status is above 128
    """
    proxyInfo = getProxyInfo()
    if proxyInfo['OK']:
        pilotProxy = proxyInfo['Value']['path']
    else:
        pilotProxy = None
    self.log.notice('Pilot Proxy:', pilotProxy)

    payloadEnv = dict(os.environ)
    payloadProxy = ''
    if proxy:
        self.log.verbose('Setting up proxy for payload')
        written = self.writeProxyToFile(proxy)
        if not written['OK']:
            return written
        payloadProxy = written['Value']
        payloadEnv['X509_USER_PROXY'] = payloadProxy

    self.log.verbose('Starting process for monitoring payload proxy')
    scheduled = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod, self.monitorProxy,
                                                 taskArgs=(pilotProxy, payloadProxy),
                                                 executions=0, elapsedTime=0)
    renewTask = scheduled['Value'] if scheduled['OK'] else None

    # Mode 5: make sure the file is readable and executable before running it
    if not os.access(executableFile, 5):
        os.chmod(executableFile,
                 stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)

    cmd = os.path.abspath(executableFile)
    self.log.verbose('CE submission command: %s' % (cmd))
    result = systemCall(0, cmd, callbackFunction=self.sendOutput, env=payloadEnv)

    # Clean up the payload proxy file and the renewal task
    if payloadProxy:
        os.unlink(payloadProxy)
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    ret = S_OK()

    if not result['OK']:
        self.log.error('Fail to run InProcess', result['Message'])
    else:
        exitStatus = result['Value'][0]
        if exitStatus > 128:
            # negative exit values are returned as 256 - exit
            signedStatus = exitStatus - 256
            self.log.warn('InProcess Job Execution Failed')
            self.log.info('Exit status:', signedStatus)
            if signedStatus == -2:
                error = 'Error in the initialization of the DIRAC JobWrapper'
            elif signedStatus == -1:
                error = 'Error in the execution of the DIRAC JobWrapper'
            else:
                error = 'InProcess Job Execution Failed'
            failure = S_ERROR(error)
            failure['Value'] = signedStatus
            return failure
        if exitStatus > 0:
            self.log.warn('Fail in payload execution')
            self.log.info('Exit status:', exitStatus)
            ret['PayloadFailed'] = exitStatus
        else:
            self.log.debug('InProcess CE result OK')

    self.submittedJobs += 1
    return ret
""" __RCSID__ = "$Id$" from DIRAC import gLogger, gConfig, S_OK, S_ERROR from DIRAC.Core.Base.AgentModule import AgentModule from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB from DIRAC.AccountingSystem.Client.Types.WMSHistory import WMSHistory from DIRAC.AccountingSystem.Client.DataStoreClient import DataStoreClient from DIRAC.Core.Utilities import Time import json try: import pika except Exception as e: from DIRAC.Core.Utilities.Subprocess import systemCall result = systemCall( False, ["pip", "install", "pika"] ) if not result['OK']: raise RuntimeError( result['Message'] ) else: import pika class StatesMonitoringAgent( AgentModule ): """ The specific agents must provide the following methods: - initialize() for initial settings - beginExecution() - execute() - the main method called in the agent cycle - endExecution() - finalize() - the graceful exit of the method, this one is usually used for the agent restart
def sourceEnv(timeout, cmdTuple, inputEnv=None):
    """Source a configuration script in a platform dependent way and get back the environment.

    :param timeout: timeout passed to shellCall / systemCall
    :param cmdTuple: sequence whose first element is the script path without extension
                     ('.bat' is appended on Windows, '.sh' elsewhere), followed by its
                     arguments. The sequence is not modified.
    :param dict inputEnv: environment the script is sourced in
    :return: S_OK structure with the resulting environment in 'outputEnv', or S_ERROR;
             both carry 'stdout' and 'stderr'
    """
    # Work on a private copy: the caller's sequence must not end up with the
    # extension and the 'source' keyword added (this also accepts real tuples)
    cmdList = list(cmdTuple)

    # The environment is printed on stderr, as the last line, once the script is sourced
    envAsDict = '&& python -c "import os,sys ; print >> sys.stderr, os.environ"'

    # 1.- Choose the right version of the configuration file
    if DIRAC.platformTuple[0] == 'Windows':
        cmdList[0] += '.bat'
    else:
        cmdList[0] += '.sh'

    # 2.- Check that it exists
    if not os.path.exists(cmdList[0]):
        result = DIRAC.S_ERROR('Missing script: %s' % cmdList[0])
        result['stdout'] = ''
        result['stderr'] = 'Missing script: %s' % cmdList[0]
        return result

    # Source it in a platform dependent way:
    # On windows the execution makes the environment to be inherit
    # On Linux or Darwin use bash and source the file.
    if DIRAC.platformTuple[0] == 'Windows':
        # this needs to be tested
        cmd = ' '.join(cmdList) + envAsDict
        ret = shellCall(timeout, [cmd], env=inputEnv)
    else:
        cmdList.insert(0, 'source')
        cmd = ' '.join(cmdList) + envAsDict
        ret = systemCall(timeout, ['/bin/bash', '-c', cmd], env=inputEnv)

    # 3.- Now get back the result
    stdout = ''
    stderr = ''
    result = DIRAC.S_OK()
    if ret['OK']:
        # The Command has not timeout, retrieve stdout and stderr
        stdout = ret['Value'][1]
        stderr = ret['Value'][2]
        if ret['Value'][0] == 0:
            # execution was OK
            try:
                # NOTE(review): eval() runs on text produced by the sourced script;
                # only acceptable as long as that script is trusted.
                result['outputEnv'] = eval(stderr.split('\n')[-2] + '\n')
                stderr = '\n'.join(stderr.split('\n')[:-2])
            except Exception:
                stdout = cmd + '\n' + stdout
                result = DIRAC.S_ERROR('Could not parse Environment dictionary from stderr')
        else:
            # execution error
            stdout = cmd + '\n' + stdout
            result = DIRAC.S_ERROR('Execution returns %s' % ret['Value'][0])
    else:
        # Timeout
        stdout = cmd
        stderr = ret['Message']
        result = DIRAC.S_ERROR(stderr)

    # 4.- Put stdout and stderr in result structure
    result['stdout'] = stdout
    result['stderr'] = stderr

    return result