def test_CreateAndSubmit(self):
    """Create a job wrapper for a simple job and submit it through an InProcess CE.

    The test succeeds when the CE can be instantiated, the wrapper is
    generated with the 'pilot.cfg' extra options and the submission of the
    wrapper file returns an OK result.
    """
    jobParams = {
        'JobID': '1',
        'JobType': 'Merge',
        'CPUTime': '1000000',
        'Executable': '$DIRACROOT/scripts/dirac-jobexec',
        'Arguments': "helloWorld.xml -o LogLevel=DEBUG pilot.cfg",
        'ExtraOptions': 'pilot.cfg',
        'InputSandbox': ['helloWorld.xml', 'exe-script.py'],
    }
    resourceParams = {}
    optimizerParams = {}

    # NOTE: a first variant (job 1, wrapper created without extraOptions) used
    # to be exercised here as well; it was disabled and is not run.

    ceFactory = ComputingElementFactory()
    ceInstance = ceFactory.getCE('InProcess')
    # assertTrue is used instead of assert_, which is a deprecated alias.
    self.assertTrue(ceInstance['OK'])
    computingElement = ceInstance['Value']

    res = createJobWrapper(2, jobParams, resourceParams, optimizerParams,
                           extraOptions='pilot.cfg', logLevel='DEBUG')
    self.assertTrue(res['OK'])
    wrapperFile = res['Value']

    res = computingElement.submitJob(wrapperFile, self.payloadProxy)
    self.assertTrue(res['OK'])
def export_killPilot(self, pilotRefList):
    """Kill the specified pilots.

    The pilots are grouped by owner, group, site, CE and queue; one CE object
    is created per group and asked to kill all the pilots of that group.

    :param pilotRefList: a single pilot reference (string) or an iterable of
                         pilot references
    :return: S_OK() if no kill request failed, otherwise S_ERROR. Failures to
             get the pilot info, the queue description, the CE object or the
             owner proxy abort the whole operation immediately.
    """
    # A bare string is one reference, not a sequence of characters.
    # isinstance also accepts subclasses of the string types.
    if isinstance(pilotRefList, tuple(StringTypes)):
        pilotRefs = [pilotRefList]
    else:
        pilotRefs = list(pilotRefList)

    # Regroup pilots per site and per owner
    pilotRefDict = {}
    for pilotReference in pilotRefs:
        result = pilotDB.getPilotInfo(pilotReference)
        # A reference missing from the answer is reported as an error
        # instead of raising KeyError below.
        if not result['OK'] or not result['Value'] or pilotReference not in result['Value']:
            return S_ERROR('Failed to get info for pilot ' + pilotReference)

        pilotDict = result['Value'][pilotReference]
        queueKey = '@@@'.join([pilotDict['OwnerDN'],
                               pilotDict['OwnerGroup'],
                               pilotDict['GridSite'],
                               pilotDict['DestinationSite'],
                               pilotDict['Queue']])
        queueEntry = pilotRefDict.setdefault(queueKey, {})
        queueEntry.setdefault('PilotList', []).append(pilotReference)
        queueEntry['GridType'] = pilotDict['GridType']

    # Do the work now queue by queue
    ceFactory = ComputingElementFactory()
    failed = []
    for queueKey, queueEntry in pilotRefDict.items():
        owner, group, site, ceName, queue = queueKey.split('@@@')
        result = getQueue(site, ceName, queue)
        if not result['OK']:
            return result
        queueDict = result['Value']
        gridType = queueEntry['GridType']
        result = ceFactory.getCE(gridType, ceName, queueDict)
        if not result['OK']:
            return result
        ce = result['Value']

        # These grid flavours get the pilot owner's proxy set on the CE
        if gridType in ["LCG", "gLite", "CREAM"]:
            group = getGroupOption(group, 'VOMSRole', group)
            ret = gProxyManager.getPilotProxyFromVOMSGroup(owner, group)
            if not ret['OK']:
                gLogger.error(ret['Message'])
                gLogger.error('Could not get proxy:', 'User "%s", Group "%s"' % (owner, group))
                return S_ERROR("Failed to get the pilot's owner proxy")
            ce.setProxy(ret['Value'])

        pilotList = queueEntry['PilotList']
        result = ce.killJob(pilotList)
        if not result['OK']:
            # Keep going with the other queues, but do not lose the reason
            gLogger.error('Failed to kill pilots:', result['Message'])
            failed.extend(pilotList)

    if failed:
        return S_ERROR('Failed to kill at least some pilots')

    return S_OK()
def initialize(self, loops=0):
    """Set the agent parameters and create the backend Computing Element.

    Values already stored on the instance are used as the defaults of the
    configuration look-ups.

    :param int loops: value stored in the 'MaxCycles' agent option
    :return: S_OK(), or the failed result of the CE creation / CE description
    """
    # Disable monitoring
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # The local site configuration takes precedence over the agent option
    selectedCE = self.am_getOption('CEType', 'InProcess')
    siteCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if siteCE:
        self.log.info('Defining CE from local configuration = %s' % siteCE)
        selectedCE = siteCE

    # Create backend Computing Element
    factory = ComputingElementFactory()
    self.ceName = selectedCE
    creation = factory.getCE(selectedCE)
    if not creation['OK']:
        self.log.warn(creation['Message'])
        return creation
    self.computingElement = creation['Value']

    description = self.computingElement.getDescription()
    if not description['OK']:
        self.log.warn("Can not get the CE description")
        return description

    # CE value first, then the CS default overrides it when defined
    self.timeLeft = description['Value'].get('CPUTime', self.timeLeft)
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime',
                                     self.timeLeft)

    self.initTimes = os.times()

    # Localsite options
    self.siteName = gConfig.getValue('/LocalSite/Site', self.siteName)
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference',
                                           self.pilotReference)
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime',
                                               self.defaultProxyLength)

    # Agent options
    # Factor converting raw CPU to normalized units (based on the CPU model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor',
                                      self.cpuFactor)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay',
                                                self.jobSubmissionDelay)
    self.fillingMode = self.am_getOption('FillingModeFlag', self.fillingMode)
    self.minimumTimeLeft = self.am_getOption('MinimumTimeLeft', self.minimumTimeLeft)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure',
                                                      self.stopOnApplicationFailure)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches',
                                                    self.stopAfterFailedMatches)
    self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions',
                                         self.extraOptions)

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    return S_OK()
def getQueues(self, resourceDict):
    """Get the list of relevant CEs and their descriptions.

    Rebuilds self.queueDict with one entry per queue, keyed '<ce>_<queue>',
    holding the queue parameters, the CE object and a few identification keys.

    :param dict resourceDict: indexed as resourceDict[site][ce]; each CE
                              dictionary provides 'CEType' and 'Queues'.
                              'Queues' is removed from the CE dictionary and
                              the queue dictionaries are completed in place.
    :return: S_OK(), or the failed result of the CE creation / validation
    """
    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
        for ce in resourceDict[site]:
            ceDict = resourceDict[site][ce]
            # Popped so that 'Queues' is not part of the CE parameters below
            qDict = ceDict.pop('Queues')
            for queue in qDict:
                queueName = '%s_%s' % (ce, queue)
                parameters = qDict[queue]
                queueEntry = self.queueDict[queueName] = {'ParametersDict': parameters}
                parameters['Queue'] = queue
                parameters['Site'] = site
                parameters['GridEnv'] = self.gridEnv
                parameters['Setup'] = gConfig.getValue('/DIRAC/Setup', 'unknown')

                # Evaluate the CPU limit of the queue according to the Glue convention
                # To Do: should be a utility
                if "maxCPUTime" in parameters and "SI00" in parameters:
                    maxCPUTime = float(parameters['maxCPUTime'])
                    # For some sites there are crazy values in the CS
                    maxCPUTime = max(maxCPUTime, 0)
                    maxCPUTime = min(maxCPUTime, 86400 * 12.5)
                    si00 = float(parameters['SI00'])
                    queueCPUTime = 60. / 250. * maxCPUTime * si00
                    parameters['CPUTime'] = int(queueCPUTime)

                qwDir = os.path.join(self.workingDirectory, queue)
                if not os.path.exists(qwDir):
                    os.makedirs(qwDir)
                parameters['WorkingDirectory'] = qwDir

                # Queue parameters override the CE level ones
                ceQueueDict = dict(ceDict)
                ceQueueDict.update(parameters)
                result = ceFactory.getCE(ceName=ce,
                                         ceType=ceDict['CEType'],
                                         ceParametersDict=ceQueueDict)
                if not result['OK']:
                    return result
                queueEntry['CE'] = result['Value']
                queueEntry['CEName'] = ce
                queueEntry['CEType'] = ceDict['CEType']
                queueEntry['Site'] = site
                queueEntry['QueueName'] = queue

                result = queueEntry['CE'].isValid()
                if not result['OK']:
                    self.log.fatal(result['Message'])
                    return result

                # Look at the option value, not only at its presence:
                # a BundleProxy set to e.g. "False" must not enable bundling.
                if 'BundleProxy' in parameters:
                    if str(parameters['BundleProxy']).lower() in ('true', 'yes', '1'):
                        queueEntry['BundleProxy'] = True

    return S_OK()
def __createCE(self, ceName):
    """Ask the CE factory for a Computing Element called ceName.

    :param ceName: name passed to ComputingElementFactory.getCE
    :return: the factory result, unchanged; a failure is also logged as a warning
    """
    self.log.info("Creating %s CE" % (ceName))
    outcome = ComputingElementFactory().getCE(ceName)
    if not outcome['OK']:
        self.log.warn(outcome['Message'])
    return outcome
def initialize(self, loops=0):
    """Set the default agent parameters and create the backend Computing Element.

    :param int loops: value stored in the 'MaxCycles' agent option
    :return: S_OK(), or the failed result of the CE creation / CE description
    """
    # Disable monitoring
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # A CE defined in the local site configuration wins over the agent option
    chosenCE = self.am_getOption('CEType', 'InProcess')
    localCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if localCE:
        self.log.info('Defining CE from local configuration = %s' % localCE)
        chosenCE = localCE

    # Create backend Computing Element
    factory = ComputingElementFactory()
    self.ceName = chosenCE
    ceResult = factory.getCE(chosenCE)
    if not ceResult['OK']:
        self.log.warn(ceResult['Message'])
        return ceResult
    self.computingElement = ceResult['Value']

    descResult = self.computingElement.getDescription()
    if not descResult['OK']:
        self.log.warn("Can not get the CE description")
        return descResult

    # CPU time from the CE description, overridden by the CS default if any
    self.timeLeft = descResult['Value'].get('CPUTime', 0.0)
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime',
                                     self.timeLeft)

    self.initTimes = os.times()

    # Localsite options
    self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime',
                                               86400 * 5)

    # Agent options
    # Factor converting raw CPU to normalized units (based on the CPU model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
    self.fillingMode = self.am_getOption('FillingModeFlag', False)
    self.minimumTimeLeft = self.am_getOption('MinimumTimeLeft', 1000)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)

    # Counters start from zero
    self.jobCount = 0
    self.matchFailedCount = 0

    self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', '')

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False
    return S_OK()
def initialize(self, loops=0):
    """Set the default agent parameters and create the backend Computing Element.

    :param int loops: value stored in the 'MaxCycles' agent option
    :return: S_OK(), or the failed result of the CE creation
    """
    # Disable monitoring
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # A CE defined in the local site configuration wins over the agent option
    wantedCE = self.am_getOption('CEType', 'InProcess')
    localCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if localCE:
        self.log.info('Defining CE from local configuration = %s' % localCE)
        wantedCE = localCE

    factory = ComputingElementFactory()
    self.ceName = wantedCE
    ceResult = factory.getCE(wantedCE)
    if not ceResult['OK']:
        self.log.warn(ceResult['Message'])
        return ceResult

    self.initTimes = os.times()
    self.computingElement = ceResult['Value']

    # Localsite options
    self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime',
                                               86400 * 5)

    # Agent options
    # Factor converting raw CPU to normalized units (based on the CPU model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
    self.fillingMode = self.am_getOption('FillingModeFlag', False)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)

    # Counters start from zero
    self.jobCount = 0
    self.matchFailedCount = 0

    self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', '')

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', 0.0)
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False
    return S_OK()
def initialize(self, loops=0):
    """Set the default agent parameters and create the backend Computing Element.

    :param int loops: value stored in the 'MaxCycles' agent option
    :return: S_OK(), or the failed result of the CE creation
    """
    # Disable monitoring
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # A CE defined in the local site configuration wins over the agent option
    wantedCE = self.am_getOption('CEType', 'InProcess')
    localCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if localCE:
        self.log.info('Defining CE from local configuration = %s' % localCE)
        wantedCE = localCE

    factory = ComputingElementFactory()
    self.ceName = wantedCE
    ceResult = factory.getCE(wantedCE)
    if not ceResult['OK']:
        self.log.warn(ceResult['Message'])
        return ceResult

    self.initTimes = os.times()
    self.computingElement = ceResult['Value']

    # The root is the directory four levels above this file
    rootDir = __file__
    for _level in range(4):
        rootDir = os.path.dirname(rootDir)
    self.diracRoot = rootDir

    # Localsite options
    self.siteRoot = gConfig.getValue('/LocalSite/Root', os.getcwd())
    self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime',
                                               86400 * 5)

    # Agent options
    # Factor converting raw CPU to normalized units (based on the CPU model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)

    # The wrapper template option is taken relative to the DIRAC root
    defaultWrapperLocation = 'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py'
    wrapperLocation = self.am_getOption('JobWrapperTemplate', defaultWrapperLocation)
    self.jobWrapperTemplate = os.path.join(self.diracRoot, wrapperLocation)

    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
    self.defaultLogLevel = self.am_getOption('DefaultLogLevel', 'info')
    self.fillingMode = self.am_getOption('FillingModeFlag', False)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)

    # Counters start from zero
    self.jobCount = 0
    self.matchFailedCount = 0

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', 0.0)
    self.gridCEQueue = gConfig.getValue('/Resources/Computing/CEDefaults/GridCEQueue', '')
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False
    return S_OK()
def addComputingElement(self, ceList):
    """Make sure self.computingElementDict has an entry for every CE of ceList.

    For each CE not yet known, a CE object is created through the factory and
    its ceConfigDict is stored under the CE name, with the object itself added
    under the 'CE' key. Processing stops (returning None) at the first CE that
    cannot be created.

    :param ceList: iterable of CE names
    """
    for ceName in ceList:
        if ceName in self.computingElementDict:
            continue

        creation = ComputingElementFactory().getCE(ceName=ceName)
        if not creation['OK']:
            self.log.error('Can not create CE object:', creation['Message'])
            return

        ceObject = creation['Value']
        self.computingElementDict[ceName] = ceObject.ceConfigDict
        # add the 'CE' instance at the end to avoid being overwritten
        self.computingElementDict[ceName]['CE'] = ceObject
def getQueues(self, resourceDict):
    """Get the list of relevant CEs and their descriptions.

    Rebuilds self.queueDict with one entry per queue, keyed '<ce>_<queue>'.
    CE objects are taken from self.queueCECache when the hash of the queue
    definition did not change, and created through the factory otherwise.
    self.platforms and self.sites are completed along the way.

    :param dict resourceDict: indexed as resourceDict[site][ce]; each CE
                              dictionary provides 'CEType' and 'Queues'.
                              'Queues' is removed from the CE dictionary and
                              the queue dictionaries are completed in place.
    :return: S_OK(), or the failed result of the CE creation / validation
    """
    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
        for ce in resourceDict[site]:
            ceDict = resourceDict[site][ce]
            ceTags = ceDict.get('Tag')
            if isinstance(ceTags, basestring):
                ceTags = fromChar(ceTags)
            # Popped so that 'Queues' is not part of the CE parameters below
            qDict = ceDict.pop('Queues')
            for queue in qDict:
                queueName = '%s_%s' % (ce, queue)
                parameters = qDict[queue]
                queueEntry = self.queueDict[queueName] = {'ParametersDict': parameters}
                parameters['Queue'] = queue
                parameters['Site'] = site
                parameters['GridEnv'] = self.gridEnv
                parameters['Setup'] = gConfig.getValue('/DIRAC/Setup', 'unknown')

                # Evaluate the CPU limit of the queue according to the Glue convention
                # To Do: should be a utility
                if "maxCPUTime" in parameters and "SI00" in parameters:
                    maxCPUTime = float(parameters['maxCPUTime'])
                    # For some sites there are crazy values in the CS
                    maxCPUTime = max(maxCPUTime, 0)
                    maxCPUTime = min(maxCPUTime, 86400 * 12.5)
                    si00 = float(parameters['SI00'])
                    queueCPUTime = 60. / 250. * maxCPUTime * si00
                    parameters['CPUTime'] = int(queueCPUTime)

                queueTags = parameters.get('Tag')
                if queueTags and isinstance(queueTags, basestring):
                    queueTags = fromChar(queueTags)
                    parameters['Tag'] = queueTags
                if ceTags:
                    if queueTags:
                        parameters['Tag'] = list(set(ceTags + queueTags))
                    else:
                        # Store a copy: the memory tags added below must not
                        # end up in the CE level list shared by all the queues
                        parameters['Tag'] = list(ceTags)

                maxMemory = parameters.get('MaxRAM', None)
                if maxMemory:
                    # MaxRAM value is supposed to be in MB
                    # (floor division keeps the GB count an integer)
                    memoryTags = ['%dGB' % mem
                                  for mem in range(1, int(maxMemory) // 1000 + 1)]
                    if memoryTags:
                        # Build a new list rather than extending in place
                        parameters['Tag'] = list(parameters.get('Tag') or []) + memoryTags

                qwDir = os.path.join(self.workingDirectory, queue)
                if not os.path.exists(qwDir):
                    os.makedirs(qwDir)
                parameters['WorkingDirectory'] = qwDir

                # Queue level platform first, then CE level, then architecture_OS
                platform = ''
                if "Platform" in parameters:
                    platform = parameters['Platform']
                elif "Platform" in ceDict:
                    platform = ceDict['Platform']
                elif "OS" in ceDict:
                    architecture = ceDict.get('architecture', 'x86_64')
                    platform = '_'.join([architecture, ceDict['OS']])
                if platform and platform not in self.platforms:
                    self.platforms.append(platform)

                if "Platform" not in parameters and platform:
                    result = Resources.getDIRACPlatform(platform)
                    if result['OK']:
                        parameters['Platform'] = result['Value'][0]

                # Queue parameters override the CE level ones
                ceQueueDict = dict(ceDict)
                ceQueueDict.update(parameters)

                # Generate the CE object for the queue or pick the already existing one
                # if the queue definition did not change
                queueHash = self.__generateQueueHash(ceQueueDict)
                if queueName in self.queueCECache and self.queueCECache[queueName]['Hash'] == queueHash:
                    queueCE = self.queueCECache[queueName]['CE']
                else:
                    result = ceFactory.getCE(ceName=ce,
                                             ceType=ceDict['CEType'],
                                             ceParametersDict=ceQueueDict)
                    if not result['OK']:
                        return result
                    cacheEntry = self.queueCECache.setdefault(queueName, {})
                    cacheEntry['Hash'] = queueHash
                    cacheEntry['CE'] = result['Value']
                    queueCE = cacheEntry['CE']

                queueEntry['CE'] = queueCE
                queueEntry['CEName'] = ce
                queueEntry['CEType'] = ceDict['CEType']
                queueEntry['Site'] = site
                queueEntry['QueueName'] = queue
                queueEntry['Platform'] = platform

                result = queueEntry['CE'].isValid()
                if not result['OK']:
                    self.log.fatal(result['Message'])
                    return result

                # The queue level option takes precedence over the CE level one
                if 'BundleProxy' in parameters:
                    if parameters['BundleProxy'].lower() in ['true', 'yes', '1']:
                        queueEntry['BundleProxy'] = True
                elif 'BundleProxy' in ceDict:
                    if ceDict['BundleProxy'].lower() in ['true', 'yes', '1']:
                        queueEntry['BundleProxy'] = True

            if site not in self.sites:
                self.sites.append(site)

    return S_OK()
def __getGridJobOutput(self, pilotReference):
    """Get the standard output and standard error of a pilot.

    The output stored in the pilot DB is returned when it is not empty.
    Otherwise it is retrieved either with getPilotOutput and the pilot owner
    proxy (LCG, gLite and CREAM grid types) or from a CE object created for
    the pilot queue (any other grid type), and stored in the DB when the
    standard output is not empty.

    :param str pilotReference: pilot job reference
    :return: S_OK(dict) with the keys StdOut, StdErr, OwnerDN, OwnerGroup and
             FileList, or S_ERROR / the failed intermediate result
    """
    def _packOutput(stdOut, stdErr, ownerDN, ownerGroup, files):
        # Common layout of the returned dictionary
        return {'StdOut': stdOut,
                'StdErr': stdErr,
                'OwnerDN': ownerDN,
                'OwnerGroup': ownerGroup,
                'FileList': files}

    result = pilotDB.getPilotInfo(pilotReference)
    if not result['OK'] or not result['Value']:
        return S_ERROR('Failed to get info for pilot ' + pilotReference)

    pilotDict = result['Value'][pilotReference]
    owner = pilotDict['OwnerDN']
    group = pilotDict['OwnerGroup']

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    result = pilotDB.getPilotOutput(pilotReference)
    if result['OK']:
        stdout = result['Value']['StdOut']
        error = result['Value']['StdErr']
        if stdout or error:
            return S_OK(_packOutput(stdout, error, owner, group, []))
        gLogger.warn('Empty pilot output found for %s' % pilotReference)

    gridType = pilotDict['GridType']
    if gridType in ["LCG", "gLite", "CREAM"]:
        group = getGroupOption(group, 'VOMSRole', group)
        ret = gProxyManager.getPilotProxyFromVOMSGroup(owner, group)
        if not ret['OK']:
            gLogger.error(ret['Message'])
            gLogger.error('Could not get proxy:', 'User "%s", Group "%s"' % (owner, group))
            return S_ERROR("Failed to get the pilot's owner proxy")
        proxy = ret['Value']

        pilotStamp = pilotDict['PilotStamp']
        result = getPilotOutput(proxy, gridType, pilotReference, pilotStamp)
        if not result['OK']:
            return S_ERROR('Failed to get pilot output: ' + result['Message'])
        # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
        stdout = result['StdOut']
        error = result['StdErr']
        fileList = result['FileList']
        if stdout:
            result = pilotDB.storePilotOutput(pilotReference, stdout, error)
            if not result['OK']:
                gLogger.error('Failed to store pilot output:', result['Message'])

        return S_OK(_packOutput(stdout, error, owner, group, fileList))

    # Any other grid type: instantiate the appropriate CE
    ceFactory = ComputingElementFactory()
    result = getQueue(pilotDict['GridSite'],
                      pilotDict['DestinationSite'],
                      pilotDict['Queue'])
    if not result['OK']:
        return result
    queueDict = result['Value']
    result = ceFactory.getCE(gridType, pilotDict['DestinationSite'], queueDict)
    if not result['OK']:
        return result
    ce = result['Value']

    # The stamp, when there is one, is appended to the reference given to the CE
    pilotStamp = pilotDict['PilotStamp']
    pRef = pilotReference + ':::' + pilotStamp if pilotStamp else pilotReference
    result = ce.getJobOutput(pRef)
    if not result['OK']:
        return result
    stdout, error = result['Value']
    if stdout:
        result = pilotDB.storePilotOutput(pilotReference, stdout, error)
        if not result['OK']:
            gLogger.error('Failed to store pilot output:', result['Message'])

    return S_OK(_packOutput(stdout, error, owner, group, []))
def getQueues(self):
    """Get the list of relevant CEs and their descriptions.

    For every site of self.siteNames the CEs are read from the CS section
    /Resources/Sites/<grid>/<site>/CEs. A CE is kept when it is listed in the
    'CEs' agent option (or that option is empty), its SubmissionMode is
    "direct" and its CEType is listed in the 'CETypes' agent option. Each
    queue of the kept CEs gets an entry in self.queueDict, keyed '<ce>_<queue>'.

    :return: S_OK(), S_ERROR for a failed or empty CS look-up, or the failed
             result of the CE creation / validation
    """
    ceFactory = ComputingElementFactory()
    ceTypes = self.am_getOption('CETypes', [])
    ceConfList = self.am_getOption('CEs', [])

    for siteName in self.siteNames:
        # Look up CE definitions in the site CS description
        ceList = []
        gridType = siteName.split('.')[0]
        result = gConfig.getSections('/Resources/Sites/%s/%s/CEs' % (gridType, siteName))
        if not result['OK']:
            return S_ERROR('Failed to look up the CS for the site %s CEs' % siteName)
        if not result['Value']:
            return S_ERROR('No CEs found for site %s' % siteName)

        for ce in result['Value']:
            # An empty 'CEs' option means that all the CEs are considered
            if ceConfList and ce not in ceConfList:
                continue
            ceType = gConfig.getValue('/Resources/Sites/%s/%s/CEs/%s/CEType' % (gridType, siteName, ce),
                                      'Unknown')
            result = gConfig.getOptionsDict('/Resources/Sites/%s/%s/CEs/%s' % (gridType, siteName, ce))
            if not result['OK']:
                return S_ERROR('Failed to look up the CS for ce %s' % ce)
            ceDict = result['Value']
            if "SubmissionMode" in ceDict and ceDict['SubmissionMode'].lower() == "direct":
                if ceType in ceTypes:
                    ceList.append((ce, ceType, ceDict))

        for ce, ceType, ceDict in ceList:
            section = '/Resources/Sites/%s/%s/CEs/%s/Queues' % (gridType, siteName, ce)
            result = gConfig.getSections(section)
            if not result['OK']:
                return S_ERROR('Failed to look up the CS for queues')
            if not result['Value']:
                return S_ERROR('No Queues found for site %s, ce %s' % (siteName, ce))

            for queue in result['Value']:
                result = gConfig.getOptionsDict('%s/%s' % (section, queue))
                if not result['OK']:
                    return S_ERROR('Failed to look up the CS for ce,queue %s,%s' % (ce, queue))

                queueName = '%s_%s' % (ce, queue)
                parameters = result['Value']
                queueEntry = self.queueDict[queueName] = {'ParametersDict': parameters}
                parameters['Queue'] = queue
                parameters['Site'] = siteName
                parameters['GridEnv'] = self.gridEnv
                parameters['Setup'] = gConfig.getValue('/DIRAC/Setup', 'unknown')

                # Evaluate the CPU limit of the queue according to the Glue convention
                # To Do: should be a utility
                if "maxCPUTime" in parameters and "SI00" in parameters:
                    maxCPUTime = float(parameters['maxCPUTime'])
                    # For some sites there are crazy values in the CS
                    maxCPUTime = max(maxCPUTime, 0)
                    maxCPUTime = min(maxCPUTime, 86400 * 12.5)
                    si00 = float(parameters['SI00'])
                    queueCPUTime = 60. / 250. * maxCPUTime * si00
                    parameters['CPUTime'] = int(queueCPUTime)

                qwDir = os.path.join(self.workingDirectory, queue)
                if not os.path.exists(qwDir):
                    # makedirs also creates the working directory itself
                    # when it does not exist yet, where mkdir would fail
                    os.makedirs(qwDir)
                parameters['WorkingDirectory'] = qwDir

                # Queue parameters override the CE level ones
                queueDict = dict(ceDict)
                queueDict.update(parameters)
                result = ceFactory.getCE(ceName=ce,
                                         ceType=ceType,
                                         ceParametersDict=queueDict)
                if not result['OK']:
                    return result
                queueEntry['CE'] = result['Value']
                queueEntry['CEName'] = ce
                queueEntry['CEType'] = ceType
                queueEntry['Site'] = siteName
                queueEntry['QueueName'] = queue

                result = queueEntry['CE'].isValid()
                if not result['OK']:
                    self.log.fatal(result['Message'])
                    return result

                # Look at the option value, not only at its presence:
                # a BundleProxy set to e.g. "False" must not enable bundling.
                if 'BundleProxy' in parameters:
                    if str(parameters['BundleProxy']).lower() in ('true', 'yes', '1'):
                        queueEntry['BundleProxy'] = True

    return S_OK()
def getQueues(self, resourceDict):
    """Get the list of relevant CEs and their descriptions.

    Rebuilds self.queueDict with one entry per queue, keyed '<ce>_<queue>'.
    CE objects are taken from self.queueCECache when the hash of the queue
    definition did not change, and created through the factory otherwise.
    self.platforms and self.sites are completed along the way.

    :param dict resourceDict: indexed as resourceDict[site][ce]; each CE
                              dictionary provides 'CEType' and 'Queues'.
                              'Queues' is removed from the CE dictionary and
                              the queue dictionaries are completed in place.
    :return: S_OK(), or the failed result of the CE creation / validation
    """
    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
        for ce in resourceDict[site]:
            ceDict = resourceDict[site][ce]

            # CE level settings that the queues inherit
            ceTags = ceDict.get('Tag', [])
            pilotRunDirectory = ceDict.get('PilotRunDirectory', '')
            if isinstance(ceTags, basestring):
                ceTags = fromChar(ceTags)
            ceMaxRAM = ceDict.get('MaxRAM', None)
            qDict = ceDict.pop('Queues')

            for queue in qDict:
                queueName = '%s_%s' % (ce, queue)
                entry = {}
                self.queueDict[queueName] = entry
                params = qDict[queue]
                entry['ParametersDict'] = params
                params['Queue'] = queue
                params['Site'] = site
                params['GridEnv'] = self.gridEnv
                params['Setup'] = gConfig.getValue('/DIRAC/Setup', 'unknown')

                # Evaluate the CPU limit of the queue according to the Glue convention
                # To Do: should be a utility
                if "maxCPUTime" in params and "SI00" in params:
                    maxCPUTime = float(params['maxCPUTime'])
                    # For some sites there are crazy values in the CS
                    maxCPUTime = max(maxCPUTime, 0)
                    maxCPUTime = min(maxCPUTime, 86400 * 12.5)
                    si00 = float(params['SI00'])
                    params['CPUTime'] = int(60. / 250. * maxCPUTime * si00)

                # Merge the queue tags with the CE ones
                queueTags = params.get('Tag')
                if queueTags and isinstance(queueTags, basestring):
                    queueTags = fromChar(queueTags)
                    params['Tag'] = queueTags
                if ceTags:
                    if queueTags:
                        params['Tag'] = list(set(ceTags + queueTags))
                    else:
                        params['Tag'] = ceTags

                # The queue MaxRAM wins, the CE one is the fallback
                maxRAM = params.get('MaxRAM') or ceMaxRAM
                if maxRAM:
                    params['MaxRAM'] = maxRAM
                if pilotRunDirectory:
                    params['JobExecDir'] = pilotRunDirectory

                qwDir = os.path.join(self.workingDirectory, queue)
                mkDir(qwDir)
                params['WorkingDirectory'] = qwDir

                # Queue level platform first, then CE level, then architecture_OS
                if "Platform" in params:
                    platform = params['Platform']
                elif "Platform" in ceDict:
                    platform = ceDict['Platform']
                elif "OS" in ceDict:
                    platform = '_'.join([ceDict.get('architecture', 'x86_64'), ceDict['OS']])
                else:
                    platform = ''
                if platform and platform not in self.platforms:
                    self.platforms.append(platform)

                if platform and "Platform" not in params:
                    result = Resources.getDIRACPlatform(platform)
                    if result['OK']:
                        params['Platform'] = result['Value'][0]

                # Queue parameters override the CE level ones
                ceQueueDict = dict(ceDict)
                ceQueueDict.update(params)

                # Generate the CE object for the queue or pick the already existing one
                # if the queue definition did not change
                queueHash = self.__generateQueueHash(ceQueueDict)
                cached = queueName in self.queueCECache and self.queueCECache[queueName]['Hash'] == queueHash
                if not cached:
                    result = ceFactory.getCE(ceName=ce,
                                             ceType=ceDict['CEType'],
                                             ceParametersDict=ceQueueDict)
                    if not result['OK']:
                        return result
                    self.queueCECache.setdefault(queueName, {})
                    self.queueCECache[queueName]['Hash'] = queueHash
                    self.queueCECache[queueName]['CE'] = result['Value']
                queueCE = self.queueCECache[queueName]['CE']

                entry['CE'] = queueCE
                entry['CEName'] = ce
                entry['CEType'] = ceDict['CEType']
                entry['Site'] = site
                entry['QueueName'] = queue
                entry['Platform'] = platform

                result = entry['CE'].isValid()
                if not result['OK']:
                    self.log.fatal(result['Message'])
                    return result

                # The queue level option takes precedence over the CE level one
                if 'BundleProxy' in params:
                    if params['BundleProxy'].lower() in ['true', 'yes', '1']:
                        entry['BundleProxy'] = True
                elif 'BundleProxy' in ceDict:
                    if ceDict['BundleProxy'].lower() in ['true', 'yes', '1']:
                        entry['BundleProxy'] = True

            if site not in self.sites:
                self.sites.append(site)

    return S_OK()
def getQueues(self, resourceDict):
    """Get the list of relevant CEs and their descriptions.

    Rebuilds self.queueDict with one entry per queue, keyed "<ce>_<queue>",
    holding the queue parameters, the CE object and a few identification
    keys. self.platforms and self.sites are completed along the way.

    :param dict resourceDict: indexed as resourceDict[site][ce]; each CE
                              dictionary provides "CEType" and "Queues".
                              "Queues" is removed from the CE dictionary and
                              the queue dictionaries are completed in place.
    :return: S_OK(), or the failed result of the CE creation / validation
    """
    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
        for ce in resourceDict[site]:
            ceDict = resourceDict[site][ce]
            # Popped so that "Queues" is not part of the CE parameters below
            qDict = ceDict.pop("Queues")
            for queue in qDict:
                queueName = "%s_%s" % (ce, queue)
                parameters = qDict[queue]
                queueEntry = self.queueDict[queueName] = {"ParametersDict": parameters}
                parameters["Queue"] = queue
                parameters["Site"] = site
                parameters["GridEnv"] = self.gridEnv
                parameters["Setup"] = gConfig.getValue("/DIRAC/Setup", "unknown")

                # Evaluate the CPU limit of the queue according to the Glue convention
                # To Do: should be a utility
                if "maxCPUTime" in parameters and "SI00" in parameters:
                    maxCPUTime = float(parameters["maxCPUTime"])
                    # For some sites there are crazy values in the CS
                    maxCPUTime = max(maxCPUTime, 0)
                    maxCPUTime = min(maxCPUTime, 86400 * 12.5)
                    si00 = float(parameters["SI00"])
                    queueCPUTime = 60.0 / 250.0 * maxCPUTime * si00
                    parameters["CPUTime"] = int(queueCPUTime)

                qwDir = os.path.join(self.workingDirectory, queue)
                if not os.path.exists(qwDir):
                    os.makedirs(qwDir)
                parameters["WorkingDirectory"] = qwDir

                # Queue level platform first, then CE level, then architecture_OS
                platform = ""
                if "Platform" in parameters:
                    platform = parameters["Platform"]
                elif "Platform" in ceDict:
                    platform = ceDict["Platform"]
                elif "OS" in ceDict:
                    architecture = ceDict.get("architecture", "x86_64")
                    platform = "_".join([architecture, ceDict["OS"]])
                if platform and platform not in self.platforms:
                    self.platforms.append(platform)

                if "Platform" not in parameters and platform:
                    result = Resources.getDIRACPlatform(platform)
                    if result["OK"]:
                        parameters["Platform"] = result["Value"]

                # Queue parameters override the CE level ones
                ceQueueDict = dict(ceDict)
                ceQueueDict.update(parameters)

                result = ceFactory.getCE(ceName=ce, ceType=ceDict["CEType"], ceParametersDict=ceQueueDict)
                if not result["OK"]:
                    return result
                queueEntry["CE"] = result["Value"]
                queueEntry["CEName"] = ce
                queueEntry["CEType"] = ceDict["CEType"]
                queueEntry["Site"] = site
                queueEntry["QueueName"] = queue

                result = queueEntry["CE"].isValid()
                if not result["OK"]:
                    self.log.fatal(result["Message"])
                    return result

                # The queue level option takes precedence over the CE level one.
                # Look at the option value, not only at its presence:
                # a BundleProxy set to e.g. "False" must not enable bundling.
                if "BundleProxy" in parameters:
                    bundleProxy = parameters["BundleProxy"]
                elif "BundleProxy" in ceDict:
                    bundleProxy = ceDict["BundleProxy"]
                else:
                    bundleProxy = ""
                if str(bundleProxy).lower() in ("true", "yes", "1"):
                    queueEntry["BundleProxy"] = True

            if site not in self.sites:
                self.sites.append(site)

    return S_OK()