def __configurePilot(basepath, vo):
    """
    Run the pilot script and install the configuration file it leaves behind.

    The VO, the setup and the master configuration server are read from the
    local DIRAC configuration and passed to ``dirac-pilot.py``.  Afterwards
    ``pilot.cfg`` in the user's home directory is copied to ``.dirac.cfg``
    (an existing ``.dirac.cfg`` is renamed to ``.dirac.cfg.old`` when possible).

    This method was created specifically for LHCb pilots, more info about
    other VOs is needed to make it more general.

    :param str basepath: prefix prepended verbatim to ``dirac-pilot.py``
    :param vo: not used -- the value is replaced by the result of ``getVO()``
    """
    from DIRAC.ConfigurationSystem.Client.Helpers.CSGlobals import getVO, getSetup
    from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData

    vo = getVO()
    setupName = getSetup()
    masterServer = gConfigurationData.getMasterServer()

    pilotCall = (
        "dirac-pilot.py -S %s -l %s -C %s -N ce.debug.ch -Q default -n DIRAC.JobDebugger.ch --pythonVersion=3 -dd"
        % (setupName, vo, masterServer)
    )
    os.system("python " + basepath + pilotCall)

    homePrefix = os.path.expanduser("~") + os.path.sep
    userCfg = homePrefix + ".dirac.cfg"
    try:
        os.rename(userCfg, userCfg + ".old")
    except OSError:
        # Best effort: the backup is simply skipped if the rename fails
        pass
    shutil.copyfile(homePrefix + "pilot.cfg", userCfg)
def __refreshAndPublish(self):
    """
    Refresh the configuration from the master server and, if auto-publish is
    enabled, publish this server's URL to it.

    Without a configured master server the refresh is delegated to
    ``self.__refresh()``.

    :return: False when the update from the master fails; True otherwise
             (a failed publish is only logged).  With no master server,
             the 'OK' flag of ``self.__refresh()``.
    """
    self.__lastUpdateTime = time.time()
    gLogger.info("Refreshing from master server")
    from DIRAC.Core.DISET.RPCClient import RPCClient

    masterURL = gConfigurationData.getMasterServer()
    if not masterURL:
        gLogger.warn(
            "No master server is specified in the configuration, trying to get data from other slaves"
        )
        return self.__refresh()['OK']

    masterClient = RPCClient(
        masterURL,
        timeout=self.__timeout,
        useCertificates=gConfigurationData.useServerCertificate(),
        skipCACheck=gConfigurationData.skipCACheck())
    result = _updateFromRemoteLocation(masterClient)
    if not result['OK']:
        gLogger.error("Can't update from master server", result['Message'])
        return False

    if not gConfigurationData.getAutoPublish():
        return True

    gLogger.info("Publishing to master server...")
    result = masterClient.publishSlaveServer(self.__url)
    if not result['OK']:
        gLogger.error("Can't publish to master server", result['Message'])
    return True
def do_connect(self, line): """connect Connect to the CS Usage: connect <URL> (Connect to the CS at the specified URL) connect (Connect to the default CS URL of your config) """ if line == "": self.serverURL = gConfigurationData.getMasterServer() self.serverName = gConfigurationData.getName() else: self.serverURL = self.serverName = line if self.serverURL == None: print "Unable to connect to the default server. Maybe you don't have a proxy ?" return self.do_disconnect("") print "Trying to connect to " + self.serverURL + "...", self.modificator = Modificator(RPCClient(self.serverURL)) rv = self.modificator.loadFromRemote() rv2 = self.modificator.loadCredentials() if rv['OK'] == False or rv2['OK'] == False: print "failed: ", if rv['OK'] == False: print rv['Message'] else: print rv2['Message'] self.connected = False self.update_prompt()
def do_connect(self, line):
    """connect
    Connect to the CS
    Usage: connect <URL> (Connect to the CS at the specified URL)
           connect       (Connect to the default CS URL of your config)
    """
    # An empty argument means: take the master server URL and the CS name
    # from the local configuration
    if line == "":
        self.serverURL = gConfigurationData.getMasterServer()
        self.serverName = gConfigurationData.getName()
    else:
        self.serverURL = self.serverName = line

    if self.serverURL is None:
        print("Unable to connect to the default server. Maybe you don't have a proxy ?")
        return self.do_disconnect("")

    # end=" " keeps the cursor on the line so the outcome is appended to it
    print("Trying to connect to " + self.serverURL + "...", end=" ")

    self.modificator = Modificator(ConfigurationClient(url=self.serverURL))
    rv = self.modificator.loadFromRemote()
    rv2 = self.modificator.loadCredentials()

    # Both checks use plain truthiness so that the branch taken and the
    # message selected always agree on which call failed
    if not rv["OK"] or not rv2["OK"]:
        print("failed: ", end=" ")
        if not rv["OK"]:
            print(rv["Message"])
        else:
            print(rv2["Message"])
        self.connected = False
        self.update_prompt()
    else:
        self.connected = True
        self.update_prompt()
        print("done.")
def __refreshAndPublish(self):
    """
    Pull the configuration from the master server, then optionally publish
    this server's URL to it.

    If the configuration names no master server, ``self.__refresh()`` is
    used instead.

    :return: False if updating from the master failed, True otherwise (a
             publish failure is logged but does not change the result);
             when there is no master, the 'OK' flag of ``self.__refresh()``
    """
    self.__lastUpdateTime = time.time()
    gLogger.info("Refreshing from master server")
    from DIRAC.Core.DISET.RPCClient import RPCClient

    masterServerURL = gConfigurationData.getMasterServer()
    if not masterServerURL:
        gLogger.warn("No master server is specified in the configuration, trying to get data from other slaves")
        return self.__refresh()["OK"]

    rpcClient = RPCClient(
        masterServerURL,
        timeout=self.__timeout,
        useCertificates=gConfigurationData.useServerCertificate(),
        skipCACheck=gConfigurationData.skipCACheck(),
    )

    updateResult = _updateFromRemoteLocation(rpcClient)
    if not updateResult["OK"]:
        gLogger.error("Can't update from master server", updateResult["Message"])
        return False

    if gConfigurationData.getAutoPublish():
        gLogger.info("Publishing to master server...")
        publishResult = rpcClient.publishSlaveServer(self.__url)
        if not publishResult["OK"]:
            gLogger.error("Can't publish to master server", publishResult["Message"])

    return True
def _refreshAndPublish(self):
    """
    Refresh configuration and publish local updates

    The configuration is fetched from the master server; if auto-publish is
    enabled, this server's URL is then published to the master.  When no
    master server is configured the work is handed to ``self._refresh()``.

    :return: False if the update from the master failed, True otherwise (a
             failed publish is only logged); with no master server, the
             'OK' flag of ``self._refresh()``
    """
    self._lastUpdateTime = time.time()
    gLogger.info("Refreshing from master server")

    masterURL = gConfigurationData.getMasterServer()
    if not masterURL:
        gLogger.warn(
            "No master server is specified in the configuration, trying to get data from other slaves"
        )
        return self._refresh()["OK"]

    from DIRAC.ConfigurationSystem.Client.ConfigurationClient import ConfigurationClient

    csClient = ConfigurationClient(
        url=masterURL,
        timeout=self._timeout,
        useCertificates=gConfigurationData.useServerCertificate(),
        skipCACheck=gConfigurationData.skipCACheck(),
    )

    outcome = _updateFromRemoteLocation(csClient)
    if not outcome["OK"]:
        gLogger.error("Can't update from master server", outcome["Message"])
        return False

    if not gConfigurationData.getAutoPublish():
        return True

    gLogger.info("Publishing to master server...")
    outcome = csClient.publishSlaveServer(self._url)
    if not outcome["OK"]:
        gLogger.error("Can't publish to master server", outcome["Message"])
    return True
def _refresh(self, fromMaster=False):
    """
    Refresh configuration

    The servers to contact are, in order of preference: the gateways
    returned for "Configuration/Server", the master server alone (when
    ``fromMaster`` is set), or the servers listed by ``gConfigurationData``.
    They are passed through ``List.randomize`` and tried one after another
    until an update succeeds.

    :param bool fromMaster: contact only the master server (not considered
                            when a gateway is available)
    :return: S_OK() when there is no server to contact, the result of the
             first successful update, or S_ERROR with the distinct error
             messages collected
    """
    self._lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gateways = getGatewayURLs("Configuration/Server")
    failures = []

    if gateways:
        candidates = gateways
        gLogger.debug("Using configuration gateway", str(candidates[0]))
    elif fromMaster:
        masterURL = gConfigurationData.getMasterServer()
        candidates = [masterURL]
        gLogger.debug("Refreshing from master %s" % masterURL)
    else:
        candidates = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(candidates))

    # If no servers in the initial list, we are supposed to use the local configuration only
    if not candidates:
        return S_OK()

    shuffledServers = List.randomize(candidates)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

    from DIRAC.ConfigurationSystem.Client.ConfigurationClient import ConfigurationClient

    for serverURL in shuffledServers:
        csClient = ConfigurationClient(
            url=serverURL,
            useCertificates=gConfigurationData.useServerCertificate(),
            skipCACheck=gConfigurationData.skipCACheck(),
        )
        outcome = _updateFromRemoteLocation(csClient)
        if outcome["OK"]:
            self._refreshTime = gConfigurationData.getRefreshTime()
            return outcome

        errorMessage = outcome["Message"]
        failures.append(errorMessage)
        gLogger.warn(
            "Can't update from server",
            "Error while updating from %s: %s" % (serverURL, errorMessage))
        # This error stops the loop: the remaining servers are not tried
        if "Insane environment" in errorMessage:
            break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(failures)))
def do_connect(self, args=''):
    """
    Connects to configuration master server (in specified url if provided).

    Usage: connect <url>
    """
    # No usable argument: fall back to the master server of the local
    # configuration.  isinstance() is used so that subclasses of the string
    # types are accepted as URLs as well.
    if not args or not isinstance(args, types.StringTypes):
        self.masterURL = gConfigurationData.getMasterServer()
        if self.masterURL != "unknown" and self.masterURL:
            self._tryConnection()
        else:
            self._setStatus(False)
        return

    splitted = args.split()
    if not splitted:
        # Reached when the argument consists of whitespace only
        print("Must specify witch url to connect")
        self._setStatus(False)
    else:
        # Only the first whitespace-separated token is used as the URL
        self.masterURL = splitted[0].strip()
        self._tryConnection()
def do_connect(self, args=''):
    """
    Connects to configuration master server (in specified url if provided).

    Usage: connect <url>
    """
    if args and isinstance(args, six.string_types):
        # Explicit URL: only the first whitespace-separated token is used
        tokens = args.split()
        if tokens:
            self.masterURL = tokens[0].strip()
            self._tryConnection()
        else:
            print("Must specify witch url to connect")
            self._setStatus(False)
        return

    # No usable argument: use the master server from the local configuration
    self.masterURL = gConfigurationData.getMasterServer()
    if self.masterURL and self.masterURL != "unknown":
        self._tryConnection()
    else:
        self._setStatus(False)
def __refresh(self, fromMaster=False):
    """
    Refresh the configuration from one of the available servers.

    The servers to contact are, in order of preference: the gateways
    returned for "Configuration/Server", the master server alone (when
    ``fromMaster`` is set), or the servers listed by ``gConfigurationData``.
    They are passed through ``List.randomize`` and tried one after another
    until an update succeeds.

    :param bool fromMaster: contact only the master server (not considered
                            when a gateway is available)
    :return: S_OK() when there is no server to contact, the result of the
             first successful update, or S_ERROR with the distinct error
             messages collected
    """
    self.__lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gateways = getGatewayURLs("Configuration/Server")
    failures = []

    if gateways:
        candidates = gateways
        gLogger.debug("Using configuration gateway", str(candidates[0]))
    elif fromMaster:
        masterURL = gConfigurationData.getMasterServer()
        candidates = [masterURL]
        gLogger.debug("Refreshing from master %s" % masterURL)
    else:
        candidates = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(candidates))

    # If no servers in the initial list, we are supposed to use the local configuration only
    if not candidates:
        return S_OK()

    shuffledServers = List.randomize(candidates)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

    from DIRAC.Core.DISET.RPCClient import RPCClient

    for serverURL in shuffledServers:
        rpcClient = RPCClient(serverURL,
                              useCertificates=gConfigurationData.useServerCertificate(),
                              skipCACheck=gConfigurationData.skipCACheck())
        outcome = _updateFromRemoteLocation(rpcClient)
        if outcome['OK']:
            return outcome

        errorMessage = outcome['Message']
        failures.append(errorMessage)
        gLogger.warn("Can't update from server",
                     "Error while updating from %s: %s" % (serverURL, errorMessage))
        # This error stops the loop: the remaining servers are not tried
        if "Insane environment" in errorMessage:
            break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(failures)))
def __configurePilot(basepath, vo):
    """
    Configures the pilot.

    The VO, the setup and the master configuration server are read from the
    local DIRAC configuration and passed to ``dirac-pilot.py``.  Afterwards
    ``pilot.cfg`` in the user's home directory is copied to ``.dirac.cfg``
    (an existing ``.dirac.cfg`` is renamed to ``.dirac.cfg.old`` when possible).

    This method was created specifically for LHCb pilots, more info about
    other VOs is needed to make it more general.

    :param str basepath: prefix prepended verbatim to ``dirac-pilot.py``
    :param vo: not used -- the value is replaced by the result of ``getVO()``
    """
    from DIRAC.ConfigurationSystem.Client.Helpers.CSGlobals import getVO, getSetup
    from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData

    vo = getVO()
    currentSetup = getSetup()
    masterCS = gConfigurationData.getMasterServer()

    os.system("python " + basepath + "dirac-pilot.py -S %s -l %s -C %s -N ce.debug.ch -Q default -n DIRAC.JobDebugger.ch -dd"
              % (currentSetup, vo, masterCS))

    # Named "diracdir" rather than "dir" so the builtin dir() is not shadowed
    diracdir = os.path.expanduser('~') + os.path.sep
    try:
        os.rename(diracdir + '.dirac.cfg', diracdir + '.dirac.cfg.old')
    except OSError:
        # Best effort: the backup is simply skipped if the rename fails
        pass
    shutil.copyfile(diracdir + 'pilot.cfg', diracdir + '.dirac.cfg')
def getCSDict(self, includeMasterCS=True):
    """Collect from the CS the minimal information needed to run a pilot.

    The dictionary holds a timestamp, the per-setup pilot options (filled by
    ``self._getPilotOptionsPerSetup``), one entry per CE found under
    /Resources/Sites, the default setup (if set) and the list of
    configuration servers.

    :param bool includeMasterCS: when False, the master server is removed
                                 from the list of configuration servers
    :returns: pilotDict (containing pilots run info)
    :rtype: S_OK, S_ERROR, value is pilotDict
    """
    pilotDict = {
        "timestamp": datetime.datetime.utcnow().isoformat(),
        "Setups": {},
        "CEs": {},
        "GenericPilotDNs": [],
    }

    self.log.info("-- Getting the content of the CS --")

    # These are in fact not only setups: they may be "Defaults" sections, or VOs, in multi-VOs installations
    operationsRes = gConfig.getSections("/Operations/")
    if not operationsRes["OK"]:
        self.log.error("Can't get sections from Operations", operationsRes["Message"])
        return operationsRes
    setupsInOperations = operationsRes["Value"]

    # getting the setup(s) in this CS, and comparing with what we found in Operations
    diracSetupsRes = gConfig.getSections("DIRAC/Setups")
    if not diracSetupsRes["OK"]:
        self.log.error("Can't get sections from DIRAC/Setups", diracSetupsRes["Message"])
        return diracSetupsRes

    # Handling the case of multi-VO CS
    if set(diracSetupsRes["Value"]).isdisjoint(setupsInOperations):
        # Iterate over a copy: the name is rebound inside the loop
        for vo in list(setupsInOperations):
            voSections = gConfig.getSections("/Operations/%s" % vo)
            if voSections["OK"]:
                setupsInOperations = voSections["Value"]

    self.log.verbose("From Operations/[Setup]/Pilot")
    for setup in setupsInOperations:
        self._getPilotOptionsPerSetup(setup, pilotDict)

    self.log.verbose("From Resources/Sites")
    gridsRes = gConfig.getSections("/Resources/Sites/")
    if not gridsRes["OK"]:
        self.log.error("Can't get sections from Resources", gridsRes["Message"])
        return gridsRes

    ceEntries = pilotDict["CEs"]
    for grid in gridsRes["Value"]:
        sitesRes = gConfig.getSections("/Resources/Sites/" + grid)
        if not sitesRes["OK"]:
            self.log.error("Can't get sections from Resources", sitesRes["Message"])
            return sitesRes

        for site in sitesRes["Value"]:
            siteCEs = ("/Resources", "Sites", grid, site, "CEs")
            cesRes = gConfig.getSections(cfgPath(*siteCEs))
            if not cesRes["OK"]:
                # Skip but log it
                self.log.error("Site has no CEs! - skipping", site)
                continue

            for ce in cesRes["Value"]:
                ceParts = siteCEs + (ce,)

                # This CEType is like 'HTCondor' or 'ARC' etc.
                ceInfo = {"Site": site}
                gridCEType = gConfig.getValue(cfgPath(*(ceParts + ("CEType",))))
                if gridCEType is None:
                    # Log it; the CE is still recorded, without a GridCEType
                    self.log.error("CE has no option CEType!", ce + " at " + site)
                else:
                    ceInfo["GridCEType"] = gridCEType
                ceEntries[ce] = ceInfo

                # This LocalCEType is like 'InProcess' or 'Pool' or 'Pool/Singularity' etc.
                # It can be in the queue and/or the CE level
                ceLevelType = gConfig.getValue(cfgPath(*(ceParts + ("LocalCEType",))))
                if ceLevelType is not None:
                    ceEntries[ce].setdefault("LocalCEType", ceLevelType)

                queuesRes = gConfig.getSections(cfgPath(*(ceParts + ("Queues",))))
                if not queuesRes["OK"]:
                    # Skip but log it
                    self.log.error("No queues found for CE", ce + ": " + queuesRes["Message"])
                    continue

                for queue in queuesRes["Value"]:
                    queueLevelType = gConfig.getValue(
                        cfgPath(*(ceParts + ("Queues", queue, "LocalCEType"))))
                    if queueLevelType is not None:
                        ceEntries[ce].setdefault(queue, {"LocalCEType": queueLevelType})

    defaultSetup = gConfig.getValue("/DIRAC/DefaultSetup")
    if defaultSetup:
        pilotDict["DefaultSetup"] = defaultSetup

    self.log.debug("From DIRAC/Configuration")
    csServers = gConfig.getServersList()
    if not includeMasterCS:
        masterCS = gConfigurationData.getMasterServer()
        csServers = list(set(csServers) - {masterCS})
    pilotDict["ConfigurationServers"] = csServers

    self.log.debug("Got pilotDict", str(pilotDict))

    return S_OK(pilotDict)
def _getPilotOptions(self, taskQueueDict, pilotsToSubmit):
    """
    Build the command-line options for the pilots of one TaskQueue.

    Private pilots (bound to the owner's group, and to the owner's DN unless
    the group has the JobSharing property) are submitted when the TaskQueue
    asks for them or, randomly, when ``self.privatePilotFraction`` exceeds
    ``random.random()``; a "ForceGeneric" key in the TaskQueue always selects
    generic pilots.  Generic pilots use the generic pilot credentials and a
    proxy token.

    :param dict taskQueueDict: TaskQueue description; the keys read here are
        TaskQueueID, OwnerDN, OwnerGroup, Setup, CPUTime and, optionally,
        PilotTypes and ForceGeneric
    :param int pilotsToSubmit: requested number of pilots
    :return: S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup,
        submitPrivatePilot, privateTQ)), or S_ERROR if the generic pilot
        credentials or the proxy token cannot be obtained
    """
    # Need to limit the maximum number of pilots to submit at once
    # For generic pilots this is limited by the number of use of the tokens and the
    # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
    pilotsToSubmit = max(min(pilotsToSubmit, int(50 / self.maxJobsInFillMode)), 1)
    pilotOptions = []
    privateIfGenericTQ = self.privatePilotFraction > random.random()
    privateTQ = "PilotTypes" in taskQueueDict and "private" in [t.lower() for t in taskQueueDict["PilotTypes"]]
    forceGeneric = "ForceGeneric" in taskQueueDict
    submitPrivatePilot = (privateIfGenericTQ or privateTQ) and not forceGeneric
    if submitPrivatePilot:
        self.log.verbose("Submitting private pilots for TaskQueue %s" % taskQueueDict["TaskQueueID"])
        ownerDN = taskQueueDict["OwnerDN"]
        ownerGroup = taskQueueDict["OwnerGroup"]
        # User Group requirement
        pilotOptions.append("-G %s" % taskQueueDict["OwnerGroup"])
        # check if group allows jobsharing
        ownerGroupProperties = getPropertiesForGroup(ownerGroup)
        if "JobSharing" not in ownerGroupProperties:
            # Add Owner requirement to pilot
            pilotOptions.append("-O '%s'" % ownerDN)
        if privateTQ:
            pilotOptions.append("-o /Resources/Computing/CEDefaults/PilotType=private")
        maxJobsInFillMode = self.maxJobsInFillMode
    else:
        # For generic jobs we'll submit mixture of generic and private pilots
        self.log.verbose("Submitting generic pilots for TaskQueue %s" % taskQueueDict["TaskQueueID"])
        # ADRI: Find the generic group
        result = findGenericPilotCredentials(group=taskQueueDict["OwnerGroup"])
        if not result["OK"]:
            self.log.error(ERROR_GENERIC_CREDENTIALS, result["Message"])
            return S_ERROR(ERROR_GENERIC_CREDENTIALS)
        ownerDN, ownerGroup = result["Value"]

        result = gProxyManager.requestToken(ownerDN, ownerGroup, max(pilotsToSubmit, self.maxJobsInFillMode))
        if not result["OK"]:
            self.log.error(ERROR_TOKEN, result["Message"])
            return S_ERROR(ERROR_TOKEN)
        (token, numberOfUses) = result["Value"]
        pilotsToSubmit = min(numberOfUses, pilotsToSubmit)

        pilotOptions.append("-o /Security/ProxyToken=%s" % token)

        # Ceiling of pilotsToSubmit / maxJobsInFillMode.  Floor division keeps
        # the pilot count an integer; "/" would yield a float on Python 3.
        # NOTE(review): assumes maxJobsInFillMode is an integer -- confirm
        pilotsToSubmit = max(1, (pilotsToSubmit - 1) // self.maxJobsInFillMode + 1)

        maxJobsInFillMode = int(numberOfUses / pilotsToSubmit)
    # Use Filling mode
    pilotOptions.append("-M %s" % maxJobsInFillMode)

    # Debug
    pilotOptions.append("-d")
    # Setup.
    pilotOptions.append("-S %s" % taskQueueDict["Setup"])
    # CS Servers
    csServers = gConfig.getServersList()
    if len(csServers) > 3:
        # Remove the master
        master = gConfigurationData.getMasterServer()
        if master in csServers:
            csServers.remove(master)
    pilotOptions.append("-C %s" % ",".join(csServers))
    # DIRAC Extensions
    extensionsList = getCSExtensions()
    if extensionsList:
        pilotOptions.append("-e %s" % ",".join(extensionsList))
    # Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
    opsHelper = Operations(group=taskQueueDict["OwnerGroup"], setup=taskQueueDict["Setup"])
    # Requested version of DIRAC (it can be a list, so we take the first one)
    version = opsHelper.getValue(cfgPath("Pilot", "Version"), [self.installVersion])[0]
    pilotOptions.append("-r %s" % version)
    # Requested Project to install
    installProject = opsHelper.getValue(cfgPath("Pilot", "Project"), self.installProject)
    if installProject:
        pilotOptions.append("-l %s" % installProject)
    installation = opsHelper.getValue(cfgPath("Pilot", "Installation"), self.installation)
    if installation:
        pilotOptions.append("-V %s" % installation)
    # Requested CPU time
    pilotOptions.append("-T %s" % taskQueueDict["CPUTime"])

    # Add the submit-pool option unless the extra options already carry it
    if self.submitPoolOption not in self.extraPilotOptions:
        pilotOptions.append(self.submitPoolOption)

    if self.extraPilotOptions:
        pilotOptions.extend(self.extraPilotOptions)

    return S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ))
def _getPilotOptions(self, taskQueueDict, pilotsToSubmit):
    """
    Build the command-line options for the pilots of one TaskQueue.

    Private pilots (bound to the owner's group, and to the owner's DN unless
    the group has the JobSharing property) are submitted when the TaskQueue
    asks for them or, randomly, when ``self.privatePilotFraction`` exceeds
    ``random.random()``; a 'ForceGeneric' key in the TaskQueue always selects
    generic pilots.  Generic pilots use the generic pilot credentials and a
    proxy token.

    :param dict taskQueueDict: TaskQueue description; the keys read here are
        TaskQueueID, OwnerDN, OwnerGroup, Setup, CPUTime and, optionally,
        PilotTypes and ForceGeneric
    :param int pilotsToSubmit: requested number of pilots
    :return: S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup,
        submitPrivatePilot, privateTQ)), or S_ERROR if the generic pilot
        credentials or the proxy token cannot be obtained
    """
    # Need to limit the maximum number of pilots to submit at once
    # For generic pilots this is limited by the number of use of the tokens and the
    # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
    pilotsToSubmit = max(min(pilotsToSubmit, int(50 / self.maxJobsInFillMode)), 1)
    pilotOptions = []
    privateIfGenericTQ = self.privatePilotFraction > random.random()
    privateTQ = ('PilotTypes' in taskQueueDict and 'private' in [t.lower() for t in taskQueueDict['PilotTypes']])
    forceGeneric = 'ForceGeneric' in taskQueueDict
    submitPrivatePilot = (privateIfGenericTQ or privateTQ) and not forceGeneric
    if submitPrivatePilot:
        self.log.verbose('Submitting private pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'])
        ownerDN = taskQueueDict['OwnerDN']
        ownerGroup = taskQueueDict['OwnerGroup']
        # User Group requirement
        pilotOptions.append('-G %s' % taskQueueDict['OwnerGroup'])
        # check if group allows jobsharing
        ownerGroupProperties = getPropertiesForGroup(ownerGroup)
        if 'JobSharing' not in ownerGroupProperties:
            # Add Owner requirement to pilot
            pilotOptions.append("-O '%s'" % ownerDN)
        if privateTQ:
            pilotOptions.append('-o /Resources/Computing/CEDefaults/PilotType=private')
        maxJobsInFillMode = self.maxJobsInFillMode
    else:
        # For generic jobs we'll submit mixture of generic and private pilots
        self.log.verbose('Submitting generic pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'])
        # ADRI: Find the generic group
        result = findGenericPilotCredentials(group=taskQueueDict['OwnerGroup'])
        if not result['OK']:
            self.log.error(ERROR_GENERIC_CREDENTIALS, result['Message'])
            return S_ERROR(ERROR_GENERIC_CREDENTIALS)
        ownerDN, ownerGroup = result['Value']

        result = gProxyManager.requestToken(ownerDN, ownerGroup, max(pilotsToSubmit, self.maxJobsInFillMode))
        if not result['OK']:
            self.log.error(ERROR_TOKEN, result['Message'])
            return S_ERROR(ERROR_TOKEN)
        (token, numberOfUses) = result['Value']
        pilotsToSubmit = min(numberOfUses, pilotsToSubmit)

        pilotOptions.append('-o /Security/ProxyToken=%s' % token)

        # Ceiling of pilotsToSubmit / maxJobsInFillMode.  Floor division keeps
        # the pilot count an integer whatever the division semantics in use
        # ("/" yields a float under Python 3 or "from __future__ import division").
        # NOTE(review): assumes maxJobsInFillMode is an integer -- confirm
        pilotsToSubmit = max(1, (pilotsToSubmit - 1) // self.maxJobsInFillMode + 1)

        maxJobsInFillMode = int(numberOfUses / pilotsToSubmit)
    # Use Filling mode
    pilotOptions.append('-M %s' % maxJobsInFillMode)

    # Debug
    pilotOptions.append('-d')
    # Setup.
    pilotOptions.append('-S %s' % taskQueueDict['Setup'])
    # CS Servers
    csServers = gConfig.getServersList()
    if len(csServers) > 3:
        # Remove the master
        master = gConfigurationData.getMasterServer()
        if master in csServers:
            csServers.remove(master)
    pilotOptions.append('-C %s' % ",".join(csServers))
    # DIRAC Extensions to be used in pilots
    # ubeda: I'm not entirely sure if we can use here the same opsHelper as in line
    # line +352
    pilotExtensionsList = Operations().getValue("Pilot/Extensions", [])
    extensionsList = []
    if pilotExtensionsList:
        # A first element equal to 'None' leaves the extensions list empty
        if pilotExtensionsList[0] != 'None':
            extensionsList = pilotExtensionsList
    else:
        # Nothing set under Pilot/Extensions: use the extensions known to the CS
        extensionsList = getCSExtensions()
    if extensionsList:
        pilotOptions.append('-e %s' % ",".join(extensionsList))

    # Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
    opsHelper = Operations(group=taskQueueDict['OwnerGroup'], setup=taskQueueDict['Setup'])
    # Requested version of DIRAC (it can be a list, so we take the first one)
    version = opsHelper.getValue(cfgPath('Pilot', 'Version'), [self.installVersion])[0]
    pilotOptions.append('-r %s' % version)
    # Requested Project to install
    installProject = opsHelper.getValue(cfgPath('Pilot', 'Project'), self.installProject)
    if installProject:
        pilotOptions.append('-l %s' % installProject)
    installation = opsHelper.getValue(cfgPath('Pilot', 'Installation'), self.installation)
    if installation:
        pilotOptions.append("-V %s" % installation)
    # Requested CPU time
    pilotOptions.append('-T %s' % taskQueueDict['CPUTime'])

    # Add the submit-pool option unless the extra options already carry it
    if self.submitPoolOption not in self.extraPilotOptions:
        pilotOptions.append(self.submitPoolOption)

    if self.extraPilotOptions:
        pilotOptions.extend(self.extraPilotOptions)

    return S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ))
def _getPilotOptions(self, taskQueueDict, pilotsToSubmit):
    """
    Build the command-line options for the pilots of one TaskQueue.

    Private pilots (bound to the owner's group, and to the owner's DN unless
    the group has the JobSharing property) are submitted when the TaskQueue
    asks for them or, randomly, when ``self.privatePilotFraction`` exceeds
    ``random.random()``; a 'ForceGeneric' key in the TaskQueue always selects
    generic pilots.  Generic pilots use the generic pilot credentials and a
    proxy token.

    :param dict taskQueueDict: TaskQueue description; the keys read here are
        TaskQueueID, OwnerDN, OwnerGroup, Setup, CPUTime and, optionally,
        PilotTypes and ForceGeneric
    :param int pilotsToSubmit: requested number of pilots
    :return: S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup,
        submitPrivatePilot, privateTQ)), or S_ERROR if the generic pilot
        credentials or the proxy token cannot be obtained
    """
    # Need to limit the maximum number of pilots to submit at once
    # For generic pilots this is limited by the number of use of the tokens and the
    # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
    pilotsToSubmit = max(
        min(pilotsToSubmit, int(50 / self.maxJobsInFillMode)), 1)
    pilotOptions = []
    privateIfGenericTQ = self.privatePilotFraction > random.random()
    privateTQ = ('PilotTypes' in taskQueueDict and
                 'private' in [t.lower() for t in taskQueueDict['PilotTypes']])
    forceGeneric = 'ForceGeneric' in taskQueueDict
    submitPrivatePilot = (privateIfGenericTQ or privateTQ) and not forceGeneric
    if submitPrivatePilot:
        self.log.verbose('Submitting private pilots for TaskQueue %s' %
                         taskQueueDict['TaskQueueID'])
        ownerDN = taskQueueDict['OwnerDN']
        ownerGroup = taskQueueDict['OwnerGroup']
        # User Group requirement
        pilotOptions.append('-G %s' % taskQueueDict['OwnerGroup'])
        # check if group allows jobsharing
        ownerGroupProperties = getPropertiesForGroup(ownerGroup)
        if 'JobSharing' not in ownerGroupProperties:
            # Add Owner requirement to pilot
            pilotOptions.append("-O '%s'" % ownerDN)
        if privateTQ:
            pilotOptions.append(
                '-o /Resources/Computing/CEDefaults/PilotType=private')
        maxJobsInFillMode = self.maxJobsInFillMode
    else:
        # For generic jobs we'll submit mixture of generic and private pilots
        self.log.verbose('Submitting generic pilots for TaskQueue %s' %
                         taskQueueDict['TaskQueueID'])
        # ADRI: Find the generic group
        result = findGenericPilotCredentials(
            group=taskQueueDict['OwnerGroup'])
        if not result['OK']:
            self.log.error(ERROR_GENERIC_CREDENTIALS, result['Message'])
            return S_ERROR(ERROR_GENERIC_CREDENTIALS)
        ownerDN, ownerGroup = result['Value']

        result = gProxyManager.requestToken(
            ownerDN, ownerGroup,
            max(pilotsToSubmit, self.maxJobsInFillMode))
        if not result['OK']:
            self.log.error(ERROR_TOKEN, result['Message'])
            return S_ERROR(ERROR_TOKEN)
        (token, numberOfUses) = result['Value']
        pilotsToSubmit = min(numberOfUses, pilotsToSubmit)

        pilotOptions.append('-o /Security/ProxyToken=%s' % token)

        # Ceiling of pilotsToSubmit / maxJobsInFillMode.  Floor division keeps
        # the pilot count an integer; "/" would yield a float on Python 3.
        # NOTE(review): assumes maxJobsInFillMode is an integer -- confirm
        pilotsToSubmit = max(
            1, (pilotsToSubmit - 1) // self.maxJobsInFillMode + 1)

        maxJobsInFillMode = int(numberOfUses / pilotsToSubmit)
    # Use Filling mode
    pilotOptions.append('-M %s' % maxJobsInFillMode)

    # Debug
    pilotOptions.append('-d')
    # Setup.
    pilotOptions.append('-S %s' % taskQueueDict['Setup'])
    # CS Servers
    csServers = gConfig.getServersList()
    if len(csServers) > 3:
        # Remove the master
        master = gConfigurationData.getMasterServer()
        if master in csServers:
            csServers.remove(master)
    pilotOptions.append('-C %s' % ",".join(csServers))
    # DIRAC Extensions to be used in pilots
    # ubeda: I'm not entirely sure if we can use here the same opsHelper as in line
    # line +352
    pilotExtensionsList = Operations().getValue("Pilot/Extensions", [])
    extensionsList = []
    if pilotExtensionsList:
        # A first element equal to 'None' leaves the extensions list empty
        if pilotExtensionsList[0] != 'None':
            extensionsList = pilotExtensionsList
    else:
        # Nothing set under Pilot/Extensions: use the extensions known to the CS
        extensionsList = getCSExtensions()
    if extensionsList:
        pilotOptions.append('-e %s' % ",".join(extensionsList))

    # Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
    opsHelper = Operations(group=taskQueueDict['OwnerGroup'],
                           setup=taskQueueDict['Setup'])
    # Requested version of DIRAC (it can be a list, so we take the first one)
    version = opsHelper.getValue(cfgPath('Pilot', 'Version'),
                                 [self.installVersion])[0]
    pilotOptions.append('-r %s' % version)
    # Requested Project to install
    installProject = opsHelper.getValue(cfgPath('Pilot', 'Project'),
                                        self.installProject)
    if installProject:
        pilotOptions.append('-l %s' % installProject)
    installation = opsHelper.getValue(cfgPath('Pilot', 'Installation'),
                                      self.installation)
    if installation:
        pilotOptions.append("-V %s" % installation)
    # Requested CPU time
    pilotOptions.append('-T %s' % taskQueueDict['CPUTime'])

    # Add the submit-pool option unless the extra options already carry it
    if self.submitPoolOption not in self.extraPilotOptions:
        pilotOptions.append(self.submitPoolOption)

    if self.extraPilotOptions:
        pilotOptions.extend(self.extraPilotOptions)

    return S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup,
                 submitPrivatePilot, privateTQ))
def getCSDict(self, includeMasterCS=True):
    """ Collect from the CS the minimal information needed to run a pilot.

        The dictionary holds a timestamp, the per-setup pilot options (filled
        by ``self._getPilotOptionsPerSetup``), one entry per CE found under
        /Resources/Sites, the default setup (if set) and the list of
        configuration servers.

        :param bool includeMasterCS: when False, the master server is removed
                                     from the list of configuration servers
        :returns: pilotDict (containing pilots run info)
        :rtype: S_OK, S_ERROR, value is pilotDict
    """
    pilotDict = {
        'timestamp': datetime.datetime.utcnow().isoformat(),
        'Setups': {},
        'CEs': {},
        'GenericPilotDNs': []
    }

    self.log.info('-- Getting the content of the CS --')

    # These are in fact not only setups: they may be "Defaults" sections, or VOs, in multi-VOs installations
    operationsRes = gConfig.getSections('/Operations/')
    if not operationsRes['OK']:
        self.log.error("Can't get sections from Operations", operationsRes['Message'])
        return operationsRes
    setupsInOperations = operationsRes['Value']

    # getting the setup(s) in this CS, and comparing with what we found in Operations
    diracSetupsRes = gConfig.getSections('DIRAC/Setups')
    if not diracSetupsRes['OK']:
        self.log.error("Can't get sections from DIRAC/Setups", diracSetupsRes['Message'])
        return diracSetupsRes

    # Handling the case of multi-VO CS
    if set(diracSetupsRes['Value']).isdisjoint(setupsInOperations):
        # Iterate over a copy: the name is rebound inside the loop
        for vo in list(setupsInOperations):
            voSections = gConfig.getSections('/Operations/%s' % vo)
            if voSections['OK']:
                setupsInOperations = voSections['Value']

    self.log.verbose('From Operations/[Setup]/Pilot')
    for setup in setupsInOperations:
        self._getPilotOptionsPerSetup(setup, pilotDict)

    self.log.verbose('From Resources/Sites')
    gridsRes = gConfig.getSections('/Resources/Sites/')
    if not gridsRes['OK']:
        self.log.error("Can't get sections from Resources", gridsRes['Message'])
        return gridsRes

    ceEntries = pilotDict['CEs']
    for grid in gridsRes['Value']:
        sitesRes = gConfig.getSections('/Resources/Sites/' + grid)
        if not sitesRes['OK']:
            self.log.error("Can't get sections from Resources", sitesRes['Message'])
            return sitesRes

        for site in sitesRes['Value']:
            siteCEsPath = '/Resources/Sites/' + grid + '/' + site + '/CEs/'
            cesRes = gConfig.getSections(siteCEsPath)
            if not cesRes['OK']:
                # Skip but log it
                self.log.error('Site has no CEs! - skipping', site)
                continue

            for ce in cesRes['Value']:
                cePath = siteCEsPath + ce

                # This CEType is like 'HTCondor' or 'ARC' etc.
                ceInfo = {'Site': site}
                gridCEType = gConfig.getValue(cePath + '/CEType')
                if gridCEType is None:
                    # Log it; the CE is still recorded, without a GridCEType
                    self.log.error('CE has no option CEType!', ce + ' at ' + site)
                else:
                    ceInfo['GridCEType'] = gridCEType
                ceEntries[ce] = ceInfo

                # This LocalCEType is like 'InProcess' or 'Pool' or 'Pool/Singularity' etc.
                # It can be in the queue and/or the CE level
                ceLevelType = gConfig.getValue(cePath + '/LocalCEType')
                if ceLevelType is not None:
                    ceEntries[ce].setdefault('LocalCEType', ceLevelType)

                queuesPath = cePath + '/Queues/'
                queuesRes = gConfig.getSections(queuesPath)
                if not queuesRes['OK']:
                    # Skip but log it
                    self.log.error("No queues found for CE", ce + ': ' + queuesRes['Message'])
                    continue

                for queue in queuesRes['Value']:
                    queueLevelType = gConfig.getValue(queuesPath + queue + '/LocalCEType')
                    if queueLevelType is not None:
                        ceEntries[ce].setdefault(queue, {'LocalCEType': queueLevelType})

    defaultSetup = gConfig.getValue('/DIRAC/DefaultSetup')
    if defaultSetup:
        pilotDict['DefaultSetup'] = defaultSetup

    self.log.debug('From DIRAC/Configuration')
    csServers = gConfig.getServersList()
    if not includeMasterCS:
        masterCS = gConfigurationData.getMasterServer()
        csServers = list(set(csServers) - {masterCS})
    pilotDict['ConfigurationServers'] = csServers

    self.log.debug("Got pilotDict", str(pilotDict))

    return S_OK(pilotDict)