def _sendSMS( self, text ):
    """Send `text` by SMS to every user selected by the notification policy.

    Recipients are obtained from InfoGetter().getNotificationsThatApply()
    called with self.decissionParams and self.actionName.

    :param text: message handed to DiracAdmin.sendSMS for each user
    :return: the failed lookup result, S_ERROR for a malformed address
             dictionary or for the first failed delivery, otherwise the
             result of the last delivery ( S_OK when nobody had to be
             notified )
    """
    from DIRAC import S_OK
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    diracAdmin = DiracAdmin()

    address = InfoGetter().getNotificationsThatApply( self.decissionParams, self.actionName )
    if not address[ 'OK' ]:
        return address
    address = address[ 'Value' ]

    # Default result: with no recipients the final return used to hit an
    # unbound local variable.
    resSMS = S_OK()

    for addressDict in address:
        if 'name' not in addressDict or 'users' not in addressDict:
            return S_ERROR( 'Malformed address dict %s' % addressDict )

        for user in addressDict[ 'users' ]:
            # Where are the SMS numbers defined ?
            resSMS = diracAdmin.sendSMS( user, text )
            if not resSMS[ 'OK' ]:
                return S_ERROR( 'Cannot send SMS to user "%s"' % user )

    return resSMS

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
def _sendMail( self, subject, body ):
    """Send an e-mail to every user selected by the notification policy.

    Recipients are obtained from InfoGetter().getNotificationsThatApply()
    called with self.decissionParams and self.actionName.

    :param subject: subject handed to DiracAdmin.sendMail
    :param body: body handed to DiracAdmin.sendMail
    :return: the failed lookup result, S_ERROR for a malformed address
             dictionary or for the first failed delivery, otherwise the
             result of the last delivery ( S_OK when nobody had to be
             notified )
    """
    from DIRAC import S_OK
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    diracAdmin = DiracAdmin()

    address = InfoGetter().getNotificationsThatApply( self.decissionParams, self.actionName )
    if not address[ 'OK' ]:
        return address
    address = address[ 'Value' ]

    # Default result: with no recipients the final return used to hit an
    # unbound local variable.
    resEmail = S_OK()

    for addressDict in address:
        if 'name' not in addressDict or 'users' not in addressDict:
            return S_ERROR( 'Malformed address dict %s' % addressDict )

        for user in addressDict[ 'users' ]:
            #FIXME: should not I get the info from the RSS User cache ?
            resEmail = diracAdmin.sendMail( user, subject, body )
            if not resEmail[ 'OK' ]:
                return S_ERROR( 'Cannot send email to user "%s"' % user )

    return resEmail

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
def export_getSites(self):
    """ Get a full list of sites from the service

    Banned sites come from DiracAdmin.getBannedSites, allowed sites from the
    WMSAdministrator site mask.  Each entry holds the site name, its status,
    a fixed 'swver' string and the reversed site mask logging history
    serialised with simplejson.

    :return: S_OK( list of site dictionaries, allowed sites first ) or
             S_ERROR when neither source produced any site
    """
    diracAdmin = DiracAdmin()

    def describeSite(siteName, status, swver):
        """ Build the dictionary published for one site """
        logResult = diracAdmin.getSiteMaskLogging(siteName)
        # A failed history lookup must not abort the whole listing, so the
        # site is reported with an empty history instead of raising KeyError.
        history = logResult['Value'][siteName][::-1] if logResult['OK'] else []
        return {'name': siteName,
                'status': status,
                'swver': swver,
                'history': simplejson.dumps(history)}

    # list banned sites
    bannedSites = []
    bannedSitesHandler = diracAdmin.getBannedSites(printOutput=False)
    if bannedSitesHandler['OK']:
        bannedSites = [describeSite(bannedName, 'banned', '2012-01-01')
                       for bannedName in bannedSitesHandler['Value']]

    # list not banned sites
    activeSites = []
    wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator', timeout=120)
    siteMaskHandler = wmsAdmin.getSiteMask()
    if siteMaskHandler['OK']:
        activeSites = [describeSite(siteName, 'ok', '2012-11-02')
                       for siteName in siteMaskHandler['Value']]

    # build list of all sites
    sitesList = activeSites + bannedSites
    if not sitesList:
        return S_ERROR('No sites found')
    return S_OK(sitesList)
def execute(self):
    """ Get all New tags, mark them as Installing. Old Installing tags are reset to New

    For every CE of a 'New' tag whose sites are all in the site mask the
    status is set to 'Installing'; then either probe jobs are submitted
    ( self.submitjobs ) or the status is set straight to 'Valid'.  Finally
    tags returned by getTagsWithStatus( "Installing", self.delay ) are set
    back to 'New'.

    :return: S_OK, or the failed result of the site mask / 'New' tags query
    """
    #### get site mask ###
    diracAdmin = DiracAdmin()
    maskRes = diracAdmin.getSiteMask(printOutput=False)
    if not maskRes["OK"]:
        self.log.error("error retrieving site mask: %s"%str(maskRes["Message"]))
        # Without the mask there is no 'Value' to work with
        return maskRes
    site_mask = maskRes["Value"]

    newTags = self.swtc.getTagsWithStatus("New")
    if not newTags['OK']:
        return newTags
    if not newTags['Value']:
        self.log.info("No 'New' tags to consider")

    for tag, ces in newTags['Value'].items():
        for ce in ces:
            siteRes = getSiteForCEs([ce])
            if not siteRes["OK"]:
                self.log.error("could not retrieve Site name for CE %s"%ce)
                # No 'Value' available on failure: skip this CE
                continue

            disabledSites = [site for site in siteRes["Value"] if site not in site_mask]
            for site in disabledSites:
                self.log.info("CE/Site disabled %s"%site)
            if disabledSites:
                continue # ignore this CE

            res = self.swtc.updateCEStatus(tag, ce, 'Installing')
            if not res['OK']:
                self.log.error(res['Message'])
                continue

            if self.submitjobs:
                res = self.submitProbeJobs(ce)
            else:
                res = self.swtc.updateCEStatus(tag, ce, 'Valid')

            if not res['OK']:
                self.log.error(res['Message'])
            else:
                self.log.info("Done with %s at %s" %(tag, ce))

    ##Also, reset to New tags that were in Probing for too long.
    oldTags = self.swtc.getTagsWithStatus("Installing", self.delay)
    if not oldTags['OK']:
        self.log.error("Failed to get old 'Installing' tags")
    else:
        if not oldTags['Value']:
            self.log.info("No 'Installing' tags to reset")
        for tag, ces in oldTags['Value'].items():
            for ce in ces:
                res = self.swtc.updateCEStatus(tag, ce, 'New')
                if not res['OK']:
                    self.log.error(res['Message'])

    return S_OK()
def browseUsers(self):
    """ Fill c.usersData with name, e-mail and DN of the users listed for
    'belle' and render the browseUsers template """
    admin = DiracAdmin()
    names = admin.csListUsers('belle')['Value']
    descriptions = admin.csDescribeUsers(names)['Value']

    c.usersData = []
    for userName in names:
        c.usersData.append({'name': userName,
                            'email': descriptions[userName]['Email'],
                            'dn': descriptions[userName]['DN']})

    return render("/systems/sitemanagement/browseUsers.mako")
def getPilotLoggingInfo(gridID):
    """ Return the pilot logging info for `gridID`.

    On failure the error text is logged with gLogger.error and returned
    instead of the logging info.
    """
    result = DiracAdmin().getPilotLoggingInfo( gridID )
    if result['OK']:
        return result['Value']

    output = 'ERROR retrieving pilot logging info, %s'%str(result['Message'])
    gLogger.error(output)
    return output
def doCommand( self ):
    '''
      It returns the status of a given CE.

      The CE name is read from self.args['ce'], the optional BDII host from
      self.args.get('host'); the VO is taken from getVOs().

      :return:
        a dictionary with status of each CE queues,
        and 'status' and 'reason' of the CE itself
    '''

    ## INPUT PARAMETERS
    vos = getVOs()
    if not vos[ 'OK' ]:
        return S_ERROR( "No appropriate VO was found! %s" % vos['Message'] )
    if not vos[ 'Value' ]:
        # pop() on an empty collection would raise instead of returning S_ERROR
        return S_ERROR( "No appropriate VO was found! %s" % 'empty VO list' )
    vo = vos['Value'].pop()

    if 'ce' not in self.args:
        return S_ERROR( "No computing element 'ce' has been specified!" )
    ce = self.args['ce']

    host = self.args.get('host')

    #getting BDII info
    diracAdmin = DiracAdmin()
    ceQueues = diracAdmin.getBDIICEState( ce, useVO = vo, host = host )
    if not ceQueues['OK']:
        return S_ERROR( '"CE" not found on BDII' )
    elements = ceQueues['Value']

    #extracting the list of CE queues and their status
    result = {}
    for element in elements:
        queue = element.get('GlueCEUniqueID','Unknown') #pylint: disable=no-member
        statusQueue = element.get('GlueCEStateStatus','Unknown') #pylint: disable=no-member
        result[queue] = statusQueue.capitalize()

    #establishing the status of the CE itself
    result['Status'] = 'Production'
    result['Reason'] = "All queues in 'Production'"
    # Iterate over a snapshot because 'Status' / 'Reason' are rewritten inside
    for key, value in list( result.items() ):
        #warning: it may not be the case that all queues for a given CE
        #show the same status. In case of mismatch, the status of the CE
        #will be associated to a non-production status
        if key not in ['Status', 'Reason'] and value != 'Production':
            result['Status'] = value
            # Report the queue being examined ( key ), not the leftover
            # loop variable of the extraction loop above.
            result['Reason'] = "Queue %s is in status %s" % ( key, value )

    return S_OK( result )
def initialize( self ):
    '''
      EmailAgent initialization: creates the DiracAdmin client kept in
      self.diracAdmin and returns S_OK.
    '''

    self.diracAdmin = DiracAdmin()

    return S_OK()
def getInfo(params):
    '''
      Retrieve information from BDII

      :param params: dictionary; params['info'] selects the query ( 'ce',
                     'ce-state', 'ce-cluster', 'ce-vo', 'site', 'site-se' ),
                     the keys 'ce', 'site', 'vo' and 'host' are read as
                     needed by the selected query
      :return: the successful result of the query; on a failed query or an
               unknown query type the message is printed and DIRAC.exit( 2 )
               is called
    '''
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    diracAdmin = DiracAdmin()

    info = params['info']
    if info == 'ce':
        result = diracAdmin.getBDIICE(params['ce'], host=params['host'])
    elif info == 'ce-state':
        result = diracAdmin.getBDIICEState(params['ce'], useVO=params['vo'], host=params['host'])
    elif info == 'ce-cluster':
        result = diracAdmin.getBDIICluster(params['ce'], host=params['host'])
    elif info == 'ce-vo':
        result = diracAdmin.getBDIICEVOView(params['ce'], useVO=params['vo'], host=params['host'])
    elif info == 'site':
        result = diracAdmin.getBDIISite(params['site'], host=params['host'])
    elif info == 'site-se':
        result = diracAdmin.getBDIISE(params['site'], useVO=params['vo'], host=params['host'])
    else:
        # An unknown type used to fall through to an unbound `result`
        result = {'OK': False, 'Message': 'Unknown information type: %s' % info}

    if not result['OK']:
        print(result['Message'])
        DIRAC.exit(2)

    return result
def initialize(self):
    """ Read the polling time, register the 'Iteration' activity, create the
    clients used by the agent and select the 'Admin' shifter proxy. """
    self.pollingTime = self.am_getOption('PollingTime', 86400)
    gMonitor.registerActivity("Iteration", "Agent Loops",
                              AGENT_NAME, "Loops/min", gMonitor.OP_SUM)

    # Clients
    self.ppc = ProcessProdClient()
    self.dirac = DiracILC()
    self.diracadmin = DiracAdmin()

    self.am_setOption( 'shifterProxy', 'Admin' )

    return S_OK()
def addUserToCS(clip, userProps):
    """Add the user `clip.uname` to the CS and commit the change.

    Failures are collected and logged through gLogger.error; when the
    registration or the commit failed dexit( 255 ) is called.  The function
    itself returns nothing.

    :param clip: object providing the user name as `clip.uname`
    :param userProps: properties handed to DiracAdmin.csModifyUser
    """
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    diracAdmin = DiracAdmin()
    exitCode = 0
    errorList = []

    if not diracAdmin.csModifyUser( clip.uname, userProps, createIfNonExistant = True )['OK']:
        # The message needs a placeholder: formatting a string without one
        # raises TypeError instead of reporting the failure.
        errorList.append( ( "add user", "Cannot register user: '%s'" % clip.uname ) )
        exitCode = 255
    else:
        result = diracAdmin.csCommitChanges()
        if not result[ 'OK' ]:
            errorList.append( ( "commit", result[ 'Message' ] ) )
            exitCode = 255

    for error in errorList:
        gLogger.error( "%s: %s" % error )

    if exitCode:
        dexit(exitCode)
def __getSitesMaskStatus( self, sitesName ):
    """ Map every name in `sitesName` to 'Active' when it is part of the site
    mask returned by DiracAdmin.getSiteMask and to 'Banned' otherwise.

    :return: S_OK( { siteName : status } ) or the failed getSiteMask result
    """
    maskResult = DiracAdmin().getSiteMask()
    if not maskResult[ 'OK' ]:
        return maskResult
    allowedSites = maskResult[ 'Value' ]

    sitesStatus = dict( ( siteName, 'Active' if siteName in allowedSites else 'Banned' )
                        for siteName in sitesName )

    return S_OK( sitesStatus )
def initialize( self ):
    '''
      TokenAgent initialization: reads the 'notifyHours' and 'adminMail'
      options ( current attribute values are the defaults ) and creates the
      ResourceStatusClient and DiracAdmin clients.
    '''

    # Agent options
    self.notifyHours = self.am_getOption( 'notifyHours', self.notifyHours )
    self.adminMail = self.am_getOption( 'adminMail', self.adminMail )

    # Clients
    self.rsClient = ResourceStatusClient()
    self.diracAdmin = DiracAdmin()

    return S_OK()
def initialize( self ):
    """ Set the default agent options ( PollingTime, GridEnv,
    PilotStalledDays ) and create the database and admin clients.
    """
    for option, value in ( ( 'PollingTime', 120 ),
                           ( 'GridEnv', '' ),
                           ( 'PilotStalledDays', 3 ) ):
        self.am_setOption( option, value )

    self.pilotDB = PilotAgentsDB()
    self.diracadmin = DiracAdmin()
    self.jobDB = JobDB()

    return S_OK()
def __init__(self, cliParams ):
    """ Keep the command line parameters and derive from them the values
    stored in self.parameter and self.applications. """
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    self.diracAdmin = DiracAdmin()
    self.modifiedCS = False
    self.softSec = "/Operations/Defaults/AvailableTarBalls"
    self.mailadress = '*****@*****.**'
    self.cliParams = cliParams

    self.parameter = { 'softSec': self.softSec,
                       'platform': cliParams.platform,
                       'version': cliParams.version,
                       'basepath': cliParams.basePath,
                       'initsctipt': cliParams.initScriptLocation,
                     }
    self.applications = cliParams.applicationList
def initialize(self):
    """ Set the default agent options, create the database and service
    clients and read the pilot clearing delays.
    """
    for option, value in (('PollingTime', 120),
                          ('GridEnv', ''),
                          ('PilotStalledDays', 3)):
        self.am_setOption(option, value)

    self.pilotDB = PilotAgentsDB()
    self.diracadmin = DiracAdmin()
    self.jobDB = JobDB()

    self.clearPilotsDelay = self.am_getOption('ClearPilotsDelay', 30)
    self.clearAbortedDelay = self.am_getOption('ClearAbortedPilotsDelay', 7)

    self.WMSAdministrator = WMSAdministratorClient()

    return S_OK()
def __init__( self ):
    """
    Constructor: creates the sub logger named after the class and the
    helper / client objects used by the instance.

    examples:
      >>> s = Synchronizer()
    """

    self.log = gLogger.getSubLogger( self.__class__.__name__ )

    # Configuration helpers
    self.operations = Operations()
    self.resources = Resources()

    # Clients
    self.rStatus = ResourceStatusClient.ResourceStatusClient()
    self.rssConfig = RssConfiguration()
    self.diracAdmin = DiracAdmin()
def __init__(self, platform, appName, tarball_loc, appVersion, comment):
    """ Store the application description and build self.parameter from it;
    the tarball location goes through self.checkForTarBall first. """
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    self.diracAdmin = DiracAdmin()
    self.modifiedCS = False
    self.appVersion = appVersion
    self.appName = appName
    self.platform = platform
    self.softSec = "/Operations/Defaults/AvailableTarBalls"

    self.appTar = self.checkForTarBall(tarball_loc)

    self.parameter = { 'softSec': self.softSec,
                       'platform': self.platform,
                       'appName': self.appName,
                       'appname': self.appName.lower(),
                       'appTar': self.appTar,
                       'appTar_name': os.path.basename(self.appTar),
                       'appVersion': self.appVersion,
                     }
    self.comment = comment
    self.mailadress = '*****@*****.**'
# Default VO: the one of the group of the current proxy, when available
ret = getProxyInfo( disableVOMS = True )
if ret['OK'] and 'group' in ret['Value']:
    vo = getVOForGroup( ret['Value']['group'] )

# Command line switches override host and VO
for unprocSw in Script.getUnprocessedSwitches():
    if unprocSw[0] in ( "H", "host" ):
        host = unprocSw[1]
    if unprocSw[0] in ( "V", "vo" ):
        vo = unprocSw[1]

if not vo:
    Script.gLogger.error( 'Could not determine VO' )
    Script.showHelp()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
diracAdmin = DiracAdmin()

result = diracAdmin.getBDIICEVOView( ceName, useVO = vo, host = host )
if not result['OK']:
    print( result['Message'] )
    DIRAC.exit( 2 )

# print() with a single argument and dict.items() behave the same under
# Python 2 and Python 3, unlike the print statement and iteritems().
ces = result['Value']
for ceObj in ces:
    print( "CEVOView: %s {" % ceObj.get( 'GlueChunkKey', 'Unknown' ) )
    for item in ceObj.items():
        print( "%s: %s" % item )
    print( "}" )
def main():
    """Set the selected access types of the given storage elements to Active.

    The access types (read, write, check, remove) are chosen with command
    line switches; without any of them all four are processed.  Storage
    elements are given as positional arguments and are resolved with
    resolveSEGroup.  Unless muted, a summary mail is sent to the address
    configured under the Operations option "EMail/Production".

    The script always ends through DIRAC.exit.
    """
    read = False
    write = False
    check = False
    remove = False
    site = ""
    mute = False

    Script.registerSwitch("r", "AllowRead", "     Allow only reading from the storage element")
    Script.registerSwitch("w", "AllowWrite", "     Allow only writing to the storage element")
    Script.registerSwitch("k", "AllowCheck", "     Allow only check access to the storage element")
    Script.registerSwitch("v", "AllowRemove", "    Allow only remove access to the storage element")
    Script.registerSwitch("a", "All", "    Allow all access to the storage element")
    Script.registerSwitch("m", "Mute", "     Do not send email")
    Script.registerSwitch("S:", "Site=", "     Allow all SEs associated to site")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(["seGroupList: list of SEs or comma-separated SEs"])

    switches, ses = Script.parseCommandLine(ignoreErrors=True)

    for switch in switches:
        switchName = switch[0].lower()
        if switchName in ("r", "allowread"):
            read = True
        if switchName in ("w", "allowwrite"):
            write = True
        if switchName in ("k", "allowcheck"):
            check = True
        if switchName in ("v", "allowremove"):
            remove = True
        if switchName in ("a", "all"):
            read = True
            write = True
            check = True
            remove = True
        if switchName in ("m", "mute"):
            mute = True
        if switchName in ("s", "site"):
            site = switch[1]

    # imports
    from DIRAC import gConfig, gLogger
    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getSites
    from DIRAC.Core.Security.ProxyInfo import getProxyInfo
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import resolveSEGroup
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    from DIRAC.ResourceStatusSystem.Client.ResourceStatus import ResourceStatus

    if not (read or write or check or remove):
        # No switch was specified, means we need all of them
        gLogger.notice("No option given, all accesses will be allowed if they were not")
        read = True
        write = True
        check = True
        remove = True

    ses = resolveSEGroup(ses)
    diracAdmin = DiracAdmin()
    setup = gConfig.getValue("/DIRAC/Setup", "")
    if not setup:
        print("ERROR: Could not contact Configuration Service")
        DIRAC.exit(2)

    res = getProxyInfo()
    if not res["OK"]:
        gLogger.error("Failed to get proxy information", res["Message"])
        DIRAC.exit(2)

    userName = res["Value"].get("username")
    if not userName:
        gLogger.error("Failed to get username for proxy")
        DIRAC.exit(2)

    if site:
        res = getSites()
        if not res["OK"]:
            gLogger.error(res["Message"])
            DIRAC.exit(-1)
        if site not in res["Value"]:
            gLogger.error("The provided site (%s) is not known." % site)
            DIRAC.exit(-1)
        # NOTE(review): the same value is tested with `site in ...` above and
        # indexed with the fixed key "SE" here; one of the two looks wrong for
        # whatever getSites() returns - confirm against its documentation.
        ses.extend(res["Value"]["SE"].replace(" ", "").split(","))
    if not ses:
        gLogger.error("There were no SEs provided")
        DIRAC.exit()

    STATUS_TYPES = ["ReadAccess", "WriteAccess", "CheckAccess", "RemoveAccess"]
    ALLOWED_STATUSES = ["Unknown", "InActive", "Banned", "Probing", "Degraded"]

    # SEs actually switched to Active, per status type
    statusAllowedDict = {statusType: [] for statusType in STATUS_TYPES}

    # Which status types were requested on the command line
    statusFlagDict = {
        "ReadAccess": read,
        "WriteAccess": write,
        "CheckAccess": check,
        "RemoveAccess": remove,
    }

    resourceStatus = ResourceStatus()

    res = resourceStatus.getElementStatus(ses, "StorageElement")
    if not res["OK"]:
        gLogger.error("Storage Element %s does not exist" % ses)
        DIRAC.exit(-1)

    reason = "Forced with dirac-admin-allow-se by %s" % userName

    for se, seOptions in res["Value"].items():
        # InActive is used on the CS model, Banned is the equivalent in RSS
        for statusType in STATUS_TYPES:
            if not statusFlagDict[statusType]:
                continue
            if seOptions.get(statusType) == "Active":
                gLogger.notice("%s status of %s is already Active" % (statusType, se))
                continue
            if statusType not in seOptions:
                continue

            if seOptions[statusType] not in ALLOWED_STATUSES:
                # Report the value of the status type being processed; the
                # message used to show the "ReadAccess" value for every type.
                gLogger.notice(
                    "%s option for %s is %s, instead of %s"
                    % (statusType, se, seOptions[statusType], ALLOWED_STATUSES)
                )
                gLogger.notice("Try specifying the command switches")
                continue

            resR = resourceStatus.setElementStatus(se, "StorageElement", statusType, "Active", reason, userName)
            if not resR["OK"]:
                gLogger.fatal("Failed to update %s %s to Active, exit -" % (se, statusType), resR["Message"])
                DIRAC.exit(-1)
            else:
                gLogger.notice("Successfully updated %s %s to Active" % (se, statusType))
                statusAllowedDict[statusType].append(se)

    totalAllowed = 0
    totalAllowedSEs = []
    for statusType in STATUS_TYPES:
        totalAllowed += len(statusAllowedDict[statusType])
        totalAllowedSEs += statusAllowedDict[statusType]
    totalAllowedSEs = list(set(totalAllowedSEs))

    if not totalAllowed:
        gLogger.info("No storage elements were allowed")
        DIRAC.exit(-1)

    if mute:
        gLogger.notice("Email is muted by script switch")
        DIRAC.exit(0)

    subject = "%s storage elements allowed for use" % len(totalAllowedSEs)
    addressPath = "EMail/Production"
    address = Operations().getValue(addressPath, "")

    # One section per requested access type, each followed by its SEs
    sections = (
        (read, "reading", "ReadAccess"),
        (write, "writing", "WriteAccess"),
        (check, "checking", "CheckAccess"),
        (remove, "removing", "RemoveAccess"),
    )
    bodyParts = []
    for requested, action, statusType in sections:
        if requested:
            bodyParts.append("\n\nThe following storage elements were allowed for %s:" % action)
            bodyParts.extend("\n%s" % se for se in statusAllowedDict[statusType])
    body = "".join(bodyParts)

    if not address:
        gLogger.notice("'%s' not defined in Operations, can not send Mail\n" % addressPath, body)
        DIRAC.exit(0)

    res = diracAdmin.sendMail(address, subject, body)
    gLogger.notice("Notifying %s" % address)
    if res["OK"]:
        gLogger.notice(res["Value"])
    else:
        gLogger.notice(res["Message"])

    DIRAC.exit(0)
#!/usr/bin/env python
########################################################################
# $HeadURL$
# File :   dirac-admin-request-summary
# Author : Stuart Paterson
########################################################################
__RCSID__ = "$Id$"
import DIRAC
from DIRAC.Core.Base import Script

Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()

# The summary itself is printed by the API call ( printOutput=True )
result = diracAdmin.getRequestSummary(printOutput=True)
if result['OK']:
    DIRAC.exit(0)
else:
    # print() with one argument works under Python 2 and Python 3
    print(result['Message'])
    DIRAC.exit(2)
def getServicePorts():
    """ Return whatever DiracAdmin().getServicePorts() returns. """
    admin = DiracAdmin()
    return admin.getServicePorts()
""" __RCSID__ = "$Id$" import DIRAC from DIRAC.Core.Base import Script Script.setUsageMessage('\n'.join([ __doc__.split('\n')[1], 'Usage:', ' %s <pilot reference>' % Script.scriptName ])) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) < 1: Script.showHelp(exitCode=1) pilotRef = args[0] from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin diracAdmin = DiracAdmin() exitCode = 0 result = diracAdmin.killPilot(pilotRef) if not result['OK']: DIRAC.gLogger.error('Failed to kill pilot', pilotRef) DIRAC.gLogger.error(result['Message']) exitCode = 1 DIRAC.exit(exitCode)
#!/usr/bin/env python
########################################################################
# $HeadURL$
# File :   dirac-admin-get-banned-sites
# Author : Stuart Paterson
########################################################################
__RCSID__ = "08edcb9 (2010-12-14 13:13:51 +0000) Ricardo Graciani <*****@*****.**>"
import DIRAC
from DIRAC.Core.Base import Script

Script.parseCommandLine( ignoreErrors = True )
args = Script.getPositionalArgs()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()

result = diracAdmin.getBannedSites( printOutput = False )
if result['OK']:
    banned_sites = result['Value']
else:
    print( result['Message'] )
    DIRAC.exit( 2 )

# One line per banned site, built from fields 1-3 of the last entry of its
# site mask logging; print() with one argument works under Python 2 and 3.
for site in banned_sites:
    result = diracAdmin.getSiteMaskLogging( site )
    if result['OK']:
        sites = result['Value']
        lastEntry = sites[site][-1]
        print( '%-30s %s %s %s' % ( site, lastEntry[1], lastEntry[2], lastEntry[3] ) )
    else:
        print( '%-30s %s' % ( site, result['Message'] ) )
elif value.lower() == 'false': return False else: Script.showHelp() email = True for switch in Script.getUnprocessedSwitches(): if switch[0] == "email": email = getBoolean( switch[1] ) args = Script.getPositionalArgs() if len( args ) < 2: Script.showHelp() diracAdmin = DiracAdmin() exitCode = 0 errorList = [] setup = gConfig.getValue( '/DIRAC/Setup', '' ) if not setup: print 'ERROR: Could not contact Configuration Service' exitCode = 2 DIRACExit( exitCode ) site = args[0] comment = args[1] result = diracAdmin.addSiteInMask( site, comment, printOutput = True ) if not result['OK']: errorList.append( ( site, result['Message'] ) ) exitCode = 2 else:
class TokenAgent(AgentModule):
    """
      TokenAgent is in charge of checking tokens assigned on resources.
      Notifications are sent to those users owning expiring tokens.
    """

    # Rss token
    __rssToken = 'rs_svc'

    def __init__(self, *args, **kwargs):
        """ c'tor
        """

        AgentModule.__init__(self, *args, **kwargs)

        # Defaults, overridden by the agent options in initialize()
        self.notifyHours = 12
        self.adminMail = ''

        self.rsClient = None
        # { tokenOwner : [ [ owner, element, name, statusType, status, expiration ], ... ] }
        self.tokenDict = {}
        self.diracAdmin = None

    def initialize(self):
        """ TokenAgent initialization
        """

        self.notifyHours = self.am_getOption('notifyHours', self.notifyHours)
        self.adminMail = self.am_getOption('adminMail', self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        """
          Looks for user tokens. If they are expired, or expiring, it notifies users.

          Failures for one element type are logged and the next type is
          processed; the method always returns S_OK.
        """

        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        elements = ('Site', 'Resource', 'Node')

        for element in elements:

            self.log.info('Processing %s' % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens['OK']:
                self.log.error(interestingTokens['Message'])
                continue
            interestingTokens = interestingTokens['Value']

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens['OK']:
                self.log.error(processTokens['Message'])
                continue

        notificationResult = self._notifyOfTokens()
        if not notificationResult['OK']:
            self.log.error(notificationResult['Message'])

        return S_OK()

    def _getInterestingTokens(self, element):
        """
          Given an element, picks all the entries with TokenExpiration < now + X<hours>
          If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

          :param element: element type passed to selectStatusElement
          :return: S_OK( list of dictionaries, one per selected row ) or the
                   failed selectStatusElement result
        """

        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(
            element, 'Status', meta={'older': ['TokenExpiration', tokenExpLimit]})

        if not tokenElements['OK']:
            return tokenElements

        tokenColumns = tokenElements['Columns']
        tokenElements = tokenElements['Value']

        interestingTokens = []

        for tokenElement in tokenElements:

            # Rows are turned into { column : value } dictionaries
            tokenElement = dict(zip(tokenColumns, tokenElement))

            if tokenElement['TokenOwner'] != self.__rssToken:
                interestingTokens.append(tokenElement)

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        """
          Given an element and a list of interesting token elements, updates the
          database if the token is expired, logs a message and adds the token
          to self.tokenDict under its owner.

          :return: S_OK, S_ERROR when a token element misses a key, or the
                   failed addOrModifyStatusElement result
        """

        never = datetime.max

        for tokenElement in tokenElements:

            try:
                name = tokenElement['Name']
                statusType = tokenElement['StatusType']
                status = tokenElement['Status']
                tokenOwner = tokenElement['TokenOwner']
                tokenExpiration = tokenElement['TokenExpiration']
            except KeyError as e:
                return S_ERROR(e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                # The token goes back to the RSS owner and never expires
                result = self.rsClient.addOrModifyStatusElement(
                    element, 'Status', name=name, statusType=statusType,
                    tokenOwner=self.__rssToken, tokenExpiration=never)
                if not result['OK']:
                    return result

            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg % (name, statusType, tokenOwner, tokenExpiration))

            # Expired and expiring tokens are both reported to the owner
            self.tokenDict.setdefault(tokenOwner, []).append(
                [tokenOwner, element, name, statusType, status, tokenExpiration])

        return S_OK()

    def _notifyOfTokens(self):
        """
          Splits interesing tokens between expired and expiring. Also splits them
          among users. It ends sending notifications to the users.

          :return: the result of the admin notification when self.adminMail is
                   set and there is something to report, S_OK otherwise
        """

        now = datetime.utcnow()

        adminExpired = []
        adminExpiring = []

        for tokenOwner, tokenLists in self.tokenDict.items():

            expired = []
            expiring = []

            for tokenList in tokenLists:

                # Index 5 is the token expiration
                if tokenList[5] < now:
                    expired.append(tokenList)
                    adminExpired.append(tokenList)
                else:
                    expiring.append(tokenList)
                    adminExpiring.append(tokenList)

            resNotify = self._notify(tokenOwner, expired, expiring)
            if not resNotify['OK']:
                self.log.error('Failed to notify token owner', resNotify['Message'])

        if (adminExpired or adminExpiring) and self.adminMail:
            return self._notify(self.adminMail, adminExpired, adminExpiring)

        return S_OK()

    def _notify(self, tokenOwner, expired, expiring):
        """
          Given a token owner and a list of expired and expiring tokens, sends an
          email to the user.

          :return: the sendMail result, or S_ERROR when the mail was not sent
        """

        subject = 'RSS token summary for tokenOwner %s' % tokenOwner

        mail = '\nEXPIRED tokens ( RSS has taken control of them )\n'
        for tokenList in expired:
            mail += ' '.join([str(x) for x in tokenList])
            mail += '\n'

        # Appended ( += ): a plain assignment here used to throw away the
        # EXPIRED section built above.
        mail += '\nEXPIRING tokens ( RSS will take control of them )\n'
        for tokenList in expiring:
            mail += ' '.join([str(x) for x in tokenList])
            mail += '\n'

        mail += "\n\n You can extend for another 24 hours using the web interface (Set token -> Acquire)\n"
        mail += " Or you can use the dirac-rss-set-token script\n\n"
        mail += "Through the same interfaces you can release the token any time\n"

        # FIXME: you can re-take control of them using this or that...

        resEmail = self.diracAdmin.sendMail(tokenOwner, subject, mail)
        if not resEmail['OK']:
            return S_ERROR('Cannot send email to user "%s"' % tokenOwner)

        return resEmail
#
import os
import commands

from DIRAC.Core.Base import Script

Script.addDefaultOptionValue('LogLevel', 'verbose')
Script.parseCommandLine(ignoreErrors=True)

from DIRAC.Core.Utilities.Time import fromString, second, dateTime, timeInterval
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
from DIRAC import gLogger

# Script settings
now = dateTime()
runit_dir = '/opt/dirac/startup'
logfile = 'log/current'
pollingtime = 60
diracAdmin = DiracAdmin()
mailadress = '*****@*****.**'
subject = 'CheckStalled'
host = os.uname()[1]
# Text accumulated by write_log()
msg = 'List of Services / agents which could be stalled on ' + host + ' \n\n'


def write_log(mesg):
    """ Log `mesg` with gLogger.notice and append it, followed by a newline,
    to the module level `msg` text """
    global msg
    gLogger.notice(mesg)
    msg = msg + mesg + '\n'


gLogger.getSubLogger("CheckStalledServices")
write_log('The script ' + Script.scriptName + ' is running at ' + str(now))
from DIRAC.Core.Base import Script

usageLines = [__doc__.split('\n')[1],
              'Usage:',
              '  %s [option|cfgfile] ... JobID ...' % Script.scriptName,
              'Arguments:',
              '  JobID:    DIRAC ID of the Job']
Script.setUsageMessage('\n'.join(usageLines))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

# At least one job ID is required
if len(args) < 1:
    Script.showHelp()

from DIRAC import exit as DIRACExit
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()
exitCode = 0
errorList = []

for job in args:

    # Job IDs must be integers; anything else is recorded and skipped
    try:
        job = int(job)
    except Exception:
        errorList.append(('Expected integer for JobID', job))
        exitCode = 2
        continue

    result = diracAdmin.getJobPilotOutput(job)
    if not result['OK']:
        errorList.append((job, result['Message']))
########################################################################
# $HeadURL$
# File :   dirac-admin-list-users
# Author : Adrian Casajus
########################################################################
__RCSID__ = "$Id$"
import DIRAC
from DIRAC.Core.Base import Script

Script.registerSwitch("e", "extended", "Show extended info")

Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()
exitCode = 0
errorList = []
extendedInfo = False

for unprocSw in Script.getUnprocessedSwitches():
    if unprocSw[0] in ('e', 'extended'):
        extendedInfo = True

# Host names only, or the full description with the "extended" switch
if not extendedInfo:
    result = diracAdmin.csListHosts()
else:
    result = diracAdmin.csDescribeHosts()

# 'Value' is only read from a successful result
if not result['OK']:
    print(result['Message'])
    DIRAC.exit(2)

# print() with one argument works under Python 2 and Python 3
if not extendedInfo:
    for host in result['Value']:
        print(" %s" % host)
else:
    print(diracAdmin.pPrint.pformat(result['Value']))
#!/usr/bin/env python
########################################################################
# $HeadURL$
# File :   dirac-admin-pilot-summary
# Author : Stuart Paterson
########################################################################
__RCSID__ = "12ffa97 (2010-12-14 13:18:28 +0000) Ricardo Graciani <*****@*****.**>"
import DIRAC
from DIRAC.Core.Base import Script

Script.parseCommandLine( ignoreErrors = True )
args = Script.getPositionalArgs()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()

result = diracAdmin.getPilotSummary()
if result['OK']:
    DIRAC.exit( 0 )
else:
    # print() with one argument works under Python 2 and Python 3
    print( result['Message'] )
    DIRAC.exit( 2 )
#!/usr/bin/env python
########################################################################
# $HeadURL$
# File :   dirac-admin-externals-versions
# Author : Stuart Paterson
########################################################################
__RCSID__ = "$Id$"

from DIRAC.Core.Base import Script

Script.parseCommandLine( ignoreErrors = True )

from DIRAC import exit as DIRACExit
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

# The result of the call is not inspected; the script always exits with 0
diracAdmin = DiracAdmin()
diracAdmin.getExternalPackageVersions()

DIRACExit( 0 )
# # test execution if __name__ == "__main__": if len(sys.argv) != 5: gLogger.error( "Usage:\n python %s userGroup SourceSE TargetSE1 TargetSE2\n") sys.exit(-1) userGroup = sys.argv[1] sourceSE = sys.argv[2] targetSE1 = sys.argv[3] targetSE2 = sys.argv[4] gLogger.always("will use '%s' group" % userGroup) admin = DiracAdmin() userName = admin._getCurrentUser() if not userName["OK"]: gLogger.error(userName["Message"]) sys.exit(-1) userName = userName["Value"] gLogger.always("current user is '%s'" % userName) userGroups = getGroupsForUser(userName) if not userGroups["OK"]: gLogger.error(userGroups["Message"]) sys.exit(-1) userGroups = userGroups["Value"] if userGroup not in userGroups:
from DIRAC.Core.Base import Script

Script.setUsageMessage( '\n'.join( [ __doc__.split( '\n' )[1],
                                     'Usage:',
                                     '  %s [option|cfgfile] ... JobID ...' % Script.scriptName,
                                     'Arguments:',
                                     '  JobID:    DIRAC ID of the Job' ] ) )
Script.parseCommandLine( ignoreErrors = True )
args = Script.getPositionalArgs()

# At least one job ID is required
if len( args ) < 1:
    Script.showHelp()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()
exitCode = 0
errorList = []

for job in args:

    # Job IDs must be integers; anything else is recorded and skipped.
    # "except ..., x" is Python-2-only syntax and the exception object was
    # never used; int() of a command line string fails with ValueError.
    try:
        job = int( job )
    except ValueError:
        errorList.append( ( 'Expected integer for jobID', job ) )
        exitCode = 2
        continue

    result = diracAdmin.resetJob( job )
    if result['OK']:
        # print() with one argument works under Python 2 and Python 3
        print( 'Reset Job %s' % ( job ) )
class PilotStatusAgent( AgentModule ):
  """
    Agent that polls the grid for the status of pilots recorded in the
    PilotAgentsDB, updates their status, sends accounting records for
    pilots that reached a final state and declares long-silent pilots
    Deleted.

    The specific agents must provide the following methods:
      - initialize() for initial settings
      - beginExecution()
      - execute() - the main method called in the agent cycle
      - endExecution()
      - finalize() - the graceful exit of the method, this one is usually used
                 for the agent restart
  """

  # Pilot states that are still worth querying on the grid
  queryStateList = ['Ready', 'Submitted', 'Running', 'Waiting', 'Scheduled']
  # Pilot states after which a pilot is accounted and no longer updated
  finalStateList = [ 'Done', 'Aborted', 'Cleared', 'Deleted', 'Failed' ]
  # Fields used to group pilots before querying their status
  identityFieldsList = [ 'OwnerDN', 'OwnerGroup', 'GridType', 'Broker' ]
  # Only pilots of these grid types are processed by execute()
  eligibleGridTypes = [ 'gLite' ]

  #############################################################################
  def initialize( self ):
    """Sets defaults and creates the DB / admin client objects.

       :return: S_OK()
    """
    self.am_setOption( 'PollingTime', 120 )
    self.am_setOption( 'GridEnv', '' )
    self.am_setOption( 'PilotStalledDays', 3 )
    self.pilotDB = PilotAgentsDB()
    self.diracadmin = DiracAdmin()
    self.jobDB = JobDB()
    return S_OK()

  #############################################################################
  def execute( self ):
    """The PilotAgent execution method.

       Reads the cycle options, opens a PilotAgentsDB connection and runs
       the status update with it.

       :return: S_OK() on success, or the failing S_ERROR structure
    """
    self.pilotStalledDays = self.am_getOption( 'PilotStalledDays', 3 )
    self.gridEnv = self.am_getOption( 'GridEnv' )
    if not self.gridEnv:
      # No specific option found, try a general one
      setup = gConfig.getValue( '/DIRAC/Setup', '' )
      if setup:
        instance = gConfig.getValue( '/DIRAC/Setups/%s/WorkloadManagement' % setup, '' )
        if instance:
          self.gridEnv = gConfig.getValue( '/Systems/WorkloadManagement/%s/GridEnv' % instance, '' )

    result = self.pilotDB._getConnection()
    if not result['OK']:
      return result
    connection = result['Value']

    # The connection used to be closed only at the very end of the cycle,
    # so every early return leaked it. The finally clause closes it on all
    # exit paths.
    try:
      return self._updatePilots( connection )
    finally:
      connection.close()

  def _updatePilots( self, connection ):
    """ Query the grid status of all eligible pilots, update / account them
        and finally handle the pilots not updated for a long time.

        :param connection: open PilotAgentsDB connection, closed by the caller
        :return: S_OK() or the S_ERROR of the failing DB call
    """
    result = self.pilotDB.getPilotGroups( self.identityFieldsList,
                                          {'Status': self.queryStateList } )
    if not result['OK']:
      self.log.error( 'Fail to get identities Groups', result['Message'] )
      return result
    if not result['Value']:
      return S_OK()

    pilotsToAccount = {}

    for ownerDN, ownerGroup, gridType, broker in result['Value']:

      if gridType not in self.eligibleGridTypes:
        continue

      self.log.verbose( 'Getting pilots for %s:%s @ %s %s' % ( ownerDN, ownerGroup, gridType, broker ) )

      condDict1 = {'Status':'Done',
                   'StatusReason':'Report from JobAgent',
                   'OwnerDN':ownerDN,
                   'OwnerGroup':ownerGroup,
                   'GridType':gridType,
                   'Broker':broker}

      condDict2 = {'Status':self.queryStateList,
                   'OwnerDN':ownerDN,
                   'OwnerGroup':ownerGroup,
                   'GridType':gridType,
                   'Broker':broker}

      for condDict in [ condDict1, condDict2 ]:
        result = self.clearWaitingPilots( condDict )
        if not result['OK']:
          self.log.warn( 'Failed to clear Waiting Pilot Jobs' )

        result = self.pilotDB.selectPilots( condDict )
        if not result['OK']:
          self.log.warn( 'Failed to get the Pilot Agents' )
          return result
        if not result['Value']:
          continue
        refList = result['Value']

        ret = gProxyManager.getPilotProxyFromDIRACGroup( ownerDN, ownerGroup )
        if not ret['OK']:
          self.log.error( ret['Message'] )
          self.log.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, ownerGroup ) )
          continue
        proxy = ret['Value']

        self.log.verbose( "Getting status for %s pilots for owner %s and group %s" % ( len( refList ),
                                                                                      ownerDN, ownerGroup ) )

        # Query the pilots in slices of at most MAX_JOBS_QUERY references
        for start_index in range( 0, len( refList ), MAX_JOBS_QUERY ):
          refsToQuery = refList[ start_index : start_index + MAX_JOBS_QUERY ]
          self.log.verbose( 'Querying %d pilots of %s starting at %d' %
                            ( len( refsToQuery ), len( refList ), start_index ) )
          result = self.getPilotStatus( proxy, gridType, refsToQuery )
          if not result['OK']:
            if result['Message'] == 'Broker not Available':
              # No point in querying further slices through the same broker
              self.log.error( 'Broker %s not Available' % broker )
              break
            self.log.warn( 'Failed to get pilot status:' )
            self.log.warn( '%s:%s @ %s' % ( ownerDN, ownerGroup, gridType ) )
            continue

          statusDict = result[ 'Value' ]
          for pRef in statusDict:
            pDict = statusDict[ pRef ]
            if pDict:
              if pDict['isParent']:
                self.log.verbose( 'Clear parametric parent %s' % pRef )
                result = self.clearParentJob( pRef, pDict, connection )
                if not result['OK']:
                  self.log.warn( result['Message'] )
                else:
                  self.log.info( 'Parametric parent removed: %s' % pRef )
              if pDict[ 'FinalStatus' ]:
                # Final states are written to the DB by accountPilots()
                self.log.verbose( 'Marking Status for %s to %s' % ( pRef, pDict['Status'] ) )
                pilotsToAccount[ pRef ] = pDict
              else:
                self.log.verbose( 'Setting Status for %s to %s' % ( pRef, pDict['Status'] ) )
                self.pilotDB.setPilotStatus( pRef,
                                             pDict['Status'],
                                             pDict['DestinationSite'],
                                             updateTime = pDict['StatusDate'],
                                             conn = connection )

          # Flush the accounting in batches
          if len( pilotsToAccount ) > 100:
            self.accountPilots( pilotsToAccount, connection )
            pilotsToAccount = {}

    self.accountPilots( pilotsToAccount, connection )
    # Now handle pilots not updated in the last N days (most likely the Broker is no
    # longer available) and declare them Deleted.
    # handleOldPilots() logs its own failures; its result does not change
    # the outcome of the cycle.
    self.handleOldPilots( connection )

    return S_OK()

  def clearWaitingPilots( self, condDict ):
    """ Clear pilots in the faulty Waiting state

        Pilots matching the owner / grid / broker of condDict that are in
        Waiting state and older than MAX_WAITING_STATE_LENGTH hours are set
        to Stalled.

        :param dict condDict: must provide OwnerDN, OwnerGroup, GridType, Broker
        :return: S_OK() or the S_ERROR of the failed selection
    """
    last_update = Time.dateTime() - MAX_WAITING_STATE_LENGTH * Time.hour
    clearDict = {'Status':'Waiting',
                 'OwnerDN':condDict['OwnerDN'],
                 'OwnerGroup':condDict['OwnerGroup'],
                 'GridType':condDict['GridType'],
                 'Broker':condDict['Broker']}
    result = self.pilotDB.selectPilots( clearDict, older = last_update )
    if not result['OK']:
      self.log.warn( 'Failed to get the Pilot Agents for Waiting state' )
      return result
    if not result['Value']:
      return S_OK()
    refList = result['Value']

    for pilotRef in refList:
      # The message used to say "Aborted" although the status set is Stalled
      self.log.info( 'Setting Waiting pilot to Stalled: %s' % pilotRef )
      self.pilotDB.setPilotStatus( pilotRef, 'Stalled', statusReason = 'Exceeded max waiting time' )

    return S_OK()

  def clearParentJob( self, pRef, pDict, connection ):
    """ Clear the parameteric parent job from the PilotAgentsDB

        If at least one child is already in the DB the parent is simply
        deleted; otherwise the children are registered with the parent's
        attributes first and the parent is deleted only if all children got
        their status set.

        :param str pRef: parent pilot reference
        :param dict pDict: parsed status of the parent (uses ChildRefs, ChildDicts)
        :param connection: open PilotAgentsDB connection
        :return: S_OK() / S_ERROR
    """
    childList = pDict['ChildRefs']

    # Check that at least one child is in the database
    children_ok = False
    for child in childList:
      result = self.pilotDB.getPilotInfo( child, conn = connection )
      if result['OK'] and result['Value']:
        children_ok = True

    if children_ok:
      return self.pilotDB.deletePilot( pRef, conn = connection )

    self.log.verbose( 'Adding children for parent %s' % pRef )
    result = self.pilotDB.getPilotInfo( pRef )
    # A failed lookup used to raise on result['Value'][pRef]; report it
    # as an error so the caller can log it and carry on.
    if not result['OK']:
      return result
    if pRef not in result['Value']:
      return S_ERROR( 'No info found for parent pilot %s' % pRef )
    parentInfo = result['Value'][pRef]
    tqID = parentInfo['TaskQueueID']
    ownerDN = parentInfo['OwnerDN']
    ownerGroup = parentInfo['OwnerGroup']
    broker = parentInfo['Broker']
    gridType = parentInfo['GridType']
    result = self.pilotDB.addPilotTQReference( childList, tqID, ownerDN, ownerGroup,
                                               broker = broker, gridType = gridType )
    if not result['OK']:
      return result

    children_added = True
    for chRef, chDict in pDict['ChildDicts'].items():
      result = self.pilotDB.setPilotStatus( chRef, chDict['Status'],
                                            destination = chDict['DestinationSite'],
                                            conn = connection )
      if not result['OK']:
        children_added = False

    if not children_added:
      return S_ERROR( 'Failed to add children' )

    # The outcome of the final delete is not propagated (as before)
    self.pilotDB.deletePilot( pRef, conn = connection )
    return S_OK()

  def handleOldPilots( self, connection ):
    """
      select all pilots that have not been updated in the last N days and declared them
      Deleted, accounting for them.

      Pilots with at least one job updated within the same period are left
      untouched.

      :param connection: open PilotAgentsDB connection
      :return: S_OK() or the S_ERROR of the failed DB query
    """
    pilotsToAccount = {}
    timeLimitToConsider = Time.toString( Time.dateTime() - Time.day * self.pilotStalledDays )
    # A.T. Below looks to be a bug
    #result = self.pilotDB.selectPilots( {'Status':self.queryStateList} , older=None, timeStamp='LastUpdateTime' )
    result = self.pilotDB.selectPilots( { 'Status':self.queryStateList} ,
                                        older = timeLimitToConsider,
                                        timeStamp = 'LastUpdateTime' )
    if not result['OK']:
      self.log.error( 'Failed to get the Pilot Agents' )
      return result
    if not result['Value']:
      return S_OK()

    refList = result['Value']
    result = self.pilotDB.getPilotInfo( refList )
    if not result['OK']:
      self.log.error( 'Failed to get Info for Pilot Agents' )
      return result

    pilotsDict = result['Value']

    for pRef in pilotsDict:
      jobs = pilotsDict[pRef].get( 'Jobs' )
      if jobs and self._checkJobLastUpdateTime( jobs, self.pilotStalledDays ):
        self.log.debug('%s should not be deleted since one job of %s is running.' % ( str(pRef) , str(pilotsDict[pRef]['Jobs']) ) )
        continue
      deletedJobDict = pilotsDict[pRef]
      deletedJobDict['Status'] = 'Deleted'
      deletedJobDict['StatusDate'] = Time.dateTime()
      pilotsToAccount[ pRef ] = deletedJobDict
      # Account and kill in batches
      if len( pilotsToAccount ) > 100:
        self.accountPilots( pilotsToAccount, connection )
        self._killPilots( pilotsToAccount )
        pilotsToAccount = {}

    self.accountPilots( pilotsToAccount, connection )
    self._killPilots( pilotsToAccount )

    return S_OK()

  def accountPilots( self, pilotsToAccount, connection ):
    """ account for pilots

        When the PilotAccountingEnabled option is "yes", accounting records
        are built and committed; the new status is written to the DB only
        if accounting is disabled or was sent successfully.

        :param dict pilotsToAccount: pilot reference -> status dictionary
        :param connection: open PilotAgentsDB connection
        :return: S_OK() or the S_ERROR of the failing step
    """
    pae = self.am_getOption( 'PilotAccountingEnabled', 'yes' )
    accountingFlag = pae.lower() == "yes"

    if not pilotsToAccount:
      self.log.info( 'No pilots to Account' )
      return S_OK()

    accountingSent = False
    if accountingFlag:
      retVal = self.pilotDB.getPilotInfo( list( pilotsToAccount ), conn = connection )
      if not retVal['OK']:
        self.log.error( 'Fail to retrieve Info for pilots', retVal['Message'] )
        return retVal
      dbData = retVal[ 'Value' ]
      for pref in dbData:
        if pref in pilotsToAccount:
          # Do not overwrite pilots already final in the DB
          if dbData[pref][ 'Status' ] not in self.finalStateList:
            dbData[pref][ 'Status' ] = pilotsToAccount[pref][ 'Status' ]
            dbData[pref][ 'DestinationSite' ] = pilotsToAccount[pref][ 'DestinationSite' ]
            dbData[pref][ 'LastUpdateTime' ] = pilotsToAccount[pref][ 'StatusDate' ]

      retVal = self.__addPilotsAccountingReport( dbData )
      if not retVal['OK']:
        # Was a copy of the "retrieve Info" message above
        self.log.error( 'Fail to add accounting report for pilots', retVal['Message'] )
        return retVal

      self.log.info( "Sending accounting records..." )
      retVal = gDataStoreClient.commit()
      if not retVal[ 'OK' ]:
        self.log.error( "Can't send accounting reports", retVal[ 'Message' ] )
      else:
        self.log.info( "Accounting sent for %s pilots" % len( pilotsToAccount ) )
        accountingSent = True

    if not accountingFlag or accountingSent:
      for pRef in pilotsToAccount:
        pDict = pilotsToAccount[pRef]
        self.log.verbose( 'Setting Status for %s to %s' % ( pRef, pDict['Status'] ) )
        self.pilotDB.setPilotStatus( pRef,
                                     pDict['Status'],
                                     pDict['DestinationSite'],
                                     pDict['StatusDate'],
                                     conn = connection )

    return S_OK()

  #############################################################################
  def getPilotStatus( self, proxy, gridType, pilotRefList ):
    """ Get GRID job status information using the job's owner proxy and
        GRID job IDs. Returns for each JobID its status in the GRID WMS and
        its destination CE as a tuple of 2 elements

        :param proxy: owner proxy passed to executeGridCommand
        :param str gridType: 'LCG' or 'gLite'; anything else gives S_ERROR()
        :param list pilotRefList: pilot references to query
        :return: S_OK( { pilotRef : statusDict } ) / S_ERROR
    """
    if gridType == 'LCG':
      cmd = [ 'edg-job-status' ]
    elif gridType == 'gLite':
      cmd = [ 'glite-wms-job-status' ]
    else:
      return S_ERROR()
    cmd.extend( pilotRefList )

    start = time.time()
    ret = executeGridCommand( proxy, cmd, self.gridEnv )
    self.log.info( '%s Job Status Execution Time for %d jobs:' %
                   ( gridType, len( pilotRefList ) ), time.time() - start )

    if not ret['OK']:
      self.log.error( 'Failed to execute %s Job Status' % gridType, ret['Message'] )
      return S_ERROR()

    if ret['Value'][0] != 0:
      # Non-zero exit status: look for pilots the WMS no longer knows about
      stderr = ret['Value'][2]
      stdout = ret['Value'][1]
      deleted = 0
      resultDict = {}
      status = 'Deleted'
      destination = 'Unknown'
      deletedJobDict = { 'Status': status,
                         'DestinationSite': destination,
                         'StatusDate': Time.dateTime(),
                         'isChild': False,
                         'isParent': False,
                         'ParentRef': False,
                         'FinalStatus' : status in self.finalStateList,
                         'ChildRefs' : [] }
      # Glite returns this error for Deleted jobs to std.err
      for job in List.fromChar( stderr, '\nUnable to retrieve the status for:' )[1:]:
        pRef = List.fromChar( job, '\n' )[0].strip()
        # One dictionary per pilot instead of a single shared object
        resultDict[pRef] = dict( deletedJobDict )
        self.pilotDB.setPilotStatus( pRef, "Deleted" )
        deleted += 1
      # EDG returns a similar error for Deleted jobs to std.out
      for job in List.fromChar( stdout, '\nUnable to retrieve the status for:' )[1:]:
        pRef = List.fromChar( job, '\n' )[0].strip()
        if re.search( "No such file or directory: no matching jobs found", job ):
          resultDict[pRef] = dict( deletedJobDict )
          self.pilotDB.setPilotStatus( pRef, "Deleted" )
          deleted += 1
        if re.search( "edg_wll_JobStatus: Connection refused: edg_wll_ssl_connect()", job ):
          # the Broker is not accesible
          return S_ERROR( 'Broker not Available' )
      if not deleted:
        self.log.error( 'Error executing %s Job Status:' %
                        gridType, str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
        return S_ERROR()
      return S_OK( resultDict )

    stdout = ret['Value'][1]
    resultDict = {}
    for job in List.fromChar( stdout, '\nStatus info for the Job :' )[1:]:
      pRef = List.fromChar( job, '\n' )[0].strip()
      resultDict[pRef] = self.__parseJobStatus( job, gridType )

    return S_OK( resultDict )

  def __parseJobStatus( self, job, gridType ):
    """ Parse output of grid pilot status command

        :param str job: status text of one job, as cut out by getPilotStatus
        :param str gridType: 'LCG' or 'gLite', selects which date is parsed
        :return: dict with Status, DestinationSite, StatusDate, isChild,
                 isParent, ParentRef, FinalStatus, ChildRefs, ChildDicts
    """
    # Raw strings: same patterns, without relying on unknown string escapes
    statusRE = r'Current Status:\s*(\w*)'
    destinationRE = r'Destination:\s*([\w\.-]*)'
    statusDateLCGRE = r'reached on:\s*....(.*)'
    submittedDateRE = r'Submitted:\s*....(.*)'
    statusFailedRE = r'Current Status:.*\(Failed\)'

    status = None
    destination = 'Unknown'
    statusDate = None
    submittedDate = None

    try:
      status = re.search( statusRE, job ).group( 1 )
      if status == 'Done' and re.search( statusFailedRE, job ):
        status = 'Failed'
      if re.search( destinationRE, job ):
        destination = re.search( destinationRE, job ).group( 1 )
      if gridType == 'LCG' and re.search( statusDateLCGRE, job ):
        statusDate = re.search( statusDateLCGRE, job ).group( 1 )
        statusDate = time.strftime( '%Y-%m-%d %H:%M:%S', time.strptime( statusDate, '%b %d %H:%M:%S %Y' ) )
      if gridType == 'gLite' and re.search( submittedDateRE, job ):
        submittedDate = re.search( submittedDateRE, job ).group( 1 )
        submittedDate = time.strftime( '%Y-%m-%d %H:%M:%S', time.strptime( submittedDate, '%b %d %H:%M:%S %Y %Z' ) )
    except Exception:
      # Best effort: a parse failure is logged and whatever was parsed so
      # far is kept. A bare except would also trap SystemExit and
      # KeyboardInterrupt.
      self.log.exception( 'Error parsing %s Job Status output:\n' % gridType, job )

    isParent = bool( re.search( 'Nodes information', job ) )
    isChild = bool( re.search( 'Parent Job', job ) )

    if status == "Running":
      # Pilots can be in Running state for too long, due to bugs in the WMS
      if statusDate:
        statusTime = Time.fromString( statusDate )
        delta = Time.dateTime() - statusTime
        if delta > 4 * Time.day:
          self.log.info( 'Setting pilot status to Deleted after 4 days in Running' )
          status = "Deleted"
          statusDate = statusTime + 4 * Time.day
      elif submittedDate:
        statusTime = Time.fromString( submittedDate )
        delta = Time.dateTime() - statusTime
        if delta > 7 * Time.day:
          self.log.info( 'Setting pilot status to Deleted more than 7 days after submission still in Running' )
          status = "Deleted"
          statusDate = statusTime + 7 * Time.day

    childRefs = []
    childDicts = {}
    if isParent:
      # Each sub-job section is parsed recursively with the same rules
      for subjob in List.fromChar( job, ' Status info for the Job :' )[1:]:
        chRef = List.fromChar( subjob, '\n' )[0].strip()
        childDict = self.__parseJobStatus( subjob, gridType )
        childRefs.append( chRef )
        childDicts[chRef] = childDict

    return { 'Status': status,
             'DestinationSite': destination,
             'StatusDate': statusDate,
             'isChild': isChild,
             'isParent': isParent,
             'ParentRef': False,
             'FinalStatus' : status in self.finalStateList,
             'ChildRefs' : childRefs,
             'ChildDicts' : childDicts }

  def __addPilotsAccountingReport( self, pilotsData ):
    """ fill accounting data

        Builds one PilotAccounting record per pilot and registers it with
        gDataStoreClient (the commit is done by the caller).

        :param dict pilotsData: pilot reference -> pilot info dictionary
        :return: S_OK() or the S_ERROR of the first failed addRegister
    """
    for pRef in pilotsData:
      pData = pilotsData[pRef]
      pA = PilotAccounting()
      pA.setEndTime( pData[ 'LastUpdateTime' ] )
      pA.setStartTime( pData[ 'SubmissionTime' ] )
      retVal = CS.getUsernameForDN( pData[ 'OwnerDN' ] )
      if not retVal[ 'OK' ]:
        userName = '******'
        self.log.error( "Can't determine username for dn:", pData[ 'OwnerDN' ] )
      else:
        userName = retVal[ 'Value' ]
      pA.setValueByKey( 'User', userName )
      pA.setValueByKey( 'UserGroup', pData[ 'OwnerGroup' ] )
      result = getSiteForCE( pData[ 'DestinationSite' ] )
      if result['OK'] and result[ 'Value' ].strip():
        pA.setValueByKey( 'Site', result['Value'].strip() )
      else:
        pA.setValueByKey( 'Site', 'Unknown' )
      pA.setValueByKey( 'GridCE', pData[ 'DestinationSite' ] )
      pA.setValueByKey( 'GridMiddleware', pData[ 'GridType' ] )
      pA.setValueByKey( 'GridResourceBroker', pData[ 'Broker' ] )
      pA.setValueByKey( 'GridStatus', pData[ 'Status' ] )
      # A pilot without a 'Jobs' entry is accounted with zero jobs
      if 'Jobs' not in pData:
        pA.setValueByKey( 'Jobs', 0 )
      else:
        pA.setValueByKey( 'Jobs', len( pData['Jobs'] ) )
      self.log.verbose( "Added accounting record for pilot %s" % pData[ 'PilotID' ] )
      retVal = gDataStoreClient.addRegister( pA )
      if not retVal[ 'OK' ]:
        return retVal
    return S_OK()

  def _killPilots( self, acc ):
    """ Kill, through DiracAdmin, every pilot of acc for which pilot info
        with a 'Status' entry can be retrieved. Failures are only logged.

        :param dict acc: pilot reference -> status dictionary
    """
    for i in sorted( acc.keys() ):
      result = self.diracadmin.getPilotInfo( i )
      # 'in' replaces the Python-2-only dict.has_key()
      if result['OK'] and i in result['Value'] and 'Status' in result['Value'][i]:
        ret = self.diracadmin.killPilot( str(i) )
        if ret['OK']:
          self.log.info("Successfully deleted: %s (Status : %s)" % (i, result['Value'][i]['Status'] ) )
        else:
          self.log.error( "Failed to delete pilot: ", "%s : %s" % ( i, ret['Message'] ) )
      else:
        self.log.error( "Failed to get pilot info", "%s : %s" % ( i, str( result ) ) )

  def _checkJobLastUpdateTime( self, joblist , StalledDays ):
    """ Tell whether any job of joblist was updated in the last StalledDays days.

        :param list joblist: job IDs (converted with int())
        :param StalledDays: number of days, multiplied by Time.day
        :return: True as soon as one job has a recent LastUpdateTime, else False
    """
    timeLimitToConsider = Time.dateTime() - Time.day * StalledDays
    ret = False
    for jobID in joblist:
      result = self.jobDB.getJobAttributes( int( jobID ) )
      if result['OK']:
        if 'LastUpdateTime' in result['Value']:
          lastUpdateTime = result['Value']['LastUpdateTime']
          if Time.fromString( lastUpdateTime ) > timeLimitToConsider:
            ret = True
            self.log.debug( 'Since %s updates LastUpdateTime on %s this does not to need to be deleted.' % ( str( jobID ), str( lastUpdateTime ) ) )
            break
      else:
        self.log.error( "Error taking job info from DB", result['Message'] )
    return ret
Remove User from Configuration """ __RCSID__ = "$Id$" import DIRAC from DIRAC.Core.Base import Script Script.setUsageMessage('\n'.join([ __doc__.split('\n')[1], 'Usage:', ' %s [option|cfgfile] ... User ...' % Script.scriptName, 'Arguments:', ' User: User name' ])) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin diracAdmin = DiracAdmin() exitCode = 0 errorList = [] if len(args) < 1: Script.showHelp() choice = raw_input("Are you sure you want to delete user/s %s? yes/no [no]: " % ", ".join(args)) choice = choice.lower() if choice not in ("yes", "y"): print "Delete aborted" DIRAC.exit(0) for user in args: if not diracAdmin.csDeleteUser(user):
from DIRAC.Core.Base import Script Script.registerSwitch("e", "extended", "Show extended info") Script.setUsageMessage('\n'.join([ __doc__.split('\n')[1], 'Usage:', ' %s [option|cfgfile] ... [Group] ...' % Script.scriptName, 'Arguments:', ' Group: Only users from this group (default: all)' ])) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) == 0: args = ['all'] from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin diracAdmin = DiracAdmin() exitCode = 0 errorList = [] extendedInfo = False for unprocSw in Script.getUnprocessedSwitches(): if unprocSw[0] in ('e', 'extended'): extendedInfo = True def printUsersInGroup(group=False): result = diracAdmin.csListUsers(group) if result['OK']: if group: print("Users in group %s:" % group) else:
def main():
    """Create or modify a group in the Configuration Service.

    Reads the group name, users and properties collected by the switch
    callbacks into the module globals, adds any ``Name=Value`` positional
    arguments as extra group options, then modifies (or creates) the group
    and commits the change. Exits with 255 if anything failed, 0 otherwise.
    """
    global groupName
    global groupProperties
    global userNames
    Script.registerSwitch("G:", "GroupName:", "Name of the Group (Mandatory)", setGroupName)
    # The argument separator after "UserName:" was garbled in the source,
    # leaving the call syntactically invalid; it is restored here.
    Script.registerSwitch(
        "U:",
        "UserName:",
        "Short Name of user to be added to the Group (Allow Multiple instances or None)",
        addUserName,
    )
    Script.registerSwitch(
        "P:",
        "Property:",
        "Property to be added to the Group (Allow Multiple instances or None)",
        addProperty,
    )
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(
        ["Property=<Value>: Other properties to be added to the Group like (VOMSRole=XXXX)"],
        mandatory=False,
    )

    _, args = Script.parseCommandLine(ignoreErrors=True)

    if groupName is None:
        Script.showHelp(exitCode=1)

    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

    diracAdmin = DiracAdmin()
    exitCode = 0
    errorList = []

    groupProps = {}
    if userNames:
        groupProps["Users"] = ", ".join(userNames)
    if groupProperties:
        groupProps["Properties"] = ", ".join(groupProperties)

    for prop in args:
        pl = prop.split("=")
        if len(pl) < 2:
            errorList.append(
                ("in arguments", "Property %s has to include a '=' to separate name from value" % prop)
            )
            exitCode = 255
        else:
            pName = pl[0]
            # Everything after the first '=' belongs to the value
            pValue = "=".join(pl[1:])
            gLogger.info("Setting property %s to %s" % (pName, pValue))
            groupProps[pName] = pValue

    if not diracAdmin.csModifyGroup(groupName, groupProps, createIfNonExistant=True)["OK"]:
        errorList.append(("add group", "Cannot register group %s" % groupName))
        exitCode = 255
    else:
        # Only commit when the modification itself succeeded
        result = diracAdmin.csCommitChanges()
        if not result["OK"]:
            errorList.append(("commit", result["Message"]))
            exitCode = 255

    for error in errorList:
        gLogger.error("%s: %s" % error)

    DIRAC.exit(exitCode)
Script.setUsageMessage('\n'.join([ __doc__.split('\n')[1], 'Usage:', ' %s [option|cfgfile] ... user DN group [group] ...' % Script.scriptName, 'Arguments:', ' user: User name', ' DN: DN of the User', ' group: Add the user to the group' ])) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) < 3: Script.showHelp() from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin diracAdmin = DiracAdmin() exitCode = 0 forceCreation = False errorList = [] userProps = {} for unprocSw in Script.getUnprocessedSwitches(): if unprocSw[0] in ("f", "force"): forceCreation = True elif unprocSw[0] in ("p", "property"): prop = unprocSw[1] pl = prop.split("=") if len(pl) < 2: errorList.append( ("in arguments", "Property %s has to include a '=' to separate name from value"
# File : dirac-admin-get-site-mask
# Author : Stuart Paterson
########################################################################
# Command-line script: asks DiracAdmin for the site mask with
# printOutput=True and exits with 0 on success, or prints the error
# message and exits with 2.
__RCSID__ = "$Id$"

from DIRAC.Core.Base import Script

Script.setUsageMessage(""" Get the list of sites enabled in the mask for job submission Usage: %s [options] """ % Script.scriptName)

Script.parseCommandLine(ignoreErrors=True)

import DIRAC
from DIRAC import gLogger
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()

gLogger.setLevel('ALWAYS')

result = diracAdmin.getSiteMask(printOutput=True)
if result['OK']:
    DIRAC.exit(0)
else:
    # Parenthesised form works as a Python 2 statement and a Python 3 call
    print(result['Message'])
    DIRAC.exit(2)
#!/usr/bin/env python
########################################################################
# File : dirac-admin-get-banned-sites
# Author : Stuart Paterson
########################################################################
# Command-line script: lists the banned sites and prints, for each one,
# the entries returned by DiracAdmin.getSiteMaskLogging().
__RCSID__ = "$Id$"

from DIRAC.Core.Base import Script

Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

from DIRAC import exit as DIRACExit
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()

result = diracAdmin.getBannedSites()
if result['OK']:
    bannedSites = result['Value']
else:
    # Parenthesised print works on both Python 2 and Python 3
    print(result['Message'])
    DIRACExit(2)

for site in bannedSites:
    result = diracAdmin.getSiteMaskLogging(site)
    if result['OK']:
        # NOTE(review): this iterates result['Value'] directly and reads
        # fields 0..2 of each entry; confirm the shape getSiteMaskLogging
        # returns for the DIRAC version in use.
        for siteLog in result['Value']:
            print('%-30s %s %s %s' % (site, siteLog[0], siteLog[1], siteLog[2]))
    else:
        print('%-30s %s' % (site, result['Message']))
def main():
    """Ban a site from the site mask and optionally notify by e-mail.

    Takes two mandatory positional arguments (site name and comment). With
    the e-mail switch left at its default (True), a notification is sent to
    the address configured under ``EMail/Production`` in Operations. Exits
    with 2 if the Configuration Service cannot be reached, the ban fails or
    the current user cannot be determined; 0 otherwise.
    """
    Script.registerSwitch("E:", "email=", "Boolean True/False (True by default)")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument("Site: Name of the Site")
    Script.registerArgument("Comment: Reason of the action")
    Script.parseCommandLine(ignoreErrors=True)

    from DIRAC import exit as DIRACExit, gConfig, gLogger
    from DIRAC.Core.Utilities.PromptUser import promptUser
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations

    def getBoolean(value):
        """Map 'true'/'false' (any case) to a bool; show the help otherwise."""
        if value.lower() == "true":
            return True
        elif value.lower() == "false":
            return False
        else:
            Script.showHelp()

    email = True
    for switch in Script.getUnprocessedSwitches():
        # The switch is registered with both a short ("E") and a long
        # ("email") name; matching only the long one silently ignored
        # "-E False".
        if switch[0] in ("E", "email"):
            email = getBoolean(switch[1])

    diracAdmin = DiracAdmin()
    exitCode = 0
    errorList = []

    setup = gConfig.getValue("/DIRAC/Setup", "")
    if not setup:
        print("ERROR: Could not contact Configuration Service")
        exitCode = 2
        DIRACExit(exitCode)

    # result = promptUser(
    #     'All the elements that are associated with this site will be banned,'
    #     'are you sure about this action?'
    # )
    # if not result['OK'] or result['Value'] is 'n':
    #   print 'Script stopped'
    #   DIRACExit( 0 )

    # parseCommandLine show help when mandatory arguments are not specified or incorrect argument
    site, comment = Script.getPositionalArgs(group=True)
    result = diracAdmin.banSite(site, comment, printOutput=True)
    if not result["OK"]:
        errorList.append((site, result["Message"]))
        exitCode = 2
    else:
        if email:
            userName = diracAdmin._getCurrentUser()
            if not userName["OK"]:
                print("ERROR: Could not obtain current username from proxy")
                exitCode = 2
                DIRACExit(exitCode)
            userName = userName["Value"]
            subject = "%s is banned for %s setup" % (site, setup)
            body = "Site %s is removed from site mask for %s setup by %s on %s.\n\n" % (
                site,
                setup,
                userName,
                time.asctime(),
            )
            body += "Comment:\n%s" % comment

            addressPath = "EMail/Production"
            address = Operations().getValue(addressPath, "")
            if not address:
                # No recipient configured: show the message instead of sending it
                gLogger.notice("'%s' not defined in Operations, can not send Mail\n" % addressPath, body)
            else:
                result = diracAdmin.sendMail(address, subject, body)
        else:
            print("Automatic email disabled by flag.")

    for error in errorList:
        print("ERROR %s: %s" % error)

    DIRACExit(exitCode)
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

# Exactly one positional argument (the CE) is expected
if not len( args ) == 1:
  Script.showHelp()

ce = args[0]

host = None
vo = getVO( 'lhcb' )
# The host and VO switches override the defaults
for unprocSw in Script.getUnprocessedSwitches():
  if unprocSw[0] in ( "H", "host" ):
    host = unprocSw[1]
  if unprocSw[0] in ( "V", "vo" ):
    vo = unprocSw[1]

diracAdmin = DiracAdmin()

result = diracAdmin.getBDIICEState( ce, useVO = vo, host = host )
if not result['OK']:
  # Parenthesised print works on both Python 2 and Python 3
  print( result['Message'] )
  DIRAC.exit( 2 )

ces = result['Value']
# A separate loop name keeps the 'ce' argument from being overwritten
for ceState in ces:
  print( "CE: %s {" % ceState.get( 'GlueCEUniqueID', 'Unknown' ) )
  # items() exists on Python 2 and 3; iteritems() is Python 2 only
  for item in ceState.items():
    print( "%s: %s" % item )
  print( "}" )
def main():
    """Register or modify a host in the Configuration Service.

    Uses the host name, DN and properties collected by the switch callbacks
    into the module globals, adds ``Name=Value`` positional arguments as
    extra host options, modifies (or creates) the host and commits. When
    everything succeeded, the host is also registered in
    ComponentMonitoring unless it is already there. Exits with 255 on
    failure, 0 otherwise.
    """
    global hostName
    global hostDN
    global hostProperties
    Script.registerSwitch('H:', 'HostName:', 'Name of the Host (Mandatory)', setHostName)
    Script.registerSwitch('D:', 'HostDN:', 'DN of the Host Certificate (Mandatory)', setHostDN)
    Script.registerSwitch(
        'P:',
        'Property:',
        'Property to be added to the Host (Allow Multiple instances or None)',
        addProperty)

    Script.parseCommandLine(ignoreErrors=True)

    # Both the host name and its DN are required
    if hostName is None or hostDN is None:
        Script.showHelp(exitCode=1)

    args = Script.getPositionalArgs()

    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    diracAdmin = DiracAdmin()
    exitCode = 0
    errorList = []

    hostProps = {'DN': hostDN}
    if hostProperties:
        hostProps['Properties'] = ', '.join(hostProperties)

    for prop in args:
        # Split on the first '=' only: the value may itself contain '='
        pName, separator, pValue = prop.partition("=")
        if not separator:
            errorList.append(
                ("in arguments", "Property %s has to include a '=' to separate name from value" % prop))
            exitCode = 255
            continue
        Script.gLogger.info("Setting property %s to %s" % (pName, pValue))
        hostProps[pName] = pValue

    modified = diracAdmin.csModifyHost(hostName, hostProps, createIfNonExistant=True)
    if modified['OK']:
        # Commit only when the modification itself went through
        result = diracAdmin.csCommitChanges()
        if not result['OK']:
            errorList.append(("commit", result['Message']))
            exitCode = 255
    else:
        errorList.append(("add host", "Cannot register host %s" % hostName))
        exitCode = 255

    if exitCode == 0:
        from DIRAC.FrameworkSystem.Client.ComponentMonitoringClient import ComponentMonitoringClient
        cmc = ComponentMonitoringClient()
        ret = cmc.hostExists(dict(HostName=hostName))
        if not ret['OK']:
            Script.gLogger.error(
                'Cannot check if host is registered in ComponentMonitoring', ret['Message'])
        elif ret['Value']:
            Script.gLogger.info(
                'Host already registered in ComponentMonitoring')
        else:
            ret = cmc.addHost(dict(HostName=hostName, CPU='TO_COME'))
            if not ret['OK']:
                Script.gLogger.error(
                    'Failed to add Host to ComponentMonitoring', ret['Message'])

    for error in errorList:
        Script.gLogger.error("%s: %s" % error)

    DIRAC.exit(exitCode)
#!/usr/bin/env python
########################################################################
# File : dirac-admin-get-banned-sites
# Author : Stuart Paterson
########################################################################
# Command-line script: lists the banned sites and prints, for each one,
# fields 1..3 of the last entry of its site-mask logging history.
__RCSID__ = "$Id$"

from DIRAC.Core.Base import Script

Script.parseCommandLine( ignoreErrors = True )
args = Script.getPositionalArgs()

from DIRAC import exit as DIRACExit
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()

result = diracAdmin.getBannedSites( printOutput = False )
if result['OK']:
  banned_sites = result['Value']
else:
  # Parenthesised print works on both Python 2 and Python 3
  print( result['Message'] )
  DIRACExit( 2 )

for site in banned_sites:
  result = diracAdmin.getSiteMaskLogging( site )
  if result['OK']:
    sites = result['Value']
    # The history is keyed by site name; only the last entry is shown
    print( '%-30s %s %s %s' % ( site, sites[site][-1][1], sites[site][-1][2], sites[site][-1][3] ) )
  else:
    print( '%-30s %s' % ( site, result['Message'] ) )
check = False if switch[0].lower() == "m" or switch[0].lower() == "mute": mute = True if switch[0] == "S" or switch[0].lower() == "site": site = switch[1] # from DIRAC.ConfigurationSystem.Client.CSAPI import CSAPI from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations from DIRAC import gConfig, gLogger from DIRAC.ResourceStatusSystem.Client.ResourceStatus import ResourceStatus from DIRAC.Core.Security.ProxyInfo import getProxyInfo # csAPI = CSAPI() diracAdmin = DiracAdmin() exitCode = 0 errorList = [] setup = gConfig.getValue( '/DIRAC/Setup', '' ) if not setup: print 'ERROR: Could not contact Configuration Service' exitCode = 2 DIRAC.exit( exitCode ) res = getProxyInfo() if not res[ 'OK' ]: gLogger.error( 'Failed to get proxy information', res[ 'Message' ] ) DIRAC.exit( 2 ) userName = res['Value'].get( 'username' ) if not userName:
from DIRAC.Core.Base import Script Script.setUsageMessage('\n'.join([__doc__.split('\n')[1], 'Usage:', ' %s [option|cfgfile] ... JobID' % Script.scriptName, 'Arguments:', ' JobID: DIRAC ID of the Job'])) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) < 1: Script.showHelp() from DIRAC import exit as DIRACExit from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin diracAdmin = DiracAdmin() exitCode = 0 errorList = [] for job in args: try: job = int(job) except BaseException as x: errorList.append((job, 'Expected integer for jobID')) exitCode = 2 continue result = diracAdmin.getJobPilots(job) if not result['OK']: errorList.append((job, result['Message']))
# Default VO: the one of the group found in the proxy information, if any
if ret['OK'] and 'group' in ret['Value']:
  voName = getVOForGroup( ret['Value']['group'] )

# The host and VO switches override the defaults
for unprocSw in Script.getUnprocessedSwitches():
  if unprocSw[0] in ( "H", "host" ):
    host = unprocSw[1]
  if unprocSw[0] in ( "V", "vo" ):
    voName = unprocSw[1]

if not voName:
  Script.gLogger.error( 'Could not determine VO' )
  Script.showHelp()

from DIRAC import exit as DIRACExit
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()

result = diracAdmin.getBDIISA( site, useVO = voName, host = host )
# The original tested "not ['OK']" (a non-empty list literal, always
# truthy), so a failed query was never reported and the script went on to
# read result['Value'].
if not result['OK']:
  print( result['Message'] )
  DIRACExit( 2 )

sas = result['Value']
for sa in sas:
  print( "SA: %s {" % sa.get( 'GlueChunkKey', 'Unknown' ) )
  # items() exists on Python 2 and 3; iteritems() is Python 2 only
  for item in sa.items():
    print( "%s: %s" % item )
  print( "}" )
from DIRAC import gConfig, gLogger from DIRAC.ResourceStatusSystem.Client.ResourceStatus import ResourceStatus from DIRAC.Core.Security.ProxyInfo import getProxyInfo from DIRAC.DataManagementSystem.Utilities.DMSHelpers import resolveSEGroup if not (read or write or check or remove): # No switch was specified, means we need all of them gLogger.notice( "No option given, all accesses will be allowed if they were not") read = True write = True check = True remove = True ses = resolveSEGroup(ses) diracAdmin = DiracAdmin() exitCode = 0 errorList = [] setup = gConfig.getValue('/DIRAC/Setup', '') if not setup: print 'ERROR: Could not contact Configuration Service' exitCode = 2 DIRAC.exit(exitCode) res = getProxyInfo() if not res['OK']: gLogger.error('Failed to get proxy information', res['Message']) DIRAC.exit(2) userName = res['Value'].get('username') if not userName:
class PilotStatusAgent(AgentModule):
    """ Agent that retires pilots which have not been updated for a long time.

        On each cycle it selects the pilots that are still in one of the
        ``queryStateList`` states but whose LastUpdateTime is older than
        ``PilotStalledDays`` days, declares them ``Deleted`` (sending accounting
        records when ``PilotAccountingEnabled`` is "yes"), asks DiracAdmin to kill
        them and finally asks the WMSAdministrator client to clear old pilots.
    """

    # States in which a pilot is still considered for the "old pilot" check
    queryStateList = ['Ready', 'Submitted', 'Running', 'Waiting', 'Scheduled']
    # States that must not be overwritten when preparing accounting data
    finalStateList = ['Done', 'Aborted', 'Cleared', 'Deleted', 'Failed']

    def __init__(self, *args, **kwargs):
        """ c'tor
        """
        AgentModule.__init__(self, *args, **kwargs)

        self.jobDB = None
        self.pilotDB = None
        self.diracadmin = None

    #############################################################################
    def initialize(self):
        """ Set the default options and create the DB / client objects.

            :return: S_OK()
        """
        self.am_setOption('PollingTime', 120)
        self.am_setOption('GridEnv', '')
        self.am_setOption('PilotStalledDays', 3)
        self.pilotDB = PilotAgentsDB()
        self.diracadmin = DiracAdmin()
        self.jobDB = JobDB()
        self.clearPilotsDelay = self.am_getOption('ClearPilotsDelay', 30)
        self.clearAbortedDelay = self.am_getOption('ClearAbortedPilotsDelay', 7)
        self.WMSAdministrator = WMSAdministratorClient()

        return S_OK()

    #############################################################################
    def execute(self):
        """ The PilotAgent execution method.

            :return: S_OK(), or the S_ERROR structure of a failed DB connection
        """
        self.pilotStalledDays = self.am_getOption('PilotStalledDays', 3)
        self.gridEnv = self.am_getOption('GridEnv')
        if not self.gridEnv:
            # No specific option found, try a general one
            setup = gConfig.getValue('/DIRAC/Setup', '')
            if setup:
                instance = gConfig.getValue('/DIRAC/Setups/%s/WorkloadManagement' % setup, '')
                if instance:
                    self.gridEnv = gConfig.getValue('/Systems/WorkloadManagement/%s/GridEnv' % instance, '')

        result = self.pilotDB._getConnection()
        if not result['OK']:
            return result
        connection = result['Value']

        # Now handle pilots not updated in the last N days (most likely the Broker is no
        # longer available) and declare them Deleted.
        # The connection is closed even if the handling raises.
        try:
            result = self.handleOldPilots(connection)
        finally:
            connection.close()
        if not result['OK']:
            # Previously ignored silently; the cycle still carries on.
            self.log.warn('Failed to handle old pilots', result['Message'])

        result = self.WMSAdministrator.clearPilots(self.clearPilotsDelay, self.clearAbortedDelay)
        if not result['OK']:
            self.log.warn('Failed to clear old pilots in the PilotAgentsDB')

        return S_OK()

    def clearWaitingPilots(self, condDict):
        """ Clear pilots in the faulty Waiting state

            :param dict condDict: must provide OwnerDN, OwnerGroup, GridType and Broker
            :return: S_OK(), or the S_ERROR structure of the failed selection
        """
        last_update = Time.dateTime() - MAX_WAITING_STATE_LENGTH * Time.hour
        clearDict = {'Status': 'Waiting',
                     'OwnerDN': condDict['OwnerDN'],
                     'OwnerGroup': condDict['OwnerGroup'],
                     'GridType': condDict['GridType'],
                     'Broker': condDict['Broker']}
        result = self.pilotDB.selectPilots(clearDict, older=last_update)
        if not result['OK']:
            self.log.warn('Failed to get the Pilot Agents for Waiting state')
            return result
        if not result['Value']:
            return S_OK()
        refList = result['Value']

        for pilotRef in refList:
            self.log.info('Setting Waiting pilot to Stalled: %s' % pilotRef)
            result = self.pilotDB.setPilotStatus(pilotRef, 'Stalled',
                                                 statusReason='Exceeded max waiting time')
            if not result['OK']:
                # Best effort: report it and carry on with the remaining pilots.
                self.log.warn('Failed to set pilot status to Stalled', pilotRef)

        return S_OK()

    def clearParentJob(self, pRef, pDict, connection):
        """ Clear the parameteric parent job from the PilotAgentsDB

            :param str pRef: reference of the parent pilot
            :param dict pDict: must provide 'ChildRefs' and 'ChildDicts'
            :param connection: DB connection passed on to the PilotAgentsDB calls
            :return: S_OK() / S_ERROR()
        """
        childList = pDict['ChildRefs']

        # Check that at least one child is in the database
        children_ok = False
        for child in childList:
            result = self.pilotDB.getPilotInfo(child, conn=connection)
            if result['OK'] and result['Value']:
                children_ok = True
                break

        if children_ok:
            return self.pilotDB.deletePilot(pRef, conn=connection)

        self.log.verbose('Adding children for parent %s' % pRef)
        result = self.pilotDB.getPilotInfo(pRef)
        # Guard added: the original indexed result['Value'][pRef] unconditionally.
        if not result['OK']:
            return result
        if pRef not in result['Value']:
            return S_ERROR('No pilot info found for parent %s' % pRef)
        parentInfo = result['Value'][pRef]
        tqID = parentInfo['TaskQueueID']
        ownerDN = parentInfo['OwnerDN']
        ownerGroup = parentInfo['OwnerGroup']
        broker = parentInfo['Broker']
        gridType = parentInfo['GridType']
        result = self.pilotDB.addPilotTQReference(childList, tqID, ownerDN, ownerGroup,
                                                  broker=broker, gridType=gridType)
        if not result['OK']:
            return result

        children_added = True
        for chRef, chDict in pDict['ChildDicts'].items():
            result = self.pilotDB.setPilotStatus(chRef, chDict['Status'],
                                                 destination=chDict['DestinationSite'],
                                                 conn=connection)
            if not result['OK']:
                children_added = False

        if not children_added:
            return S_ERROR('Failed to add children')
        # As before, the outcome of this deletion is not propagated.
        self.pilotDB.deletePilot(pRef, conn=connection)
        return S_OK()

    def handleOldPilots(self, connection):
        """ select all pilots that have not been updated in the last N days and declared them
            Deleted, accounting for them.

            :param connection: DB connection passed on to accountPilots()
            :return: S_OK(), or the S_ERROR structure of a failed PilotAgentsDB query
        """
        pilotsToAccount = {}
        timeLimitToConsider = Time.toString(Time.dateTime() - Time.day * self.pilotStalledDays)
        result = self.pilotDB.selectPilots({'Status': self.queryStateList},
                                           older=timeLimitToConsider,
                                           timeStamp='LastUpdateTime')
        if not result['OK']:
            self.log.error('Failed to get the Pilot Agents')
            return result
        if not result['Value']:
            return S_OK()

        refList = result['Value']
        result = self.pilotDB.getPilotInfo(refList)
        if not result['OK']:
            self.log.error('Failed to get Info for Pilot Agents')
            return result

        pilotsDict = result['Value']

        for pRef in pilotsDict:
            # Keep pilots that still have a recently updated job
            if pilotsDict[pRef].get('Jobs') and \
                    self._checkJobLastUpdateTime(pilotsDict[pRef]['Jobs'], self.pilotStalledDays):
                self.log.debug('%s should not be deleted since one job of %s is running.'
                               % (str(pRef), str(pilotsDict[pRef]['Jobs'])))
                continue
            deletedJobDict = pilotsDict[pRef]
            deletedJobDict['Status'] = 'Deleted'
            deletedJobDict['StatusDate'] = Time.dateTime()
            pilotsToAccount[pRef] = deletedJobDict
            # Flush in batches once more than 100 pilots have accumulated
            if len(pilotsToAccount) > 100:
                self.accountPilots(pilotsToAccount, connection)
                self._killPilots(pilotsToAccount)
                pilotsToAccount = {}

        # Flush whatever is left
        self.accountPilots(pilotsToAccount, connection)
        self._killPilots(pilotsToAccount)

        return S_OK()

    def accountPilots(self, pilotsToAccount, connection):
        """ account for pilots

            Sends accounting records (when the PilotAccountingEnabled option is "yes")
            and then stores the new status of the pilots.  The status is only stored
            when accounting is disabled or the records were committed.

            :param dict pilotsToAccount: pilot reference -> pilot dictionary with at least
                                         Status, DestinationSite and StatusDate
            :param connection: DB connection passed on to the PilotAgentsDB calls
            :return: S_OK() / S_ERROR()
        """
        pae = self.am_getOption('PilotAccountingEnabled', 'yes')
        accountingFlag = pae.lower() == "yes"

        if not pilotsToAccount:
            self.log.info('No pilots to Account')
            return S_OK()

        accountingSent = False
        if accountingFlag:
            retVal = self.pilotDB.getPilotInfo(list(pilotsToAccount), conn=connection)
            if not retVal['OK']:
                self.log.error('Fail to retrieve Info for pilots', retVal['Message'])
                return retVal
            dbData = retVal['Value']
            for pref in dbData:
                if pref in pilotsToAccount:
                    # Never overwrite a pilot that already reached a final state
                    if dbData[pref]['Status'] not in self.finalStateList:
                        dbData[pref]['Status'] = pilotsToAccount[pref]['Status']
                        dbData[pref]['DestinationSite'] = pilotsToAccount[pref]['DestinationSite']
                        dbData[pref]['LastUpdateTime'] = pilotsToAccount[pref]['StatusDate']

            retVal = self.__addPilotsAccountingReport(dbData)
            if not retVal['OK']:
                # Fixed: the original repeated the "retrieve Info" message here.
                self.log.error('Fail to add accounting report for pilots', retVal['Message'])
                return retVal

            self.log.info("Sending accounting records...")
            retVal = gDataStoreClient.commit()
            if not retVal['OK']:
                self.log.error("Can't send accounting reports", retVal['Message'])
            else:
                self.log.info("Accounting sent for %s pilots" % len(pilotsToAccount))
                accountingSent = True

        if not accountingFlag or accountingSent:
            for pRef in pilotsToAccount:
                pDict = pilotsToAccount[pRef]
                self.log.verbose('Setting Status for %s to %s' % (pRef, pDict['Status']))
                self.pilotDB.setPilotStatus(pRef,
                                            pDict['Status'],
                                            pDict['DestinationSite'],
                                            pDict['StatusDate'],
                                            conn=connection)

        return S_OK()

    def __addPilotsAccountingReport(self, pilotsData):
        """ fill accounting data

            :param dict pilotsData: pilot reference -> pilot dictionary
            :return: S_OK(), or the S_ERROR structure of the first failed addRegister()
        """
        for pRef in pilotsData:
            pData = pilotsData[pRef]
            pA = PilotAccounting()
            pA.setEndTime(pData['LastUpdateTime'])
            pA.setStartTime(pData['SubmissionTime'])
            retVal = CS.getUsernameForDN(pData['OwnerDN'])
            if not retVal['OK']:
                userName = '******'
                self.log.error("Can't determine username for dn:", pData['OwnerDN'])
            else:
                userName = retVal['Value']
            pA.setValueByKey('User', userName)
            pA.setValueByKey('UserGroup', pData['OwnerGroup'])
            result = getSiteForCE(pData['DestinationSite'])
            if result['OK'] and result['Value'].strip():
                pA.setValueByKey('Site', result['Value'].strip())
            else:
                pA.setValueByKey('Site', 'Unknown')
            pA.setValueByKey('GridCE', pData['DestinationSite'])
            pA.setValueByKey('GridMiddleware', pData['GridType'])
            pA.setValueByKey('GridResourceBroker', pData['Broker'])
            pA.setValueByKey('GridStatus', pData['Status'])
            if 'Jobs' not in pData:
                pA.setValueByKey('Jobs', 0)
            else:
                pA.setValueByKey('Jobs', len(pData['Jobs']))
            self.log.verbose("Added accounting record for pilot %s" % pData['PilotID'])
            retVal = gDataStoreClient.addRegister(pA)
            if not retVal['OK']:
                return retVal
        return S_OK()

    def _killPilots(self, acc):
        """ Ask DiracAdmin to kill every pilot of ``acc`` whose info can be retrieved.

            :param dict acc: dictionary keyed by pilot reference
        """
        for i in sorted(acc.keys()):
            result = self.diracadmin.getPilotInfo(i)
            if result['OK'] and i in result['Value'] and 'Status' in result['Value'][i]:
                ret = self.diracadmin.killPilot(str(i))
                if ret['OK']:
                    self.log.info("Successfully deleted: %s (Status : %s)" % (i, result['Value'][i]['Status']))
                else:
                    self.log.error("Failed to delete pilot: ", "%s : %s" % (i, ret['Message']))
            else:
                self.log.error("Failed to get pilot info", "%s : %s" % (i, str(result)))

    def _checkJobLastUpdateTime(self, joblist, StalledDays):
        """ Tell whether any job of ``joblist`` was updated within the last ``StalledDays`` days.

            :param list joblist: job IDs (anything accepted by int())
            :param StalledDays: number of days, multiplied with Time.day
            :return: True as soon as one recently updated job is found, False otherwise
        """
        timeLimitToConsider = Time.dateTime() - Time.day * StalledDays
        ret = False
        for jobID in joblist:
            result = self.jobDB.getJobAttributes(int(jobID))
            if result['OK']:
                if 'LastUpdateTime' in result['Value']:
                    lastUpdateTime = result['Value']['LastUpdateTime']
                    if Time.fromString(lastUpdateTime) > timeLimitToConsider:
                        ret = True
                        self.log.debug(
                            'Since %s updates LastUpdateTime on %s this does not to need to be deleted.' %
                            (str(jobID), str(lastUpdateTime)))
                        break
            else:
                self.log.error("Error taking job info from DB", result['Message'])
        return ret
def main():
    """Add or modify a host entry in the CS and register it in ComponentMonitoring.

    The host name and DN come from the mandatory -H / -D switches, properties
    from repeated -P switches, and any further ``Name=Value`` positional
    arguments are stored as additional host options.  Collected errors are
    logged at the end and the process exits with 0 on success or 255 on error.
    """
    global hostName, hostDN, hostProperties

    Script.registerSwitch("H:", "HostName:", "Name of the Host (Mandatory)", setHostName)
    Script.registerSwitch("D:", "HostDN:", "DN of the Host Certificate (Mandatory)", setHostDN)
    Script.registerSwitch(
        "P:",
        "Property:",
        "Property to be added to the Host (Allow Multiple instances or None)",
        addProperty,
    )
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(
        ["Property=<Value>: Other properties to be added to the Host like (Responsible=XXX)"],
        mandatory=False,
    )
    _, extraProps = Script.parseCommandLine(ignoreErrors=True)

    if hostName is None or hostDN is None:
        Script.showHelp(exitCode=1)

    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

    admin = DiracAdmin()
    status = 0
    problems = []

    hostProps = {"DN": hostDN}
    if hostProperties:
        hostProps["Properties"] = ", ".join(hostProperties)

    # Positional arguments: everything after the first "=" is the value.
    for rawProp in extraProps:
        propName, separator, propValue = rawProp.partition("=")
        if not separator:
            problems.append(
                ("in arguments", "Property %s has to include a '=' to separate name from value" % rawProp)
            )
            status = 255
            continue
        gLogger.info("Setting property %s to %s" % (propName, propValue))
        hostProps[propName] = propValue

    # The CS is modified even when some positional arguments were rejected above.
    modified = admin.csModifyHost(hostName, hostProps, createIfNonExistant=True)
    if modified["OK"]:
        committed = admin.csCommitChanges()
        if not committed["OK"]:
            problems.append(("commit", committed["Message"]))
            status = 255
    else:
        problems.append(("add host", "Cannot register host %s" % hostName))
        status = 255

    if status == 0:
        from DIRAC.FrameworkSystem.Client.ComponentMonitoringClient import ComponentMonitoringClient

        monitoring = ComponentMonitoringClient()
        reply = monitoring.hostExists({"HostName": hostName})
        if not reply["OK"]:
            gLogger.error("Cannot check if host is registered in ComponentMonitoring", reply["Message"])
        elif reply["Value"]:
            gLogger.info("Host already registered in ComponentMonitoring")
        else:
            reply = monitoring.addHost({"HostName": hostName, "CPU": "TO_COME"})
            if not reply["OK"]:
                gLogger.error("Failed to add Host to ComponentMonitoring", reply["Message"])

    for problem in problems:
        gLogger.error("%s: %s" % problem)

    DIRAC.exit(status)
import DIRAC
from DIRAC.Core.Base import Script

# Usage text: second line of the module docstring followed by the synopsis.
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1],
    'Usage:',
    ' %s [option|cfgfile] ... Site ...' % Script.scriptName,
    'Arguments:',
    ' Site: Name of the Site',
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

if len(args) < 1:
    Script.showHelp()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()
exitCode = 0
errorList = []

# Print the section of every requested site, remembering the failures.
for site in args:
    result = diracAdmin.getSiteSection(site, printOutput=True)
    if result['OK']:
        continue
    errorList.append((site, result['Message']))
    exitCode = 2

for error in errorList:
    print("ERROR %s: %s" % error)

DIRAC.exit(exitCode)
class TokenAgent(AgentModule):
    '''
      TokenAgent is in charge of checking tokens assigned on resources.
      Notifications are sent to those users owning expiring tokens.
    '''

    # Rss token
    __rssToken = 'rs_svc'

    def __init__(self, *args, **kwargs):
        ''' c'tor
        '''
        AgentModule.__init__(self, *args, **kwargs)

        self.notifyHours = 12
        self.adminMail = ''

        self.rsClient = None
        # token owner -> list of [ owner, element, name, statusType, status, expiration ]
        self.tokenDict = {}
        self.diracAdmin = None

    def initialize(self):
        ''' TokenAgent initialization
        '''
        self.notifyHours = self.am_getOption('notifyHours', self.notifyHours)
        self.adminMail = self.am_getOption('adminMail', self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        '''
          Looks for user tokens. If they are expired, or expiring, it notifies users.
        '''
        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        elements = ('Site', 'Resource', 'Node')

        for element in elements:
            self.log.info('Processing %s' % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens['OK']:
                self.log.error(interestingTokens['Message'])
                continue
            interestingTokens = interestingTokens['Value']

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens['OK']:
                self.log.error(processTokens['Message'])
                continue

        notificationResult = self._notifyOfTokens()
        if not notificationResult['OK']:
            self.log.error(notificationResult['Message'])

        return S_OK()

    def _getInterestingTokens(self, element):
        '''
          Given an element, picks all the entries with TokenExpiration < now + X<hours>
          If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

          :param str element: element family, e.g. 'Site', 'Resource' or 'Node'
          :return: S_OK( list of dictionaries ) / S_ERROR
        '''
        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(element, 'Status',
                                                          meta={'older': ('TokenExpiration', tokenExpLimit)})
        if not tokenElements['OK']:
            return tokenElements

        tokenColumns = tokenElements['Columns']

        # Turn every row into a column -> value dictionary and keep the user-owned ones
        interestingTokens = []
        for row in tokenElements['Value']:
            tokenElement = dict(zip(tokenColumns, row))
            if tokenElement['TokenOwner'] != self.__rssToken:
                interestingTokens.append(tokenElement)

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        '''
          Given an element and a list of interesting token elements, updates the
          database if the token is expired (handing it back to the RSS token owner),
          logs a message and adds the token to self.tokenDict under its owner.

          :param str element: element family the tokens belong to
          :param list tokenElements: dictionaries as returned by _getInterestingTokens
          :return: S_OK() / S_ERROR
        '''
        never = datetime.max

        for tokenElement in tokenElements:

            try:
                name = tokenElement['Name']
                statusType = tokenElement['StatusType']
                status = tokenElement['Status']
                tokenOwner = tokenElement['TokenOwner']
                tokenExpiration = tokenElement['TokenExpiration']
            except KeyError as e:
                return S_ERROR(e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                result = self.rsClient.addOrModifyStatusElement(element, 'Status', name=name,
                                                                statusType=statusType,
                                                                tokenOwner=self.__rssToken,
                                                                tokenExpiration=never)
                if not result['OK']:
                    return result
            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg % (name, statusType, tokenOwner, tokenExpiration))

            # Expired and expiring tokens are both recorded; _notifyOfTokens splits them.
            self.tokenDict.setdefault(tokenOwner, []).append(
                [tokenOwner, element, name, statusType, status, tokenExpiration])

        return S_OK()

    def _notifyOfTokens(self):
        '''
          Splits interesing tokens between expired and expiring. Also splits them
          among users. It ends sending notifications to the users.

          :return: S_OK(), or the result of the admin notification when one is sent
        '''
        now = datetime.utcnow()

        adminExpired = []
        adminExpiring = []

        for tokenOwner, tokenLists in self.tokenDict.items():

            expired = []
            expiring = []

            for tokenList in tokenLists:
                # Index 5 holds the token expiration date
                if tokenList[5] < now:
                    expired.append(tokenList)
                    adminExpired.append(tokenList)
                else:
                    expiring.append(tokenList)
                    adminExpiring.append(tokenList)

            resNotify = self._notify(tokenOwner, expired, expiring)
            if not resNotify['OK']:
                self.log.error('Failed to notify token owner', resNotify['Message'])

        if (adminExpired or adminExpiring) and self.adminMail:
            return self._notify(self.adminMail, adminExpired, adminExpiring)

        return S_OK()

    def _notify(self, tokenOwner, expired, expiring):
        '''
          Given a token owner and a list of expired and expiring tokens, sends an
          email to the user.

          :param str tokenOwner: recipient passed to DiracAdmin.sendMail
          :param list expired: token lists that already expired
          :param list expiring: token lists that are about to expire
          :return: result of sendMail, or S_ERROR if the mail could not be sent
        '''
        subject = 'RSS token summary for tokenOwner %s' % tokenOwner

        mail = '\nEXPIRED tokens ( RSS has taken control of them )\n'
        for tokenList in expired:
            mail += ' '.join(str(x) for x in tokenList)
            mail += '\n'

        # Fixed: this used to be "mail = ...", which threw away the EXPIRED
        # section assembled just above.
        mail += '\nEXPIRING tokens ( RSS will take control of them )\n'
        for tokenList in expiring:
            mail += ' '.join(str(x) for x in tokenList)
            mail += '\n'

        mail += "\n\n You can extend for another 24 hours using the web interface (Set token -> Acquire)\n"
        mail += " Or you can use the dirac-rss-set-token script\n\n"
        mail += "Through the same interfaces you can release the token any time\n"

        # FIXME: you can re-take control of them using this or that...

        resEmail = self.diracAdmin.sendMail(tokenOwner, subject, mail)
        if not resEmail['OK']:
            return S_ERROR('Cannot send email to user "%s"' % tokenOwner)

        return resEmail
def execute(self):
    """
      The main method of the agent. It gets the elements which need to be tested and
      evaluated from the CS. Then it instantiates TestExecutor and StatusEvaluator and
      calls their main methods to finish all the work.

      Computing elements (or the site itself for CLOUD sites) of every active,
      non-excluded site and the storage elements of the SE-USER group are tested
      in one thread per element; afterwards a status is evaluated per site and VO.

      :return: S_OK(), or the S_ERROR structure of a failed site mask query
    """
    from BESDIRAC.ResourceStatusSystem.SAM.SAMTest import TestConfiguration
    self.tests = TestConfiguration.TESTS
    self.__loadTestObj()

    self.testExecutor = TestExecutor(self.tests, self.apis)
    self.statusEvaluator = StatusEvaluator(self.apis)

    elements = []
    sitesCEs = {}

    # CE tests
    noTestSites = [site.strip() for site in self.am_getOption('noTestSite', '').split(',') if site != '']
    diracAdmin = DiracAdmin()
    activeSites = diracAdmin.getSiteMask()
    if not activeSites['OK']:
        return activeSites
    activeSites = [site for site in activeSites['Value'] if site not in noTestSites]
    # Fixed: the list used to be passed as a second argument and the '%s'
    # placeholder was never filled in, unlike every other log call here.
    gLogger.info('Active sites: %s' % activeSites)

    for siteName in activeSites:
        domain = siteName.split('.')[0]
        vos = BESUtils.getSiteVO(siteName)
        if 'CLOUD' != domain:
            siteCEs = CSHelpers.getSiteComputingElements(siteName)
            sitesCEs[siteName] = siteCEs
            for ce in siteCEs:
                elements.append({'ElementName': ce,
                                 'ElementType': 'ComputingElement',
                                 'VO': vos})
                gLogger.debug("List of elements: %s" % ce)
        else:
            # A cloud site is tested as a single element named after the site
            sitesCEs[siteName] = [siteName]
            elements.append({'ElementName': siteName,
                             'ElementType': 'CLOUD',
                             'VO': vos})

    # SE tests
    # An empty default keeps .split() from failing on None when the option is not set.
    ses = gConfig.getValue('Resources/StorageElementGroups/SE-USER', '')
    for se in ses.split(', '):
        if not se:
            continue
        seSites = BESUtils.getSitesForSE(se)
        for seSite in seSites:
            gLogger.debug('Site for SE %s: %s' % (se, seSite))
            if seSite not in activeSites:
                continue
            vos = BESUtils.getSiteVO(seSite)
            gLogger.debug('vos for SE %s under site %s: %s' % (se, seSite, vos))
            if len(vos) == 0:
                continue
            vo = vos[0]
            elements.append({'ElementName': se,
                             'ElementType': 'StorageElement',
                             'VO': vo})
            gLogger.info('VO for SE %s: %s' % (se, vo))
            # The first active site with a VO is enough for this SE
            break

    lastCheckTime = datetime.utcnow().replace(microsecond=0)
    self.elementsStatus = {}

    # Run the tests of all elements concurrently and wait for all of them
    threads = []
    for elementDict in elements:
        t = threading.Thread(target=self._execute, args=(elementDict, ))
        threads.append(t)
        t.start()

    for thread in threads:
        thread.join()

    for siteName in activeSites:
        seList = CSHelpers.getSiteStorageElements(siteName)
        se = ''
        if seList:
            se = seList[0]
        try:
            seStatus = self.elementsStatus[se]['all']
        except KeyError:
            seStatus = None

        # Collect the CE statuses of the site per VO
        voStatus = {'all': []}
        for ce in sitesCEs[siteName]:
            # 'in' replaces the Python-2-only dict.has_key()
            if ce not in self.elementsStatus:
                continue
            for vo, status in self.elementsStatus[ce].items():
                voStatus.setdefault(vo, []).append(status)

        for vo, ceStatusList in voStatus.items():
            if ceStatusList == [] and seStatus is None:
                continue
            res = self.statusEvaluator.evaluateSiteStatus(siteName, ceStatusList, seStatus,
                                                          vo=vo, lastCheckTime=lastCheckTime)
            if not res['OK']:
                gLogger.error('StatusEvaluator.evaluateSiteStatus: %s' % res['Message'])
                break

    return S_OK()
# Usage text: second line of the module docstring followed by the synopsis.
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1],
    'Usage:',
    ' %s [option|cfgfile] ... PilotID ...' % Script.scriptName,
    'Arguments:',
    ' PilotID: Grid ID of the pilot',
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

if len(args) < 1:
    Script.showHelp()

from DIRAC import exit as DIRACExit
from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()
exitCode = 0
errorList = []

# Request the output of every pilot given on the command line.
for gridID in args:
    result = diracAdmin.getPilotOutput(gridID)
    if result['OK']:
        continue
    errorList.append((gridID, result['Message']))
    exitCode = 2

for error in errorList:
    print("ERROR %s: %s" % error)

DIRACExit(exitCode)
from DIRAC.Core.Base import Script Script.setUsageMessage('\n'.join([__doc__.split('\n')[1], 'Usage:', ' %s [option|cfgfile] ... PilotID ...' % Script.scriptName, 'Arguments:', ' PilotID: Grid ID of the pilot'])) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) < 1: Script.showHelp() from DIRAC import exit as DIRACExit from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin diracAdmin = DiracAdmin() exitCode = 0 errorList = [] for gridID in args: result = diracAdmin.getPilotLoggingInfo(gridID) if not result['OK']: errorList.append((gridID, result['Message'])) exitCode = 2 else: print('Pilot Reference: %s', gridID) print(result['Value']) print() for error in errorList:
Script.setUsageMessage('\n'.join([ __doc__.split('\n')[1], 'Usage:', ' %s [option|cfgfile] ... PilotID ...' % Script.scriptName, 'Arguments:', ' PilotID: Grid ID of the pilot' ])) Script.registerSwitch('e', 'extended', 'Get extended printout', setExtendedPrint) Script.parseCommandLine(ignoreErrors=True) args = Script.getPositionalArgs() if len(args) < 1: Script.showHelp() from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin from DIRAC.Interfaces.API.Dirac import Dirac diracAdmin = DiracAdmin() dirac = Dirac() exitCode = 0 errorList = [] for gridID in args: result = diracAdmin.getPilotInfo(gridID) if not result['OK']: errorList.append((gridID, result['Message'])) exitCode = 2 else: res = result['Value'][gridID] if extendedPrint: tab = '' for key in [
' Property=<Value>: Other properties to be added to the User like (VOMSRole=XXXX)', ] ) ) Script.registerSwitch( 'G:', 'GroupName:', 'Name of the Group (Mandatory)', setGroupName ) Script.registerSwitch( 'U:', 'UserName:'******'Short Name of user to be added to the Group (Allow Multiple instances or None)', addUserName ) Script.registerSwitch( 'P:', 'Property:', 'Property to be added to the Group (Allow Multiple instances or None)', addProperty ) Script.parseCommandLine( ignoreErrors = True ) if groupName == None: Script.showHelp() DIRAC.exit( -1 ) args = Script.getPositionalArgs() from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin diracAdmin = DiracAdmin() exitCode = 0 errorList = [] groupProps = {} if userNames: groupProps['Users'] = ', '.join( userNames ) if groupProperties: groupProps['Properties'] = ', '.join( groupProperties ) for prop in args: pl = prop.split( "=" ) if len( pl ) < 2: errorList.append( ( "in arguments", "Property %s has to include a '=' to separate name from value" % prop ) ) exitCode = 255 else:
#!/usr/bin/env python
########################################################################
# $HeadURL$
# File :    dirac-admin-pilot-summary
# Author :  Stuart Paterson
########################################################################
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script

Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()

# Exit with 0 when the summary could be obtained; otherwise print the error
# message and exit with 2.
result = diracAdmin.getPilotSummary()
if not result['OK']:
    print(result['Message'])
    DIRAC.exit(2)
else:
    DIRAC.exit(0)
Script.registerSwitch('D:', 'HostDN:', 'DN of the Host Certificate (Mandatory)', setHostDN)
Script.registerSwitch(
    'P:',
    'Property:',
    'Property to be added to the Host (Allow Multiple instances or None)',
    addProperty)

Script.parseCommandLine(ignoreErrors=True)

# Both the host name and its DN are required.
if hostName is None or hostDN is None:
    Script.showHelp(exitCode=1)

args = Script.getPositionalArgs()

from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin

diracAdmin = DiracAdmin()
exitCode = 0
errorList = []

hostProps = {'DN': hostDN}
if hostProperties:
    hostProps['Properties'] = ', '.join(hostProperties)

# Positional arguments are "Name=Value" pairs; the value may itself contain "=".
for prop in args:
    pl = prop.split("=")
    if len(pl) < 2:
        errorList.append(("in arguments",
                          "Property %s has to include a '=' to separate name from value" % prop))
        exitCode = 255
        continue
    pName = pl[0]
    pValue = "=".join(pl[1:])
class EmailAgent(AgentModule):
    """ Agent that mails a per-site HTML summary of the ResourceStatusCache rows.

        On each cycle the rows of 'ResourceStatusCache' are grouped by SiteName, one
        HTML table is mailed per site to the users of the configured notification
        groups, and the cache is emptied afterwards.
    """

    def __init__(self, *args, **kwargs):
        """ c'tor
        """
        AgentModule.__init__(self, *args, **kwargs)
        self.diracAdmin = None
        self.default_value = None

        self.rsClient = ResourceStatusClient()

    def initialize(self, *args, **kwargs):
        ''' EmailAgent initialization
        '''
        self.diracAdmin = DiracAdmin()

        return S_OK()

    @staticmethod
    def _groupBySiteName(result):
        """
        Group results by SiteName

        :param dict result: structure with a 'Columns' list (containing 'SiteName')
                            and a 'Value' list of rows
        :return: dictionary SiteName -> list of rows
        """
        siteNameCol = result['Columns'].index('SiteName')

        siteNameDict = {}
        for row in result['Value']:
            siteNameDict.setdefault(row[siteNameCol], []).append(row)

        return siteNameDict

    @staticmethod
    def _buildTableRows(columnNames, records):
        """ Render the given records as the <tr> elements of the summary table.

        :param list columnNames: column names, giving the position of each value in a row
        :param list records: rows to render
        :return: string with one <tr>...</tr> per record
        """
        statusTypeCol = columnNames.index('StatusType')
        resourceNameCol = columnNames.index('ResourceName')
        statusCol = columnNames.index('Status')
        timeCol = columnNames.index('Time')
        previousStatusCol = columnNames.index('PreviousStatus')

        rowTemplate = "<tr>" \
                      "<td>{statusType}</td>" \
                      "<td>{resourceName}</td>" \
                      "<td class='{status}'>{status}</td>" \
                      "<td>{time}</td>" \
                      "<td class='{previousStatus}'>{previousStatus}</td>" \
                      "</tr>"

        return "".join(rowTemplate.format(statusType=row[statusTypeCol],
                                          resourceName=row[resourceNameCol],
                                          status=row[statusCol],
                                          time=str(row[timeCol]),
                                          previousStatus=row[previousStatusCol])
                       for row in records)

    def execute(self):
        """ Send one summary mail per site and empty the ResourceStatusCache.

        :return: S_OK(), or the S_ERROR structure of the failed cache selection
        """
        result = self.rsClient.select('ResourceStatusCache')
        if not result['OK']:
            # Hand back the failed result itself so that its message is not lost
            # (the original returned an empty S_ERROR()).
            return result

        columnNames = result['Columns']
        groupedBySite = self._groupBySiteName(result)

        # The header is the same for every site, so it is built only once
        setupName = gConfig.getValue('/DIRAC/Setup')
        if setupName:
            setup = "(" + setupName + ")\n\n"
        else:
            setup = ""

        html_header = """\
<!DOCTYPE html>
<html>
<head>
<meta charset='UTF-8'>
<style>
table{{color:#333;font-family:Helvetica,Arial,sans-serif;min-width:700px;border-collapse:collapse;border-spacing:0}}
td,th{{border:1px solid transparent;height:30px;transition:all .3s}}th{{background:#DFDFDF;font-weight:700}}
td{{background:#FAFAFA;text-align:center}}.setup{{font-size:150%;color:grey}}.Banned{{color:red}}.Error{{color:#8b0000}}
.Degraded{{color:gray}}.Probing{{color:#00f}}.Active{{color:green}}tr:nth-child(even) td{{background:#F1F1F1}}tr:nth-child(odd) td{{background:#FEFEFE}}tr td:hover{{background:#666;color:#FFF}}
</style>
</head>
<body>
<p class="setup">{setup}</p>
""".format(setup=setup)

        # items() instead of iteritems(): same iteration, valid on Python 2 and 3
        for site, records in groupedBySite.items():

            html_elements = self._buildTableRows(columnNames, records)

            html_body = """\
<table>
<tr>
<th>Status Type</th>
<th>Resource Name</th>
<th>Status</th>
<th>Time</th>
<th>Previous Status</th>
</tr>
{html_elements}
</table>
</body>
</html>
""".format(html_elements=html_elements)

            email = html_header + html_body

            # Fixed: 'site' is the SiteName key of the grouping; the original used
            # site[0], i.e. only its first character.
            # NOTE(review): assumes the SiteName column holds plain strings - confirm.
            subject = "RSS actions taken for %s\n" % site
            sent = self._sendMail(subject, email, html=True)
            if not sent['OK']:
                # Previously ignored; the cache is still emptied below, as before.
                self.log.error('Failed to send RSS notification email', sent['Message'])

        self.rsClient.delete('ResourceStatusCache')

        return S_OK()

    def _sendMail(self, subject, body, html=False):
        """ Send the given mail to every user of the configured notification groups.

        :param str subject: mail subject
        :param str body: mail body
        :param bool html: passed on to DiracAdmin.sendMail
        :return: S_OK(), or S_ERROR on the first recipient that cannot be reached
        """
        userEmails = self._getUserEmails()
        if not userEmails['OK']:
            return userEmails

        # User email address used to send the emails from.
        fromAddress = RssConfiguration.RssConfiguration().getConfigFromAddress()

        for user in userEmails['Value']:

            # FIXME: should not I get the info from the RSS User cache ?

            resEmail = self.diracAdmin.sendMail(user, subject, body, fromAddress=fromAddress, html=html)
            if not resEmail['OK']:
                return S_ERROR('Cannot send email to user "%s"' % user)

        return S_OK()

    def _getUserEmails(self):
        """ Collect the users of all configured notification groups.

        :return: S_OK( list of users ) / S_ERROR
        """
        configResult = RssConfiguration.getnotificationGroups()
        if not configResult['OK']:
            return configResult
        try:
            notificationGroups = configResult['Value']['notificationGroups']
        except KeyError:
            # Fixed: the original message carried a '%s' placeholder that was never filled.
            return S_ERROR('notificationGroups not found in the RSS configuration')

        notifications = RssConfiguration.getNotifications()
        if not notifications['OK']:
            return notifications
        notifications = notifications['Value']

        userEmails = []

        for notificationGroupName in notificationGroups:
            try:
                userEmails.extend(notifications[notificationGroupName]['users'])
            except KeyError:
                # A group without definition (or without users) is reported and skipped
                self.log.error('%s not present' % notificationGroupName)

        return S_OK(userEmails)