def execute(self):
  """Run one agent cycle: treat every eligible transformation and mail out accumulated notes."""
  self.log.notice( "Will ignore the following productions: %s" % self.productionsToIgnore )
  self.log.notice( " Job Cache: %s " % self.jobCache )
  transformations = self.getEligibleTransformations( self.transformationStatus,
                                                     self.transformationTypes )
  if not transformations['OK']:
    self.log.error( "Failure to get transformations", transformations['Message'] )
    return S_ERROR( "Failure to get transformations" )

  for prodID, prodInfo in transformations['Value'].iteritems():
    if prodID in self.productionsToIgnore:
      self.log.notice( "Ignoring Production: %s " % prodID )
      continue
    self.__resetCounters()
    self.inputFilesProcessed = set()
    transType, transName = prodInfo
    self.log.notice( "Running over Production: %s " % prodID )
    self.treatProduction( int(prodID), transName, transType )

    if not self.notesToSend:
      continue
    ##remove from the jobCache because something happened
    self.jobCache.pop( int(prodID), None )
    mailer = NotificationClient()
    mailSubject = "%s: %s" %( self.subject, prodID )
    for address in self.addressTo:
      sendRes = mailer.sendMail( address, mailSubject, self.notesToSend,
                                 self.addressFrom, localAttempt = False )
      if not sendRes['OK']:
        self.log.error( 'Cannot send notification mail', sendRes['Message'] )
    self.notesToSend = ""

  return S_OK()
def notifyAboutNewSoftware(self):
    """Mail the configured address when a new software version was added to the CS."""
    # Nothing was added to the CS, so there is nothing to announce.
    if not self.modifiedCS:
        return

    from DIRAC.Core.Security.ProxyInfo import getProxyInfo
    from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getUserOption
    from DIRAC.FrameworkSystem.Client.NotificationClient import NotificationClient

    subject = '%s %s added to DIRAC CS' % (self.appName, self.appVersion)
    msg = 'New application %s %s declared into Configuration service\n %s' % (
        self.appName, self.appVersion, self.comment)

    # Default to the fixed sender unless the proxy identifies a registered user.
    sender = '*****@*****.**'
    proxyInfo = getProxyInfo()
    if proxyInfo['OK'] and 'username' in proxyInfo['Value']:
        sender = getUserOption(proxyInfo['Value']['username'], 'Email')

    notifyClient = NotificationClient()
    gLogger.notice('Sending mail for software installation to %s' % (self.mailadress))
    gLogger.info('*' * 80)  # surround email with stars
    res = notifyClient.sendMail(self.mailadress, subject, msg, sender, localAttempt=False)
    gLogger.info('*' * 80)
    if not res['OK']:
        gLogger.error('The mail could not be sent: %s' % res['Message'])
def __lookForNewCEs( self ):
  """ Look up BDII for CEs not yet present in the DIRAC CS.

  For every configured VO the BDII content is compared with the CEs
  already known to the CS (plus the explicitly banned ones).  A summary
  of new CEs with production-state queues is logged and, when mail
  addresses are configured, sent by email.

  :returns: S_OK, or the error of the initial CS lookup
  """
  bannedCEs = self.am_getOption( 'BannedCEs', [] )
  result = getCEsFromCS()
  if not result['OK']:
    return result
  # CEs already in the CS and explicitly banned ones are never reported.
  knownCEs = set( result['Value'] )
  knownCEs = knownCEs.union( set( bannedCEs ) )

  for vo in self.voName:
    result = self.__getBdiiCEInfo( vo )
    if not result['OK']:
      continue
    bdiiInfo = result['Value']
    result = getGridCEs( vo, bdiiInfo = bdiiInfo, ceBlackList = knownCEs )
    if not result['OK']:
      self.log.error( 'Failed to get unused CEs', result['Message'] )
      # FIX: skip this VO on failure; previously execution fell through and
      # result['Value'] raised KeyError on the S_ERROR dictionary.
      continue
    siteDict = result['Value']

    body = ''
    for site in siteDict:
      newCEs = set( siteDict[site].keys() )
      if not newCEs:
        continue
      ceString = ''
      for ce in newCEs:
        queueString = ''
        ceInfo = bdiiInfo[site]['CEs'][ce]
        ceString = "CE: %s, GOCDB Site Name: %s" % ( ce, site )
        systemTuple = siteDict[site][ce]['System']
        osString = "%s_%s_%s" % ( systemTuple )
        newCEString = "\n%s\n%s\n" % ( ceString, osString )
        for queue in ceInfo['Queues']:
          queueStatus = ceInfo['Queues'][queue].get( 'GlueCEStateStatus', 'UnknownStatus' )
          # Only queues in GLUE 'Production' state are worth advertising.
          if 'production' in queueStatus.lower():
            ceType = ceInfo['Queues'][queue].get( 'GlueCEImplementationName', '' )
            queueString += " %s %s %s\n" % ( queue, queueStatus, ceType )
        if queueString:
          ceString = newCEString
          ceString += "Queues:\n"
          ceString += queueString
      if ceString:
        body += ceString

    if body:
      body = "\nWe are glad to inform You about new CE(s) possibly suitable for %s:\n" % vo + body
      body += "\n\nTo suppress information about CE add its name to BannedCEs list.\n"
      body += "Add new Sites/CEs for vo %s with the command:\n" % vo
      body += "dirac-admin-add-resources --vo %s --ce\n" % vo
      self.log.info( body )
      if self.addressTo and self.addressFrom:
        notification = NotificationClient()
        result = notification.sendMail( self.addressTo, self.subject, body,
                                        self.addressFrom, localAttempt = False )
        if not result['OK']:
          self.log.error( 'Can not send new site notification mail', result['Message'] )

  return S_OK()
def notifyAboutNewSoftware(self): """Send an email to the mailing list if a new software version was defined""" #Only send email when something was actually added if not self.modifiedCS: return subject = '%s %s added to DIRAC CS' % (self.appName, self.appVersion) msg = 'New application %s %s declared into Configuration service\n %s' % (self.appName, self.appVersion, self.comment) from DIRAC.Core.Security.ProxyInfo import getProxyInfo from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getUserOption from DIRAC.FrameworkSystem.Client.NotificationClient import NotificationClient notifyClient = NotificationClient() gLogger.notice('Sending mail for software installation to %s' % (self.mailadress)) res = getProxyInfo() if not res['OK']: sender = '*****@*****.**' else: if 'username' in res['Value']: sender = getUserOption(res['Value']['username'],'Email') else: sender = '*****@*****.**' gLogger.info('*'*80)# surround email with stars res = notifyClient.sendMail(self.mailadress, subject, msg, sender, localAttempt = False) gLogger.info('*'*80) if not res[ 'OK' ]: gLogger.error('The mail could not be sent: %s' % res['Message'])
def sendMail(self, address, subject, body, fromAddress=None, localAttempt=True):
    """Forward a mail request to the Notification service.

    Thin convenience wrapper: instantiates a NotificationClient and returns
    its S_OK/S_ERROR result unchanged.
    """
    return NotificationClient().sendMail(address, subject, body, fromAddress, localAttempt)
def __updateCS(self, bdiiChangeSet):
    """Apply a set of BDII-detected changes to the CS.

    'VO' options belonging to the same section are merged into a single
    comma-separated value; all other changes are applied verbatim.  The
    change summary is logged and, when addresses are configured, mailed.

    :param bdiiChangeSet: iterable of (section, option, oldValue, newValue)
    :returns: S_OK when there is nothing to do or in dry-run mode,
              otherwise the result of the CS commit
    """
    queueVODict = {}
    changeSet = set()
    # Collapse every per-queue VO addition into one union per section.
    for entry in bdiiChangeSet:
        section, option, _value, new_value = entry
        if option == "VO":
            queueVODict.setdefault(section, set())
            queueVODict[section] = queueVODict[section].union(
                set(new_value.split(',')))
        else:
            changeSet.add(entry)
    for section, VOs in queueVODict.items():
        changeSet.add((section, 'VO', '', ','.join(VOs)))
    if changeSet:
        changeList = list(changeSet)
        changeList.sort()
        body = '\n'.join(
            ["%s/%s %s -> %s" % entry for entry in changeList])
        if body and self.addressTo and self.addressFrom:
            notification = NotificationClient()
            result = notification.sendMail(self.addressTo, self.subject, body,
                                           self.addressFrom, localAttempt=False)
            # FIX: the sendMail outcome was previously discarded silently.
            if not result['OK']:
                self.log.error('Can not send BDII change notification mail',
                               result['Message'])
        if body:
            self.log.info(
                'The following configuration changes were detected:')
            self.log.info(body)
        for section, option, value, new_value in changeSet:
            # An empty/Unknown old value means the option does not exist yet.
            if value == 'Unknown' or not value:
                self.csAPI.setOption(cfgPath(section, option), new_value)
            else:
                self.csAPI.modifyValue(cfgPath(section, option), new_value)
        if self.dryRun:
            self.log.info("Dry Run: CS won't be updated")
            self.csAPI.showDiff()
            # FIX: previously fell through and returned None, which breaks
            # callers checking result['OK'].
            return S_OK()
        else:
            result = self.csAPI.commit()
            if not result['OK']:
                self.log.error("Error while committing to CS", result['Message'])
            else:
                self.log.info("Successfully committed %d changes to CS" % len(changeList))
            return result
    else:
        self.log.info("No changes found")
        return S_OK()
def __lookForNewSEs(self):
    """Look up BDII for SEs not yet present in the DIRAC CS.

    New SEs found for any configured VO are logged and, when mail
    addresses are configured, reported by email.
    """
    bannedSEs = self.am_getOption('BannedSEs', [])
    result = getSEsFromCS()
    if not result['OK']:
        return result
    # Known and banned SEs are never reported again.
    knownSEs = set(result['Value']) | set(bannedSEs)

    for vo in self.voName:
        result = self.__getBdiiSEInfo(vo)
        if not result['OK']:
            continue
        bdiiInfo = result['Value']
        result = getGridSRMs(vo, bdiiInfo=bdiiInfo, srmBlackList=knownSEs)
        if not result['OK']:
            continue
        siteDict = result['Value']

        fragments = []
        for site, seDict in siteDict.items():
            for se in set(seDict.keys()):
                fragments.append('\n New SE %s available at site %s:\n' % (se, site))
                seInfo = seDict[se]['SE']
                backend = seInfo.get('GlueSEImplementationName', 'Unknown')
                size = seInfo.get('GlueSESizeTotal', 'Unknown')
                fragments.append(' Backend %s, Size %s' % (backend, size))
        body = ''.join(fragments)

        if body:
            header = "\nWe are glad to inform You about new SE(s) possibly suitable for %s:\n" % vo
            body = header + body
            body += "\n\nTo suppress information about an SE add its name to BannedSEs list.\n"
            body += "Add new SEs for vo %s with the command:\n" % vo
            body += "dirac-admin-add-resources --vo %s --se\n" % vo
            self.log.info(body)
            if self.addressTo and self.addressFrom:
                result = NotificationClient().sendMail(
                    self.addressTo, self.subject, body,
                    self.addressFrom, localAttempt=False)
                if not result['OK']:
                    self.log.error(
                        'Can not send new site notification mail',
                        result['Message'])
    return S_OK()
def main():
    """Entry point of the send-mail script.

    The positional argument may contain header lines and a body separated
    by the literal four-character marker backslash-n backslash-n ("\\n\\n").
    When no separator is present, the argument is treated as the recipient
    and the body is read from stdin.  Exits non-zero on any failure.
    """
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()
    arg = "".join(args)
    # FIX: idiomatic emptiness check instead of 'not len(arg) > 0'.
    if not arg:
        gLogger.error("Missing argument")
        DIRACexit(2)

    # Split header part from body on the literal '\n\n' marker.
    try:
        head, body = arg.split("\\n\\n")
    except ValueError:  # FIX: narrowed from 'Exception as x' (x was unused)
        head = "To: %s" % arg
        body = sys.stdin.read()
        # stdin may itself carry extra header lines before a marker.
        try:
            tmp, body = body.split("\\n\\n")
            head = tmp + "\\n" + head
        except ValueError:
            pass
    # FIX: '"".join(body.strip())' was an identity no-op on a string.
    body = body.strip()

    try:
        headers = dict((i.strip(), j.strip())
                       for i, j in (item.split(':') for item in head.split('\\n')))
    except Exception:  # FIX: was BaseException, which also caught SystemExit
        gLogger.error("Failed to convert string: %s to email headers" % head)
        DIRACexit(4)

    if "To" not in headers:
        gLogger.error("Failed to get 'To:' field from headers %s" % head)
        DIRACexit(5)
    to = headers["To"]

    # Sender defaults to <login>@<host>, overridden by an explicit From:.
    origin = "%s@%s" % (os.getenv("LOGNAME", "dirac"), socket.getfqdn())
    if "From" in headers:
        origin = headers["From"]
    subject = "Sent from %s" % socket.getfqdn()
    if "Subject" in headers:
        subject = headers["Subject"]

    ntc = NotificationClient()
    print("sendMail(%s,%s,%s,%s,%s)" % (to, subject, body, origin, False))
    result = ntc.sendMail(to, subject, body, origin, localAttempt=False)
    if not result["OK"]:
        gLogger.error(result["Message"])
        DIRACexit(6)
    DIRACexit(0)
def __updateCS( self, bdiiChangeSet ):
  """Apply a set of BDII-detected changes to the CS.

  'VO' options belonging to the same section are merged into one
  comma-separated value; all other changes are applied verbatim.  The
  change summary is logged and, when addresses are configured, mailed.

  :param bdiiChangeSet: iterable of (section, option, oldValue, newValue)
  :returns: S_OK when there is nothing to do or in dry-run mode,
            otherwise the result of the CS commit
  """
  queueVODict = {}
  changeSet = set()
  # Collapse every per-queue VO addition into one union per section.
  for entry in bdiiChangeSet:
    section, option , _value, new_value = entry
    if option == "VO":
      queueVODict.setdefault( section, set() )
      queueVODict[section] = queueVODict[section].union( set( new_value.split( ',' ) ) )
    else:
      changeSet.add( entry )
  for section, VOs in queueVODict.items():
    changeSet.add( ( section, 'VO', '', ','.join( VOs ) ) )
  if changeSet:
    changeList = list( changeSet )
    changeList.sort()
    body = '\n'.join( [ "%s/%s %s -> %s" % entry for entry in changeList ] )
    if body and self.addressTo and self.addressFrom:
      notification = NotificationClient()
      result = notification.sendMail( self.addressTo, self.subject, body, self.addressFrom, localAttempt = False )
      # FIX: the sendMail outcome was previously discarded silently.
      if not result['OK']:
        self.log.error( 'Can not send BDII change notification mail', result['Message'] )
    if body:
      self.log.info( 'The following configuration changes were detected:' )
      self.log.info( body )
    for section, option, value, new_value in changeSet:
      # An empty/Unknown old value means the option does not exist yet.
      if value == 'Unknown' or not value:
        self.csAPI.setOption( cfgPath( section, option ), new_value )
      else:
        self.csAPI.modifyValue( cfgPath( section, option ), new_value )
    if self.dryRun:
      self.log.info( "Dry Run: CS won't be updated" )
      self.csAPI.showDiff()
      # FIX: previously fell through and returned None, breaking callers
      # that check result['OK'].
      return S_OK()
    else:
      result = self.csAPI.commit()
      if not result['OK']:
        self.log.error( "Error while committing to CS", result['Message'] )
      else:
        self.log.info( "Successfully committed %d changes to CS" % len( changeList ) )
      return result
  else:
    self.log.info( "No changes found" )
    return S_OK()
def __lookForNewSEs( self ):
  """ Look up BDII for SEs not yet present in the DIRAC CS.

  For every configured VO the BDII SRM information is compared with the
  SEs already known to the CS (plus the banned ones); a summary of new
  SEs is logged and, when mail addresses are configured, mailed.

  :returns: S_OK, or the error of the initial CS lookup
  """
  bannedSEs = self.am_getOption( 'BannedSEs', [] )
  result = getSEsFromCS()
  if not result['OK']:
    return result
  # SEs already in the CS plus explicitly banned ones are never reported.
  knownSEs = set( result['Value'] )
  knownSEs = knownSEs.union( set( bannedSEs ) )
  for vo in self.voName:
    result = self.__getBdiiSEInfo( vo )
    if not result['OK']:
      continue
    bdiiInfo = result['Value']
    result = getGridSRMs( vo, bdiiInfo = bdiiInfo, srmBlackList = knownSEs )
    if not result['OK']:
      continue
    siteDict = result['Value']
    body = ''
    for site in siteDict:
      newSEs = set( siteDict[site].keys() )
      if not newSEs:
        continue
      for se in newSEs:
        body += '\n New SE %s available at site %s:\n' % ( se, site )
        backend = siteDict[site][se]['SE'].get( 'GlueSEImplementationName', 'Unknown' )
        size = siteDict[site][se]['SE'].get( 'GlueSESizeTotal', 'Unknown' )
        body += ' Backend %s, Size %s' % ( backend, size )
    if body:
      # Prepend the greeting and append the admin instructions.
      body = "\nWe are glad to inform You about new SE(s) possibly suitable for %s:\n" % vo + body
      body += "\n\nTo suppress information about an SE add its name to BannedSEs list.\n"
      body += "Add new SEs for vo %s with the command:\n" % vo
      body += "dirac-admin-add-resources --vo %s --se\n" % vo
      self.log.info( body )
      if self.addressTo and self.addressFrom:
        notification = NotificationClient()
        result = notification.sendMail( self.addressTo, self.subject, body, self.addressFrom, localAttempt = False )
        if not result['OK']:
          self.log.error( 'Can not send new site notification mail', result['Message'] )
  return S_OK()
def execute(self):
    """The main execution method.

    Loops over all eligible transformations, treats each production and,
    when notes were accumulated (and they are not only 'keeper' notes),
    mails them to the configured recipients.
    """
    self.log.notice("Will ignore the following productions: %s" % self.productionsToIgnore)
    self.log.notice(" Job Cache: %s " % self.jobCache)
    transformations = self.getEligibleTransformations(
        self.transformationStatus, self.transformationTypes)
    if not transformations['OK']:
        self.log.error("Failure to get transformations", transformations['Message'])
        return S_ERROR("Failure to get transformations")
    # NOTE: iteritems() makes this Python-2 only.
    for prodID, values in transformations['Value'].iteritems():
        if prodID in self.productionsToIgnore:
            self.log.notice("Ignoring Production: %s " % prodID)
            continue
        self.__resetCounters()
        self.inputFilesProcessed = set()
        transType, transName = values
        self.log.notice("Running over Production: %s " % prodID)
        self.treatProduction(int(prodID), transName, transType)
        # Mail only when there is something beyond keeper-only notes.
        if self.notesToSend and self.__notOnlyKeepers(transType):
            ##remove from the jobCache because something happened
            self.jobCache.pop(int(prodID), None)
            notification = NotificationClient()
            for address in self.addressTo:
                result = notification.sendMail(address, "%s: %s" % (self.subject, prodID),
                                               self.notesToSend, self.addressFrom,
                                               localAttempt=False)
                if not result['OK']:
                    self.log.error('Cannot send notification mail', result['Message'])
            self.notesToSend = ""
    return S_OK()
def __infoFromCE( self ):
  """Synchronise CE information in the CS with what the BDII publishes.

  For every site declaring a BDII 'Name', the coordinates and contact
  mail are refreshed.  For every CE of the site the worker-node tmp dir,
  architecture, OS, SI00, Pilot flag, CE type and the per-queue
  maxCPUTime / SI00 are compared with the BDII values and the CS is
  updated where they differ.  When anything changed, the accumulated
  OS-change summary is mailed (if addresses are configured) and the CS
  is committed.

  :returns: result of the CS commit, S_OK() when nothing changed, or
            None when the initial CS section lookups fail (historic
            behaviour, kept unchanged).
  """
  sitesSection = cfgPath( 'Resources', 'Sites' )
  result = gConfig.getSections( sitesSection )
  if not result['OK']:
    return
  grids = result['Value']
  changed = False
  body = ""

  for grid in grids:
    gridSection = cfgPath( sitesSection, grid )
    result = gConfig.getSections( gridSection )
    if not result['OK']:
      return
    sites = result['Value']

    for site in sites:
      siteSection = cfgPath( gridSection, site )
      opt = gConfig.getOptionsDict( siteSection )['Value']
      name = opt.get( 'Name', '' )
      if name:
        coor = opt.get( 'Coordinates', 'Unknown' )
        mail = opt.get( 'Mail', 'Unknown' )
        result = ldapSite( name )
        if not result['OK']:
          self.log.warn( "BDII site %s: %s" % ( name, result['Message'] ) )
          result = self.__checkAlternativeBDIISite( ldapSite, name )
        if result['OK']:
          bdiisites = result['Value']
          if len( bdiisites ) == 0:
            self.log.warn( name, "Error in bdii: leng = 0" )
          else:
            if not len( bdiisites ) == 1:
              self.log.warn( name, "Warning in bdii: leng = %d" % len( bdiisites ) )
            bdiisite = bdiisites[0]
            # Coordinates and contact mail as published by the BDII.
            try:
              longitude = bdiisite['GlueSiteLongitude']
              latitude = bdiisite['GlueSiteLatitude']
              newcoor = "%s:%s" % ( longitude, latitude )
            except:
              self.log.warn( "Error in bdii coor" )
              newcoor = "Unknown"
            try:
              newmail = bdiisite['GlueSiteSysAdminContact'].split( ":" )[-1].strip()
            except:
              self.log.warn( "Error in bdii mail" )
              newmail = "Unknown"
            self.log.debug( "%s %s %s" % ( name, newcoor, newmail ) )
            if newcoor != coor:
              self.log.info( "%s" % ( name ), "%s -> %s" % ( coor, newcoor ) )
              if coor == 'Unknown':
                self.csAPI.setOption( cfgPath( siteSection, 'Coordinates' ), newcoor )
              else:
                self.csAPI.modifyValue( cfgPath( siteSection, 'Coordinates' ), newcoor )
              changed = True
            if newmail != mail:
              self.log.info( "%s" % ( name ), "%s -> %s" % ( mail, newmail ) )
              if mail == 'Unknown':
                self.csAPI.setOption( cfgPath( siteSection, 'Mail' ), newmail )
              else:
                self.csAPI.modifyValue( cfgPath( siteSection, 'Mail' ), newmail )
              changed = True

      celist = List.fromChar( opt.get( 'CE', '' ) )
      if not celist:
        self.log.warn( site, 'Empty site list' )
        continue

      # result = gConfig.getSections( cfgPath( siteSection,'CEs' )
      # if not result['OK']:
      #   self.log.debug( "Section CEs:", result['Message'] )

      for ce in celist:
        ceSection = cfgPath( siteSection, 'CEs', ce )
        result = gConfig.getOptionsDict( ceSection )
        if not result['OK']:
          self.log.debug( "Section CE", result['Message'] )
          wnTmpDir = 'Unknown'
          arch = 'Unknown'
          os = 'Unknown'  # NOTE: local name shadows the os module (not imported here)
          si00 = 'Unknown'
          pilot = 'Unknown'
          cetype = 'Unknown'
        else:
          ceopt = result['Value']
          wnTmpDir = ceopt.get( 'wnTmpDir', 'Unknown' )
          arch = ceopt.get( 'architecture', 'Unknown' )
          os = ceopt.get( 'OS', 'Unknown' )
          si00 = ceopt.get( 'SI00', 'Unknown' )
          pilot = ceopt.get( 'Pilot', 'Unknown' )
          cetype = ceopt.get( 'CEType', 'Unknown' )

        result = ldapCE( ce )
        if not result['OK']:
          self.log.warn( 'Error in bdii for %s' % ce, result['Message'] )
          result = self.__checkAlternativeBDIISite( ldapCE, ce )
          continue
        try:
          bdiice = result['Value'][0]
        except:
          self.log.warn( 'Error in bdii for %s' % ce, result )
          bdiice = None
        if bdiice:
          try:
            newwnTmpDir = bdiice['GlueSubClusterWNTmpDir']
          except:
            newwnTmpDir = 'Unknown'
          if wnTmpDir != newwnTmpDir and newwnTmpDir != 'Unknown':
            section = cfgPath( ceSection, 'wnTmpDir' )
            self.log.info( section, " -> ".join( ( wnTmpDir, newwnTmpDir ) ) )
            if wnTmpDir == 'Unknown':
              self.csAPI.setOption( section, newwnTmpDir )
            else:
              self.csAPI.modifyValue( section, newwnTmpDir )
            changed = True

          try:
            newarch = bdiice['GlueHostArchitecturePlatformType']
          except:
            newarch = 'Unknown'
          if arch != newarch and newarch != 'Unknown':
            section = cfgPath( ceSection, 'architecture' )
            self.log.info( section, " -> ".join( ( arch, newarch ) ) )
            if arch == 'Unknown':
              self.csAPI.setOption( section, newarch )
            else:
              self.csAPI.modifyValue( section, newarch )
            changed = True

          try:
            newos = '_'.join( ( bdiice['GlueHostOperatingSystemName'],
                                bdiice['GlueHostOperatingSystemVersion'],
                                bdiice['GlueHostOperatingSystemRelease'] ) )
          except:
            newos = 'Unknown'
          if os != newos and newos != 'Unknown':
            section = cfgPath( ceSection, 'OS' )
            self.log.info( section, " -> ".join( ( os, newos ) ) )
            if os == 'Unknown':
              self.csAPI.setOption( section, newos )
            else:
              self.csAPI.modifyValue( section, newos )
            changed = True
            # OS changes are additionally reported in the notification mail.
            body = body + "OS was changed %s -> %s for %s at %s\n" % ( os, newos, ce, site )

          try:
            newsi00 = bdiice['GlueHostBenchmarkSI00']
          except:
            newsi00 = 'Unknown'
          if si00 != newsi00 and newsi00 != 'Unknown':
            section = cfgPath( ceSection, 'SI00' )
            self.log.info( section, " -> ".join( ( si00, newsi00 ) ) )
            if si00 == 'Unknown':
              self.csAPI.setOption( section, newsi00 )
            else:
              self.csAPI.modifyValue( section, newsi00 )
            changed = True

          # The Pilot flag is only derived for the LHCb VO.
          try:
            rte = bdiice['GlueHostApplicationSoftwareRunTimeEnvironment']
            if self.voName.lower() == 'lhcb':
              if 'VO-lhcb-pilot' in rte:
                newpilot = 'True'
              else:
                newpilot = 'False'
            else:
              newpilot = 'Unknown'
          except:
            newpilot = 'Unknown'
          if pilot != newpilot and newpilot != 'Unknown':
            section = cfgPath( ceSection, 'Pilot' )
            self.log.info( section, " -> ".join( ( pilot, newpilot ) ) )
            if pilot == 'Unknown':
              self.csAPI.setOption( section, newpilot )
            else:
              self.csAPI.modifyValue( section, newpilot )
            changed = True

        result = ldapCEState( ce, vo = self.voName ) #getBDIICEVOView
        if not result['OK']:
          self.log.warn( 'Error in bdii for queue %s' % ce, result['Message'] )
          result = self.__checkAlternativeBDIISite( ldapCEState, ce, self.voName )
          continue
        try:
          queues = result['Value']
        except:
          # FIX: was result['Massage'] (typo), which itself raised KeyError
          # inside this error-logging path.
          self.log.warn( 'Error in bdii for queue %s' % ce, result['Message'] )
          continue

        # Derive a single CE type from the queues; warn on inconsistencies.
        newcetype = 'Unknown'
        for queue in queues:
          try:
            queuetype = queue['GlueCEImplementationName']
          except:
            queuetype = 'Unknown'
          if newcetype == 'Unknown':
            newcetype = queuetype
          else:
            if queuetype != newcetype:
              self.log.warn( 'Error in bdii for ce %s ' % ce,
                             'different cetypes %s %s' % ( newcetype, queuetype ) )
        if newcetype=='ARC-CE':
          newcetype = 'ARC'
        if cetype != newcetype and newcetype != 'Unknown':
          section = cfgPath( ceSection, 'CEType' )
          self.log.info( section, " -> ".join( ( cetype, newcetype ) ) )
          if cetype == 'Unknown':
            self.csAPI.setOption( section, newcetype )
          else:
            self.csAPI.modifyValue( section, newcetype )
          changed = True

        for queue in queues:
          try:
            queueName = queue['GlueCEUniqueID'].split( '/' )[-1]
          except:
            self.log.warn( 'error in queuename ', queue )
            continue
          try:
            newmaxCPUTime = queue['GlueCEPolicyMaxCPUTime']
          except:
            newmaxCPUTime = None
          newsi00 = None
          try:
            caps = queue['GlueCECapability']
            if type( caps ) == type( '' ):
              caps = [caps]
            for cap in caps:
              if cap.count( 'CPUScalingReferenceSI00' ):
                newsi00 = cap.split( '=' )[-1]
          except:
            newsi00 = None

          queueSection = cfgPath( ceSection, 'Queues', queueName )
          result = gConfig.getOptionsDict( queueSection )
          if not result['OK']:
            self.log.warn( "Section Queues", result['Message'] )
            maxCPUTime = 'Unknown'
            si00 = 'Unknown'
          else:
            queueopt = result['Value']
            maxCPUTime = queueopt.get( 'maxCPUTime', 'Unknown' )
            si00 = queueopt.get( 'SI00', 'Unknown' )
          if newmaxCPUTime and ( maxCPUTime != newmaxCPUTime ):
            section = cfgPath( queueSection, 'maxCPUTime' )
            self.log.info( section, " -> ".join( ( maxCPUTime, newmaxCPUTime ) ) )
            if maxCPUTime == 'Unknown':
              self.csAPI.setOption( section, newmaxCPUTime )
            else:
              self.csAPI.modifyValue( section, newmaxCPUTime )
            changed = True
          if newsi00 and ( si00 != newsi00 ):
            section = cfgPath( queueSection, 'SI00' )
            self.log.info( section, " -> ".join( ( si00, newsi00 ) ) )
            if si00 == 'Unknown':
              self.csAPI.setOption( section, newsi00 )
            else:
              self.csAPI.modifyValue( section, newsi00 )
            changed = True

  if changed:
    self.log.info( body )
    if body and self.addressTo and self.addressFrom:
      notification = NotificationClient()
      result = notification.sendMail( self.addressTo, self.subject, body, self.addressFrom, localAttempt = False )
    return self.csAPI.commit()
  else:
    self.log.info( "No changes found" )
    return S_OK()
def sendMail( self, address, subject, body, fromAddress = None, localAttempt = True ):
  """ Send mail to specified address with body.

  Thin wrapper around NotificationClient.sendMail; the S_OK/S_ERROR
  result of the service call is returned unchanged.

  :param address: recipient address (forwarded verbatim to the service)
  :param subject: mail subject line
  :param body: mail body text
  :param fromAddress: optional sender address; service default used when None
  :param localAttempt: whether to try sending via the local mail facility first
  """
  notification = NotificationClient()
  return notification.sendMail( address, subject, body, fromAddress, localAttempt )
from DIRAC.Core.Security.Misc import getProxyInfo
from DIRAC import gConfig, gLogger
from DIRAC.Core.DISET.RPCClient import RPCClient
from DIRAC.ResourceStatusSystem.Utilities.CS import getMailForUser

# Re-assign RSS tokens for every element given on the command line to the
# current user and mail the user about each successful re-assignment.
nc = NotificationClient()
s = RPCClient( "ResourceStatus/ResourceStatus" )

res = getProxyInfo()
if not res['OK']:
  gLogger.error( "Failed to get proxy information", res['Message'] )
  DIRAC.exit( 2 )
userName = res['Value']['username']
group = res['Value']['group']

# Token re-assignment is restricted to production / admin groups.
if group not in ( 'diracAdmin', 'lhcb_prod' ):
  gLogger.error( "You must be lhcb_prod or diracAdmin to execute this script" )
  gLogger.info( "Please issue 'lhcb-proxy-init -g lhcb_prod' or 'lhcb-proxy-init -g diracAdmin'" )
  DIRAC.exit( 2 )

for arg in args:
  g = s.whatIs( arg )
  res = s.reAssignToken( g, arg, userName )
  if not res['OK']:
    # FIX: the message previously interpolated only the error text and ended
    # with a dangling ':'; now both the element and the reason are reported.
    gLogger.error( "Problem with re-assigning token for %s: %s" % ( arg, res['Message'] ) )
    DIRAC.exit( 2 )
  mailMessage = "The token for %s %s has been successfully re-assigned." % ( g, arg )
  nc.sendMail( getMailForUser( userName )['Value'][0], 'Token for %s reassigned' % arg, mailMessage )

DIRAC.exit( 0 )
def run(self):
    """ Do actions required to notify users.

    Mandatory keyword arguments:
    - Granularity
    Optional keyword arguments:
    - SiteType
    - ServiceType
    - ResourceType
    """
    # Initializing variables
    nc = NotificationClient()

    # raise alarms, right now makes a simple notification
    # NOTE: Python-2-only raise syntax; this snippet predates Python 3.
    if 'Granularity' not in self.kw['Params'].keys():
        raise ValueError, "You have to provide a argument Granularity = <desired_granularity>"

    if self.new_status['Action']:
        # NOTE(review): this summary string is immediately shadowed by the
        # loop variable 'notif' below and therefore never used.
        notif = "%s %s is perceived as" % (self.granularity, self.name)
        notif = notif + " %s. Reason: %s." % (self.new_status['Status'], self.new_status['Reason'])

        users_to_notify = self._getUsersToNotify()
        for notif in users_to_notify:
            for user in notif['Users']:
                if 'Web' in notif['Notifications']:
                    gLogger.info("Sending web notification to user %s" % user)
                    nc.addNotificationForUser(user, notif)
                if 'Mail' in notif['Notifications']:
                    gLogger.info("Sending mail notification to user %s" % user)
                    # Fetch the most recent history entry for the 'Was:' line.
                    was = self.rsClient.getElementHistory(
                        self.granularity, elementName = self.name, statusType = self.status_type,
                        meta = {"order": "DESC", 'limit' : 1,
                                "columns": ['Status', 'Reason', 'DateEffective']})#[0]
                    if not was[ 'OK' ]:
                        gLogger.error( was[ 'Message' ] )
                        continue
                    was = was[ 'Value' ][ 0 ]

                    mailMessage = """ ---TESTING---
--------------------------------------------------------------------------------
RSS changed the status of the following resource:

Granularity:\t%s
Name:\t\t%s
New status:\t%s
Reason:\t\t%s
Was:\t\t%s (%s) since %s
Setup:\t\t%s

If you think RSS took the wrong decision, please set the status manually:

Use: dirac-rss-set-status -g <granularity> -n <element_name> -s <desired_status> [-t status_type]
(if you omit the optional last part of the command, all status types are matched.)

This notification has been sent according to those parameters:
%s
""" % (self.granularity, self.name, self.new_status['Status'], self.new_status['Reason'],
       was[0], was[1], was[2], CS.getSetup(), str(users_to_notify))

                    # Actually send the mail!
                    resUser = self.rmClient.getUserRegistryCache( user )
                    if not resUser[ 'OK' ]:
                        gLogger.error( resUser[ 'Message' ] )
                        continue
                    # Third column of the registry cache record is presumably
                    # the user's mail address -- TODO confirm.
                    resUser = resUser[ 'Value' ][ 0 ][ 2 ]
                    nc.sendMail(resUser,
                                '[RSS][%s][%s] %s -> %s' % (self.granularity, self.name,
                                                            self.new_status['Status'], was[0]),
                                mailMessage)

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
class TokenAgent(AgentModule):
    """Agent that reclaims expired RSS tokens and warns owners of tokens about to expire."""

    #############################################################################

    def initialize(self):
        """ TokenAgent initialization """
        try:
            self.rsDB = ResourceStatusDB()
            self.nc = NotificationClient()
            return S_OK()
        except Exception:
            errorStr = "TokenAgent initialization"
            gLogger.exception(errorStr)
            return S_ERROR(errorStr)

    #############################################################################

    def execute(self):
        """
        The main TokenAgent execution method.
        Checks for tokens owned by users that are expiring, and notifies those users.
        Calls rsDB.setToken() to set 'RS_SVC' as owner for those tokens that expired.
        """
        try:
            granularities = ('Site', 'StorageElement')

            #reAssign the token to RS_SVC
            for granularity in granularities:
                expired = self.rsDB.getTokens(granularity, None, datetime.datetime.utcnow())
                for token in expired:
                    self.rsDB.setToken(granularity, token[0], 'RS_SVC',
                                       datetime.datetime(9999, 12, 31, 23, 59, 59))

            #notify token owners
            in2Hours = datetime.datetime.utcnow() + datetime.timedelta(hours = 2)
            for granularity in granularities:
                expiring = self.rsDB.getTokens(granularity, None, in2Hours)
                for token in expiring:
                    name, user = token[0], token[1]
                    if user == 'RS_SVC':
                        continue
                    expiration = token[2]
                    mailMessage = ("The token for %s %s " % (granularity, name)
                                   + "will expire on %s\n\n" % expiration
                                   + "You can renew it with command 'dirac-rss-renew-token'.\n"
                                   + "If you don't take any action, RSS will take control of the resource.")
                    self.nc.sendMail(getMailForUser(user)['Value'][0],
                                     'Token for %s is expiring' % name, mailMessage)

            return S_OK()
        except Exception:
            errorStr = "TokenAgent execution"
            gLogger.exception(errorStr)
            return S_ERROR(errorStr)
def __infoFromCE(self):
    """Synchronise CE information in the CS with what the BDII publishes.

    For every site declaring a BDII 'Name', the coordinates and contact
    mail are refreshed.  For every CE of the site the worker-node tmp
    dir, architecture, OS, SI00, Pilot flag, CE type and the per-queue
    maxCPUTime / SI00 are compared with the BDII values and the CS is
    updated where they differ.  When anything changed, the accumulated
    OS-change summary is mailed (if addresses are configured) and the CS
    is committed.

    :returns: result of the CS commit, S_OK() when nothing changed, or
              None when the initial section lookups fail (historic
              behaviour, kept unchanged).
    """
    sitesSection = cfgPath("Resources", "Sites")
    result = gConfig.getSections(sitesSection)
    if not result["OK"]:
        return
    grids = result["Value"]
    changed = False
    body = ""
    for grid in grids:
        gridSection = cfgPath(sitesSection, grid)
        result = gConfig.getSections(gridSection)
        if not result["OK"]:
            return
        sites = result["Value"]
        for site in sites:
            # if site[-2:]!='ru':
            #   continue
            siteSection = cfgPath(gridSection, site)
            opt = gConfig.getOptionsDict(siteSection)["Value"]
            name = opt.get("Name", "")
            if name:
                coor = opt.get("Coordinates", "Unknown")
                mail = opt.get("Mail", "Unknown")
                result = ldapSite(name)
                if not result["OK"]:
                    self.log.warn("BDII site %s: %s" % (name, result["Message"]))
                    result = self.__checkAlternativeBDIISite(ldapSite, name)
                if result["OK"]:
                    bdiisites = result["Value"]
                    if len(bdiisites) == 0:
                        self.log.warn(name, "Error in bdii: leng = 0")
                    else:
                        if not len(bdiisites) == 1:
                            self.log.warn(name, "Warning in bdii: leng = %d" % len(bdiisites))
                        bdiisite = bdiisites[0]
                        # Coordinates and contact mail as published by BDII.
                        try:
                            longitude = bdiisite["GlueSiteLongitude"]
                            latitude = bdiisite["GlueSiteLatitude"]
                            newcoor = "%s:%s" % (longitude, latitude)
                        except:
                            self.log.warn("Error in bdii coor")
                            newcoor = "Unknown"
                        try:
                            newmail = bdiisite["GlueSiteSysAdminContact"].split(":")[-1].strip()
                        except:
                            self.log.warn("Error in bdii mail")
                            newmail = "Unknown"
                        self.log.debug("%s %s %s" % (name, newcoor, newmail))
                        if newcoor != coor:
                            self.log.info("%s" % (name), "%s -> %s" % (coor, newcoor))
                            if coor == "Unknown":
                                self.csAPI.setOption(cfgPath(siteSection, "Coordinates"), newcoor)
                            else:
                                self.csAPI.modifyValue(cfgPath(siteSection, "Coordinates"), newcoor)
                            changed = True
                        if newmail != mail:
                            self.log.info("%s" % (name), "%s -> %s" % (mail, newmail))
                            if mail == "Unknown":
                                self.csAPI.setOption(cfgPath(siteSection, "Mail"), newmail)
                            else:
                                self.csAPI.modifyValue(cfgPath(siteSection, "Mail"), newmail)
                            changed = True
            celist = List.fromChar(opt.get("CE", ""))
            if not celist:
                self.log.warn(site, "Empty site list")
                continue
            # result = gConfig.getSections( cfgPath( siteSection,'CEs' )
            # if not result['OK']:
            #   self.log.debug( "Section CEs:", result['Message'] )
            for ce in celist:
                ceSection = cfgPath(siteSection, "CEs", ce)
                result = gConfig.getOptionsDict(ceSection)
                if not result["OK"]:
                    self.log.debug("Section CE", result["Message"])
                    wnTmpDir = "Unknown"
                    arch = "Unknown"
                    os = "Unknown"  # NOTE: local name shadows the os module (not imported here)
                    si00 = "Unknown"
                    pilot = "Unknown"
                    cetype = "Unknown"
                else:
                    ceopt = result["Value"]
                    wnTmpDir = ceopt.get("wnTmpDir", "Unknown")
                    arch = ceopt.get("architecture", "Unknown")
                    os = ceopt.get("OS", "Unknown")
                    si00 = ceopt.get("SI00", "Unknown")
                    pilot = ceopt.get("Pilot", "Unknown")
                    cetype = ceopt.get("CEType", "Unknown")
                result = ldapCE(ce)
                if not result["OK"]:
                    self.log.warn("Error in bdii for %s" % ce, result["Message"])
                    result = self.__checkAlternativeBDIISite(ldapCE, ce)
                    continue
                try:
                    bdiice = result["Value"][0]
                except:
                    self.log.warn("Error in bdii for %s" % ce, result)
                    bdiice = None
                if bdiice:
                    try:
                        newwnTmpDir = bdiice["GlueSubClusterWNTmpDir"]
                    except:
                        newwnTmpDir = "Unknown"
                    if wnTmpDir != newwnTmpDir and newwnTmpDir != "Unknown":
                        section = cfgPath(ceSection, "wnTmpDir")
                        self.log.info(section, " -> ".join((wnTmpDir, newwnTmpDir)))
                        if wnTmpDir == "Unknown":
                            self.csAPI.setOption(section, newwnTmpDir)
                        else:
                            self.csAPI.modifyValue(section, newwnTmpDir)
                        changed = True
                    try:
                        newarch = bdiice["GlueHostArchitecturePlatformType"]
                    except:
                        newarch = "Unknown"
                    if arch != newarch and newarch != "Unknown":
                        section = cfgPath(ceSection, "architecture")
                        self.log.info(section, " -> ".join((arch, newarch)))
                        if arch == "Unknown":
                            self.csAPI.setOption(section, newarch)
                        else:
                            self.csAPI.modifyValue(section, newarch)
                        changed = True
                    try:
                        newos = "_".join(
                            (
                                bdiice["GlueHostOperatingSystemName"],
                                bdiice["GlueHostOperatingSystemVersion"],
                                bdiice["GlueHostOperatingSystemRelease"],
                            )
                        )
                    except:
                        newos = "Unknown"
                    if os != newos and newos != "Unknown":
                        section = cfgPath(ceSection, "OS")
                        self.log.info(section, " -> ".join((os, newos)))
                        if os == "Unknown":
                            self.csAPI.setOption(section, newos)
                        else:
                            self.csAPI.modifyValue(section, newos)
                        changed = True
                        # OS changes are additionally reported by mail.
                        body = body + "OS was changed %s -> %s for %s at %s\n" % (os, newos, ce, site)
                    try:
                        newsi00 = bdiice["GlueHostBenchmarkSI00"]
                    except:
                        newsi00 = "Unknown"
                    if si00 != newsi00 and newsi00 != "Unknown":
                        section = cfgPath(ceSection, "SI00")
                        self.log.info(section, " -> ".join((si00, newsi00)))
                        if si00 == "Unknown":
                            self.csAPI.setOption(section, newsi00)
                        else:
                            self.csAPI.modifyValue(section, newsi00)
                        changed = True
                    # The Pilot flag is only derived for the LHCb VO.
                    try:
                        rte = bdiice["GlueHostApplicationSoftwareRunTimeEnvironment"]
                        if self.voName.lower() == "lhcb":
                            if "VO-lhcb-pilot" in rte:
                                newpilot = "True"
                            else:
                                newpilot = "False"
                        else:
                            newpilot = "Unknown"
                    except:
                        newpilot = "Unknown"
                    if pilot != newpilot and newpilot != "Unknown":
                        section = cfgPath(ceSection, "Pilot")
                        self.log.info(section, " -> ".join((pilot, newpilot)))
                        if pilot == "Unknown":
                            self.csAPI.setOption(section, newpilot)
                        else:
                            self.csAPI.modifyValue(section, newpilot)
                        changed = True
                # CE type is LCG unless a CREAM service is published for the CE.
                result = ldapService(ce)
                if not result["OK"]:
                    result = self.__checkAlternativeBDIISite(ldapService, ce)
                if result["OK"] and result["Value"]:
                    services = result["Value"]
                    newcetype = "LCG"
                    for service in services:
                        if service["GlueServiceType"].count("CREAM"):
                            newcetype = "CREAM"
                else:
                    newcetype = "Unknown"
                if cetype != newcetype and newcetype != "Unknown":
                    section = cfgPath(ceSection, "CEType")
                    self.log.info(section, " -> ".join((cetype, newcetype)))
                    if cetype == "Unknown":
                        self.csAPI.setOption(section, newcetype)
                    else:
                        self.csAPI.modifyValue(section, newcetype)
                    changed = True
                result = ldapCEState(ce, vo=self.voName)  # getBDIICEVOView
                if not result["OK"]:
                    self.log.warn("Error in bdii for queue %s" % ce, result["Message"])
                    result = self.__checkAlternativeBDIISite(ldapCEState, ce, self.voName)
                    continue
                try:
                    queues = result["Value"]
                except:
                    # FIX: was result["Massage"] (typo), which itself raised
                    # KeyError inside this error-logging path.
                    self.log.warn("Error in bdii for queue %s" % ce, result["Message"])
                    continue
                for queue in queues:
                    try:
                        queueName = queue["GlueCEUniqueID"].split("/")[-1]
                    except:
                        self.log.warn("error in queuename ", queue)
                        continue
                    try:
                        newmaxCPUTime = queue["GlueCEPolicyMaxCPUTime"]
                    except:
                        newmaxCPUTime = None
                    newsi00 = None
                    try:
                        caps = queue["GlueCECapability"]
                        if type(caps) == type(""):
                            caps = [caps]
                        for cap in caps:
                            if cap.count("CPUScalingReferenceSI00"):
                                newsi00 = cap.split("=")[-1]
                    except:
                        newsi00 = None
                    queueSection = cfgPath(ceSection, "Queues", queueName)
                    result = gConfig.getOptionsDict(queueSection)
                    if not result["OK"]:
                        self.log.warn("Section Queues", result["Message"])
                        maxCPUTime = "Unknown"
                        si00 = "Unknown"
                    else:
                        queueopt = result["Value"]
                        maxCPUTime = queueopt.get("maxCPUTime", "Unknown")
                        si00 = queueopt.get("SI00", "Unknown")
                    if newmaxCPUTime and (maxCPUTime != newmaxCPUTime):
                        section = cfgPath(queueSection, "maxCPUTime")
                        self.log.info(section, " -> ".join((maxCPUTime, newmaxCPUTime)))
                        if maxCPUTime == "Unknown":
                            self.csAPI.setOption(section, newmaxCPUTime)
                        else:
                            self.csAPI.modifyValue(section, newmaxCPUTime)
                        changed = True
                    if newsi00 and (si00 != newsi00):
                        section = cfgPath(queueSection, "SI00")
                        self.log.info(section, " -> ".join((si00, newsi00)))
                        if si00 == "Unknown":
                            self.csAPI.setOption(section, newsi00)
                        else:
                            self.csAPI.modifyValue(section, newsi00)
                        changed = True
    if changed:
        self.log.info(body)
        if body and self.addressTo and self.addressFrom:
            notification = NotificationClient()
            result = notification.sendMail(self.addressTo, self.subject, body,
                                           self.addressFrom, localAttempt=False)
        return self.csAPI.commit()
    else:
        self.log.info("No changes found")
        return S_OK()
# Get my setup mySetup = gConfig.getValue('DIRAC/Setup') # Retrieve information from all the hosts client = SystemAdministratorIntegrator(exclude=excludedHosts) resultAll = client.getOverallStatus() notificationClient = NotificationClient() for host in resultAll['Value']: if not resultAll['Value'][host]['OK']: # If the host cannot be contacted, exclude it and send message excludedHosts.append(host) result = notificationClient.sendMail( Operations().getValue('EMail/Production', []), 'Unreachable host', '\ndirac-populate-component-db: Could not fill the database with the components from unreachable host %s\n' % host) if not result['OK']: gLogger.error( 'Can not send unreachable host notification mail: %s' % result['Message']) if not resultAll['OK']: gLogger.error(resultAll['Message']) DIRACexit(-1) resultHosts = client.getHostInfo() if not resultHosts['OK']: gLogger.error(resultHosts['Message']) DIRACexit(-1) resultInfo = client.getInfo() if not resultInfo['OK']:
class ComponentSupervisionAgent(AgentModule):
    """Agent to supervise DIRAC components (agents, executors, services).

    Checks log-file age and service pings, optionally restarts stuck
    components, reconciles running/stopped components against the CS
    ``Registry/Hosts/<host>/[Running|Stopped]`` sections, keeps service URLs
    in the CS up to date, and mails a summary of everything it did.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the agent, clients, default values."""
        AgentModule.__init__(self, *args, **kwargs)
        self.name = "ComponentSupervisionAgent"
        self.setup = "DIRAC-Production"
        # master switch; individual Restart* flags are additionally required
        self.enabled = False
        self.restartAgents = False
        self.restartExecutors = False
        self.restartServices = False
        self.controlComponents = False
        self.commitURLs = False
        # instances whose name matches any of these patterns are never restarted
        self.doNotRestartInstancePattern = ["RequestExecutingAgent"]
        self.diracLocation = rootPath
        self.sysAdminClient = SystemAdministratorClient(socket.getfqdn())
        self.jobMonClient = JobMonitoringClient()
        self.nClient = NotificationClient()
        self.csAPI = None
        self.agents = dict()
        self.executors = dict()
        self.services = dict()
        self._tornadoPort = "8443"
        self.errors = list()
        # accounting[instanceName] -> {"Treatment": ..., "LogAge": ...}
        self.accounting = defaultdict(dict)
        self.addressTo = []
        self.addressFrom = ""
        self.emailSubject = "ComponentSupervisionAgent on %s" % socket.getfqdn()

    def logError(self, errStr, varMsg=""):
        """Append errors to a list, which is sent in email notification."""
        self.log.error(errStr, varMsg)
        self.errors.append(errStr + " " + varMsg)

    def beginExecution(self):
        """Reload the configurations before every cycle."""
        self.setup = self.am_getOption("Setup", self.setup)
        self.enabled = self.am_getOption("EnableFlag", self.enabled)
        self.restartAgents = self.am_getOption("RestartAgents", self.restartAgents)
        self.restartExecutors = self.am_getOption("RestartExecutors", self.restartExecutors)
        self.restartServices = self.am_getOption("RestartServices", self.restartServices)
        self.addressTo = self.am_getOption("MailTo", self.addressTo)
        self.addressFrom = self.am_getOption("MailFrom", self.addressFrom)
        self.controlComponents = self.am_getOption("ControlComponents", self.controlComponents)
        self.commitURLs = self.am_getOption("CommitURLs", self.commitURLs)
        self.doNotRestartInstancePattern = self.am_getOption(
            "DoNotRestartInstancePattern", self.doNotRestartInstancePattern)
        self.csAPI = CSAPI()

        res = self.getRunningInstances(instanceType="Agents")
        if not res["OK"]:
            return S_ERROR("Failure to get running agents")
        self.agents = res["Value"]

        res = self.getRunningInstances(instanceType="Executors")
        if not res["OK"]:
            return S_ERROR("Failure to get running executors")
        self.executors = res["Value"]

        res = self.getRunningInstances(instanceType="Services")
        if not res["OK"]:
            return S_ERROR("Failure to get running services")
        self.services = res["Value"]

        self.accounting.clear()
        return S_OK()

    def sendNotification(self):
        """Send email notification about changes done in the last cycle."""
        if not (self.errors or self.accounting):
            return S_OK()

        emailBody = ""
        rows = []
        for instanceName, val in self.accounting.items():
            rows.append([[instanceName], [val.get("Treatment", "No Treatment")],
                         [str(val.get("LogAge", "Not Relevant"))]])

        if rows:
            columns = ["Instance", "Treatment", "Log File Age (Minutes)"]
            emailBody += printTable(columns, rows, printOut=False, numbering=False, columnSeparator=" | ")

        if self.errors:
            emailBody += "\n\nErrors:"
            emailBody += "\n".join(self.errors)

        self.log.notice("Sending Email:\n" + emailBody)
        for address in self.addressTo:
            res = self.nClient.sendMail(address, self.emailSubject, emailBody,
                                        self.addressFrom, localAttempt=False)
            if not res["OK"]:
                self.log.error("Failure to send Email notification to ", address)
                continue

        # state is cleared even if some mails failed; errors are not re-queued
        self.errors = []
        self.accounting.clear()
        return S_OK()

    def getRunningInstances(self, instanceType="Agents", runitStatus="Run"):
        """Return a dict of running agents, executors or services.

        Key is component's name, value contains dict with PollingTime, PID, Port,
        Module, RunitStatus, LogFileLocation

        :param str instanceType: 'Agents', 'Executors', 'Services'
        :param str runitStatus: Return only those instances with given RunitStatus or 'All'
        :returns: Dictionary of running instances
        """
        res = self.sysAdminClient.getOverallStatus()
        if not res["OK"]:
            self.logError("Failure to get %s from system administrator client" % instanceType,
                          res["Message"])
            return res
        val = res["Value"][instanceType]
        runningComponents = defaultdict(dict)
        for system, components in val.items():
            for componentName, componentInfo in components.items():
                if componentInfo["Setup"] and componentInfo["Installed"]:
                    if runitStatus != "All" and componentInfo["RunitStatus"] != runitStatus:
                        continue
                    for option, default in (("PollingTime", HOUR), ("Port", None), ("Protocol", None)):
                        runningComponents[componentName][option] = self._getComponentOption(
                            instanceType, system, componentName, option, default)
                        # remove empty values so we can use defaults in _getURL
                        if not runningComponents[componentName][option]:
                            runningComponents[componentName].pop(option)
                    runningComponents[componentName]["LogFileLocation"] = os.path.join(
                        self.diracLocation, "runit", system, componentName, "log", "current")
                    runningComponents[componentName]["PID"] = componentInfo["PID"]
                    runningComponents[componentName]["Module"] = componentInfo["Module"]
                    runningComponents[componentName]["RunitStatus"] = componentInfo["RunitStatus"]
                    runningComponents[componentName]["System"] = system

        return S_OK(runningComponents)

    def _getComponentOption(self, instanceType, system, componentName, option, default):
        """Get component option from DIRAC CS, using components' base classes methods."""
        componentPath = PathFinder.getComponentSection(
            system=system,
            component=componentName,
            setup=self.setup,
            componentCategory=instanceType,
        )
        if instanceType != "Agents":
            return gConfig.getValue(Path.cfgPath(componentPath, option), default)
        # deal with agent configuration
        componentLoadModule = gConfig.getValue(Path.cfgPath(componentPath, "Module"), componentName)
        fullComponentName = Path.cfgPath(system, componentName)
        fullComponentLoadName = Path.cfgPath(system, componentLoadModule)
        return AgentModule(fullComponentName, fullComponentLoadName).am_getOption(option, default)

    def on_terminate(self, componentName, process):
        """Execute callback when a process terminates gracefully."""
        self.log.info("%s's process with ID: %s has been terminated successfully" %
                      (componentName, process.pid))

    def execute(self):
        """Execute checks for agents, executors, services."""
        for instanceType in ("executor", "agent", "service"):
            for name, options in getattr(self, instanceType + "s").items():
                # call checkAgent, checkExecutor, checkService
                res = getattr(self, "check" + instanceType.capitalize())(name, options)
                if not res["OK"]:
                    self.logError("Failure when checking %s" % instanceType,
                                  "%s, %s" % (name, res["Message"]))

        res = self.componentControl()
        if not res["OK"]:
            # a host without Running/Stopped CS sections is not treated as an error
            if "Stopped does not exist" not in res["Message"] and \
               "Running does not exist" not in res["Message"]:
                self.logError("Failure to control components", res["Message"])

        if not self.errors:
            res = self.checkURLs()
            if not res["OK"]:
                self.logError("Failure to check URLs", res["Message"])
        else:
            self.logError("Something was wrong before, not checking URLs this time")

        self.sendNotification()

        if self.errors:
            return S_ERROR("Error during this cycle, check log")
        return S_OK()

    @staticmethod
    def getLastAccessTime(logFileLocation):
        """Return the age of log file.

        :param str logFileLocation: path to the log file
        :returns: S_OK(timedelta) with the file age, or S_ERROR if it cannot be stat'ed
        """
        lastAccessTime = 0
        try:
            lastAccessTime = os.path.getmtime(logFileLocation)
            lastAccessTime = datetime.fromtimestamp(lastAccessTime)
        except OSError as e:
            return S_ERROR("Failed to access logfile %s: %r" % (logFileLocation, e))

        now = datetime.now()
        age = now - lastAccessTime
        return S_OK(age)

    def restartInstance(self, pid, instanceName, enabled):
        """Kill a process which is then restarted automatically.

        :param int pid: process ID of the component
        :param str instanceName: name used for accounting/log messages
        :param bool enabled: per-type restart flag (anded with self.enabled)
        :returns: S_OK(), S_OK(NO_RESTART) when restarting is disabled, or S_ERROR()
        """
        if not (self.enabled and enabled):
            self.log.info("Restarting is disabled, please restart %s manually" % instanceName)
            self.accounting[instanceName]["Treatment"] = "Please restart it manually"
            return S_OK(NO_RESTART)

        if any(pattern in instanceName for pattern in self.doNotRestartInstancePattern):
            self.log.info("Restarting for %s is disabled, please restart it manually" % instanceName)
            self.accounting[instanceName]["Treatment"] = "Please restart it manually"
            return S_OK(NO_RESTART)

        try:
            componentProc = psutil.Process(int(pid))
            processesToTerminate = componentProc.children(recursive=True)
            processesToTerminate.append(componentProc)

            # terminate gracefully first, then kill whatever survives the timeout
            for proc in processesToTerminate:
                proc.terminate()

            _gone, alive = psutil.wait_procs(processesToTerminate, timeout=5,
                                             callback=partial(self.on_terminate, instanceName))
            for proc in alive:
                self.log.info("Forcefully killing process %s" % proc.pid)
                proc.kill()

            return S_OK()
        except psutil.Error as err:
            self.logError("Exception occurred in terminating processes", "%s" % err)
            return S_ERROR()

    def checkService(self, serviceName, options):
        """Ping the service, restart if the ping does not respond."""
        url = self._getURL(serviceName, options)
        self.log.info("Pinging service", url)
        pingRes = Client().ping(url=url)
        if not pingRes["OK"]:
            self.log.info("Failure pinging service: %s: %s" % (url, pingRes["Message"]))
            res = self.restartInstance(int(options["PID"]), serviceName, self.restartServices)
            if not res["OK"]:
                return res
            if res["Value"] != NO_RESTART:
                self.accounting[serviceName]["Treatment"] = "Successfully Restarted"
                self.log.info("Service %s has been successfully restarted" % serviceName)
        else:
            # FIX: previously this was logged unconditionally, even after a
            # failed ping/restart; only claim success when the ping succeeded
            self.log.info("Service responded OK")
        return S_OK()

    def checkAgent(self, agentName, options):
        """Check the age of agent's log file, if it is too old then restart the agent."""
        pollingTime, currentLogLocation, pid = (options["PollingTime"],
                                                options["LogFileLocation"],
                                                options["PID"])
        self.log.info("Checking Agent: %s" % agentName)
        self.log.info("Polling Time: %s" % pollingTime)
        self.log.info("Current Log File location: %s" % currentLogLocation)

        res = self.getLastAccessTime(currentLogLocation)
        if not res["OK"]:
            return res
        age = res["Value"]
        # FIX: use total_seconds() -- timedelta.seconds alone wraps at 24 hours,
        # so a day-old log file would wrongly appear fresh and no restart happened
        ageSeconds = age.total_seconds()
        self.log.info("Current log file for %s is %d minutes old" % (agentName, (ageSeconds / MINUTES)))

        maxLogAge = max(pollingTime + HOUR, 2 * HOUR)
        if ageSeconds < maxLogAge:
            return S_OK()

        self.log.info("Current log file is too old for Agent %s" % agentName)
        self.accounting[agentName]["LogAge"] = int(ageSeconds // MINUTES)

        res = self.restartInstance(int(pid), agentName, self.restartAgents)
        if not res["OK"]:
            return res
        if res["Value"] != NO_RESTART:
            self.accounting[agentName]["Treatment"] = "Successfully Restarted"
            self.log.info("Agent %s has been successfully restarted" % agentName)

        return S_OK()

    def checkExecutor(self, executor, options):
        """Check the age of executor log file, if too old check for jobs in checking status, then restart the executors."""
        currentLogLocation = options["LogFileLocation"]
        pid = options["PID"]
        self.log.info("Checking executor: %s" % executor)
        self.log.info("Current Log File location: %s" % currentLogLocation)

        res = self.getLastAccessTime(currentLogLocation)
        if not res["OK"]:
            return res
        age = res["Value"]
        # FIX: use total_seconds() instead of the wrapping .seconds field (see checkAgent)
        ageSeconds = age.total_seconds()
        self.log.info("Current log file for %s is %d minutes old" % (executor, (ageSeconds / MINUTES)))

        if ageSeconds < 2 * HOUR:
            return S_OK()

        self.log.info("Current log file is too old for Executor %s" % executor)
        self.accounting[executor]["LogAge"] = int(ageSeconds // MINUTES)

        # a quiet executor with no jobs to treat is not an error
        res = self.checkForCheckingJobs(executor)
        if not res["OK"]:
            return res
        if res["OK"] and res["Value"] == NO_CHECKING_JOBS:
            self.accounting.pop(executor, None)
            return S_OK(NO_RESTART)

        res = self.restartInstance(int(pid), executor, self.restartExecutors)
        if not res["OK"]:
            return res
        elif res["OK"] and res["Value"] != NO_RESTART:
            self.accounting[executor]["Treatment"] = "Successfully Restarted"
            self.log.info("Executor %s has been successfully restarted" % executor)

        return S_OK()

    def checkForCheckingJobs(self, executorName):
        """Check if there are checking jobs with the **executorName** as current MinorStatus."""
        attrDict = {"Status": "Checking", "MinorStatus": executorName}

        # returns list of jobs IDs
        resJobs = self.jobMonClient.getJobs(attrDict)
        if not resJobs["OK"]:
            self.logError("Could not get jobs for this executor",
                          "%s: %s" % (executorName, resJobs["Message"]))
            return resJobs
        if resJobs["Value"]:
            self.log.info('Found %d jobs in "Checking" status for %s' %
                          (len(resJobs["Value"]), executorName))
            return S_OK(CHECKING_JOBS)
        self.log.info('Found no jobs in "Checking" status for %s' % executorName)
        return S_OK(NO_CHECKING_JOBS)

    def componentControl(self):
        """Monitor and control component status as defined in the CS.

        Check for running and stopped components and ensure they have the proper
        status as defined in the CS Registry/Hosts/_HOST_/[Running|Stopped] sections

        :returns: :func:`~DIRAC:DIRAC.Core.Utilities.ReturnValues.S_OK`,
           :func:`~DIRAC:DIRAC.Core.Utilities.ReturnValues.S_ERROR`
        """
        # get the current status of the components
        resCurrent = self._getCurrentComponentStatus()
        if not resCurrent["OK"]:
            return resCurrent
        currentStatus = resCurrent["Value"]

        resDefault = self._getDefaultComponentStatus()
        if not resDefault["OK"]:
            return resDefault
        defaultStatus = resDefault["Value"]

        # ensure instances are in the right state
        shouldBe = {}
        shouldBe["Run"] = defaultStatus["Run"].intersection(currentStatus["Down"])
        shouldBe["Down"] = defaultStatus["Down"].intersection(currentStatus["Run"])
        shouldBe["Unknown"] = defaultStatus["All"].symmetric_difference(currentStatus["All"])

        self._ensureComponentRunning(shouldBe["Run"])
        self._ensureComponentDown(shouldBe["Down"])

        for instance in shouldBe["Unknown"]:
            self.logError("Unknown instance", "%r, either uninstall or add to config" % instance)

        return S_OK()

    def _getCurrentComponentStatus(self):
        """Get current status for components."""
        resOverall = self.sysAdminClient.getOverallStatus()
        if not resOverall["OK"]:
            return resOverall
        currentStatus = {"Down": set(), "Run": set(), "All": set()}
        informationDict = resOverall["Value"]
        for systemsDict in informationDict.values():
            for system, instancesDict in systemsDict.items():
                for instanceName, instanceInfoDict in instancesDict.items():
                    identifier = "%s__%s" % (system, instanceName)
                    runitStatus = instanceInfoDict.get("RunitStatus")
                    if runitStatus in ("Run", "Down"):
                        currentStatus[runitStatus].add(identifier)

        currentStatus["All"] = currentStatus["Run"] | currentStatus["Down"]
        return S_OK(currentStatus)

    def _getDefaultComponentStatus(self):
        """Get the configured status of the components."""
        host = socket.getfqdn()
        defaultStatus = {"Down": set(), "Run": set(), "All": set()}
        resRunning = gConfig.getOptionsDict(Path.cfgPath("/Registry/Hosts/", host, "Running"))
        resStopped = gConfig.getOptionsDict(Path.cfgPath("/Registry/Hosts/", host, "Stopped"))
        if not resRunning["OK"]:
            return resRunning
        if not resStopped["OK"]:
            return resStopped
        defaultStatus["Run"] = set(resRunning["Value"])
        defaultStatus["Down"] = set(resStopped["Value"])
        defaultStatus["All"] = defaultStatus["Run"] | defaultStatus["Down"]

        # a component listed both Running and Stopped is a configuration error
        if defaultStatus["Run"].intersection(defaultStatus["Down"]):
            self.logError("Overlap in configuration",
                          str(defaultStatus["Run"].intersection(defaultStatus["Down"])))
            return S_ERROR("Bad host configuration")

        return S_OK(defaultStatus)

    def _ensureComponentRunning(self, shouldBeRunning):
        """Ensure the correct components are running."""
        for instance in shouldBeRunning:
            self.log.info("Starting instance %s" % instance)
            system, name = instance.split("__")
            if self.controlComponents:
                res = self.sysAdminClient.startComponent(system, name)
                if not res["OK"]:
                    self.logError("Failed to start component:",
                                  "%s: %s" % (instance, res["Message"]))
                else:
                    self.accounting[instance]["Treatment"] = "Instance was down, started instance"
            else:
                self.accounting[instance]["Treatment"] = "Instance is down, should be started"

    def _ensureComponentDown(self, shouldBeDown):
        """Ensure the correct components are not running."""
        for instance in shouldBeDown:
            self.log.info("Stopping instance %s" % instance)
            system, name = instance.split("__")
            if self.controlComponents:
                res = self.sysAdminClient.stopComponent(system, name)
                if not res["OK"]:
                    self.logError("Failed to stop component:",
                                  "%s: %s" % (instance, res["Message"]))
                else:
                    self.accounting[instance]["Treatment"] = "Instance was running, stopped instance"
            else:
                self.accounting[instance]["Treatment"] = "Instance is running, should be stopped"

    def checkURLs(self):
        """Ensure that the running services have their URL in the Config."""
        self.log.info("Checking URLs")
        # get services again, in case they were started/stop in controlComponents
        gConfig.forceRefresh(fromMaster=True)

        # get port used for https based services
        try:
            tornadoSystemInstance = PathFinder.getSystemInstance(
                system="Tornado",
                setup=self.setup,
            )
            self._tornadoPort = gConfig.getValue(
                Path.cfgPath("/System/Tornado/", tornadoSystemInstance, "Port"),
                self._tornadoPort,
            )
        except RuntimeError:
            # no Tornado instance configured; keep the default port
            pass

        self.log.debug("Using Tornado Port:", self._tornadoPort)

        res = self.getRunningInstances(instanceType="Services", runitStatus="All")
        if not res["OK"]:
            return S_ERROR("Failure to get running services")
        self.services = res["Value"]

        for service, options in sorted(self.services.items()):
            self.log.debug("Checking URL for %s with options %s" % (service, options))
            # ignore SystemAdministrator, does not have URLs
            if "SystemAdministrator" in service:
                continue
            self._checkServiceURL(service, options)

        if self.csAPI.csModified and self.commitURLs:
            self.log.info("Commiting changes to the CS")
            result = self.csAPI.commit()
            if not result["OK"]:
                self.logError("Commit to CS failed", result["Message"])
                return S_ERROR("Failed to commit to CS")
        return S_OK()

    def _checkServiceURL(self, serviceName, options):
        """Ensure service URL is properly configured in the CS."""
        url = self._getURL(serviceName, options)
        system = options["System"]
        module = options["Module"]
        self.log.info("Checking URLs for %s/%s" % (system, module))
        urlsConfigPath = Path.cfgPath(
            PathFinder.getSystemURLSection(system=system, setup=self.setup), module)
        urls = gConfig.getValue(urlsConfigPath, [])
        self.log.debug("Found configured URLs for %s: %s" % (module, urls))
        self.log.debug("This URL is %s" % url)
        runitStatus = options["RunitStatus"]
        wouldHave = "Would have " if not self.commitURLs else ""
        if runitStatus == "Run" and url not in urls:
            urls.append(url)
            message = "%sAdded URL %s to URLs for %s/%s" % (wouldHave, url, system, module)
            self.log.info(message)
            self.accounting[serviceName + "/URL"]["Treatment"] = message
            self.csAPI.modifyValue(urlsConfigPath, ",".join(urls))
        if runitStatus == "Down" and url in urls:
            urls.remove(url)
            message = "%sRemoved URL %s from URLs for %s/%s" % (wouldHave, url, system, module)
            self.log.info(message)
            self.accounting[serviceName + "/URL"]["Treatment"] = message
            self.csAPI.modifyValue(urlsConfigPath, ",".join(urls))

    def _getURL(self, serviceName, options):
        """Return URL for the service."""
        system = options["System"]
        port = options.get("Port", self._tornadoPort)
        host = socket.getfqdn()
        protocol = options.get("Protocol", "dips")
        url = "%s://%s:%s/%s/%s" % (protocol, host, port, system, serviceName)
        return url
class AnalyseXMLSummary(ModuleBase):
  """ Analysing the XML summary.

      Decides whether the step/job must fail, based on the summary's success
      flag, finalisation step, outputs and input-file counters, and reports
      problematic input files to the FileReport.
  """

  def __init__(self, bkClient=None, dm=None):
    """Module initialization.

    :param bkClient: optional bookkeeping client forwarded to ModuleBase
    :param dm: optional data manager forwarded to ModuleBase
    """
    self.log = gLogger.getSubLogger('AnalyseXMLSummary')
    super(AnalyseXMLSummary, self).__init__(self.log, bkClientIn=bkClient, dm=dm)

    self.version = __RCSID__
    self.nc = NotificationClient()
    self.XMLSummary = ''      # name of the XML summary file, set by the workflow
    self.XMLSummary_o = None  # parsed XMLSummary object

  @staticmethod
  def _removeLFNPrefix(lfn):
    """Return *lfn* without a leading 'LFN:' prefix.

    FIX: replaces the previous ``lfn.strip('LFN:')``, which removes any of the
    characters 'L', 'F', 'N', ':' from *both* ends and could corrupt LFNs,
    instead of removing the 'LFN:' prefix only.
    """
    return lfn[4:] if lfn.startswith('LFN:') else lfn

  def _resolveInputVariables(self):
    """ By convention any workflow parameters are resolved here.
    """
    super(AnalyseXMLSummary, self)._resolveInputVariables()
    super(AnalyseXMLSummary, self)._resolveInputStep()

    self.XMLSummary_o = XMLSummary(self.XMLSummary, log=self.log)

  def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
              workflowStatus=None, stepStatus=None,
              wf_commons=None, step_commons=None, step_number=None, step_id=None):
    """ Main execution method.

        Here we analyse what is written in the XML summary, and take decisions accordingly
    """
    try:
      super(AnalyseXMLSummary, self).execute(self.version, production_id,
                                             prod_job_id, wms_job_id,
                                             workflowStatus, stepStatus,
                                             wf_commons, step_commons,
                                             step_number, step_id)

      self._resolveInputVariables()

      self.log.info("Performing XML summary analysis for %s" % (self.XMLSummary))
      # Make the parsed summary available to the next modules of this step
      self.step_commons['XMLSummary_o'] = self.XMLSummary_o

      if self.XMLSummary_o.success == 'True' \
         and self.XMLSummary_o.step == 'finalize' \
         and self.XMLSummary_o._outputsOK() \
         and not self.XMLSummary_o.inputFileStats['mult'] \
         and not self.XMLSummary_o.inputFileStats['other']:
        # basic success, now check for failures in the input files
        failTheJob = self._basicSuccess()
      else:
        # here fails!
        failTheJob = True

      if failTheJob:
        self._finalizeWithErrors("XMLSummary reports error")
        self.setApplicationStatus("XMLSummary reports error")
        return S_ERROR("XMLSummary reports error")

      # if the XMLSummary looks ok but the step already failed, preserve the previous error
      if not self.stepStatus['OK']:
        return S_OK()

      self.log.info('XML summary %s' % self.XMLSummary)
      self.setApplicationStatus('%s Step OK' % self.applicationName)
      return S_OK()

    except Exception as e:  # pylint:disable=broad-except
      self.log.exception("Failure in AnalyseXMLSummary execute module", lException=e)
      self.setApplicationStatus(repr(e))
      return S_ERROR(str(e))

    finally:
      super(AnalyseXMLSummary, self).finalize(self.version)

  def _basicSuccess(self):
    """ Treats basic success, meaning the outputs and the status of the XML
        summary are ok. Now, we have to check the input files if they are in
        "part" or "fail".

    :returns: bool -- True when the job must be failed
    """
    failTheJob = False
    if self.XMLSummary_o.inputFileStats['part']:
      if self.numberOfEvents != -1:
        self.log.info("Input on part is ok, since we are not processing all")
        # this is not an error
      else:
        # report to FileReport
        filesInPart = [self._removeLFNPrefix(x[0])
                       for x in self.XMLSummary_o.inputStatus if x[1] == 'part']
        self.log.error("Files %s are in status 'part'" % ';'.join(filesInPart))
        for fileInPart in filesInPart:
          if fileInPart in self.inputDataList:
            self.log.error("Reporting %s as 'Problematic'" % fileInPart)
            self.fileReport.setFileStatus(int(self.production_id), fileInPart, 'Problematic')
        failTheJob = True

    if self.XMLSummary_o.inputFileStats['fail']:
      # report to FileReport
      filesInFail = [self._removeLFNPrefix(x[0])
                     for x in self.XMLSummary_o.inputStatus if x[1] == 'fail']
      self.log.error("Files %s are in status 'fail'" % ';'.join(filesInFail))
      for fileInFail in filesInFail:
        if fileInFail in self.inputDataList:
          self.log.error("Reporting %s as 'Problematic'" % fileInFail)
          self.fileReport.setFileStatus(int(self.production_id), fileInFail, 'Problematic')
      failTheJob = True

    return failTheJob

  def _finalizeWithErrors(self, subj):
    """ Method that sends an email and uploads intermediate job outputs.

    :param str subj: short description of the problem, used in the mail subject
    :raises Exception: when production LFNs cannot be constructed
    """
    # Have to check that the output list is defined in the workflow commons, this is
    # done by the first BK report module that executes at the end of a step but in
    # this case the current step 'listoutput' must be added.
    if 'outputList' in self.workflow_commons:
      for outputItem in self.step_commons['listoutput']:
        if outputItem not in self.workflow_commons['outputList']:
          self.workflow_commons['outputList'].append(outputItem)
    else:
      self.workflow_commons['outputList'] = self.step_commons['listoutput']

    result = constructProductionLFNs(self.workflow_commons, self.bkClient)
    if not result['OK']:
      self.log.error('Could not create production LFNs with message "%s"' % (result['Message']))
      raise Exception(result['Message'])

    if 'DebugLFNs' not in result['Value']:
      self.log.error('No debug LFNs found after creating production LFNs, result was:%s' % result)
      raise Exception('DebugLFNs Not Found')
    debugLFNs = result['Value']['DebugLFNs']

    subject = '[' + self.siteName + '][' + self.applicationName + '] ' + self.applicationVersion + \
              ": " + subj + ' ' + self.production_id + '_' + self.prod_job_id + ' JobID=' + str(self.jobID)
    msg = 'The Application ' + self.applicationName + ' ' + self.applicationVersion + ' had a problem \n'
    msg = msg + 'at site ' + self.siteName + '\n'
    msg = msg + 'JobID is ' + str(self.jobID) + '\n'
    msg = msg + 'JobName is ' + self.production_id + '_' + self.prod_job_id + '\n'

    # collect the debug files that exist locally
    toUpload = {}
    for lfn in debugLFNs:
      if os.path.exists(os.path.basename(lfn)):
        toUpload[os.path.basename(lfn)] = lfn

    if toUpload:
      msg += '\n\nIntermediate job data files:\n'

    for fname, lfn in toUpload.items():
      guidResult = getGUID(fname)
      guidInput = ''
      if not guidResult['OK']:
        self.log.error('Could not find GUID for %s with message' % (fname), guidResult['Message'])
      else:
        # FIX: collapsed a redundant elif/else that assigned the same value in
        # both branches; generated and pre-existing GUIDs are used identically
        if guidResult['generated']:
          self.log.info('PoolXMLFile generated GUID(s) for the following files ',
                        ', '.join(guidResult['generated']))
        guidInput = guidResult['Value'][fname]

      if self._WMSJob():
        self.log.info('Attempting: dm.putAndRegister("%s","%s","%s","%s") on master catalog' % (
            fname, lfn, guidInput, self.debugSE))
        result = DataManager(masterCatalogOnly=True).putAndRegister(lfn, fname,
                                                                    self.debugSE, guidInput)
        self.log.info(result)
        if not result['OK']:
          self.log.error('Could not save INPUT data file with result', str(result['Message']))
          msg += 'Could not save intermediate data file %s with result\n%s\n' % (fname,
                                                                                 result['Message'])
        else:
          msg = msg + lfn + '\n' + str(result) + '\n'
      else:
        self.log.info("JOBID is null, would have attempted to upload: LFN:%s, file %s, GUID %s to %s" % (
            lfn, fname, guidInput, self.debugSE))

    if not self._WMSJob():
      self.log.info("JOBID is null, *NOT* sending mail, for information the mail was:\n====>Start\n%s\n<====End" % (msg))
    else:
      mailAddress = self.opsH.getValue('EMail/JobFailures', '*****@*****.**')
      self.log.info('Sending crash mail for job to %s' % (mailAddress))

      res = self.nc.sendMail(mailAddress, subject, msg, '*****@*****.**', localAttempt=False)
      if not res['OK']:
        self.log.warn("The mail could not be sent")
class TokenAgent( AgentModule ):
  '''
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
  '''

  # Too many public methods
  # pylint: disable-msg=R0904

  def initialize( self ):
    '''
      TokenAgent initialization

      Creates the RSS status/management clients and the notification client,
      and reads the 'notifyHours' option (advance-warning window, default 10).
    '''
    # Attribute defined outside __init__
    # pylint: disable-msg=W0201

    self.notifyHours = self.am_getOption( 'notifyHours', 10 )

    try:
      self.rsClient = ResourceStatusClient()
      self.rmClient = ResourceManagementClient()
      self.noClient = NotificationClient()
      return S_OK()
    except Exception:
      errorStr = "TokenAgent initialization"
      self.log.exception( errorStr )
      return S_ERROR( errorStr )

  def execute( self ):
    '''
      The main TokenAgent execution method.
      Checks for tokens owned by users that are expiring, and notifies those
      users. Calls rsClient.setToken() to set 'RS_SVC' as owner for those
      tokens that expired.

      Any exception is caught at the bottom and converted into S_ERROR.
    '''

    adminMail = ''

    try:
      reason = 'Out of date token'

      #reAssign the token to RS_SVC
      #for g in self.ELEMENTS:
      validElements = RssConfiguration.getValidElements()
      for granularity in validElements:

        # NOTE(review): tokensExpired['Value'] is accessed without checking
        # tokensExpired['OK']; a failed client call would raise here and be
        # swallowed by the broad except below -- confirm intended.
        tokensExpired = self.rsClient.getTokens( granularity,
                                                 tokenExpiration = datetime.datetime.utcnow() )

        if tokensExpired[ 'Value' ]:
          adminMail += '\nLIST OF EXPIRED %s TOKENS\n' % granularity
          adminMail += '%s|%s|%s\n' % ( 'user'.ljust(20), 'name'.ljust(15), 'status type')

        for token in tokensExpired[ 'Value' ]:

          # token tuple layout: index 1 = name, 2 = statusType, 9 = owner
          # (presumably matching the RSS tokens table schema -- TODO confirm)
          name = token[ 1 ]
          stype = token[ 2 ]
          user = token[ 9 ]

          # hand the token back to the service; far-future date means "no expiry"
          self.rsClient.setToken( granularity, name, stype, reason, 'RS_SVC',
                                  datetime.datetime( 9999, 12, 31, 23, 59, 59 ) )
          adminMail += ' %s %s %s\n' %( user.ljust(20), name.ljust(15), stype )

      #notify token owners
      inNHours = datetime.datetime.utcnow() + datetime.timedelta( hours = self.notifyHours )
      #for g in self.ELEMENTS:
      for granularity in validElements:

        tokensExpiring = self.rsClient.getTokens( granularity, tokenExpiration = inNHours )

        if tokensExpiring[ 'Value' ]:
          adminMail += '\nLIST OF EXPIRING %s TOKENS\n' % granularity
          adminMail += '%s|%s|%s\n' % ( 'user'.ljust(20),'name'.ljust(15),'status type')

        for token in tokensExpiring[ 'Value' ]:

          name = token[ 1 ]
          stype = token[ 2 ]
          user = token[ 9 ]

          adminMail += '\n %s %s %s\n' %( user.ljust(20), name.ljust(15), stype )

          #If user is RS_SVC, we ignore this, whenever the token is out, this
          #agent will set again the token to RS_SVC
          if user == 'RS_SVC':
            continue

          # ask the policy system what will happen when the token expires
          pdp = PDP( granularity = granularity, name = name, statusType = stype )
          decision = pdp.takeDecision()
          pcresult = decision[ 'PolicyCombinedResult' ]
          spresult = decision[ 'SinglePolicyResults' ]

          expiration = token[ 10 ]

          mailMessage = "The token for %s %s ( %s )" % ( granularity, name, stype )
          mailMessage = mailMessage + " will expire on %s\n\n" % expiration
          mailMessage = mailMessage + "You can renew it with command 'dirac-rss-renew-token'.\n"
          mailMessage = mailMessage + "If you don't take any action, RSS will take control of the resource.\n\n"

          policyMessage = ''
          if pcresult[ 'Action' ]:
            policyMessage += " Policies applied will set status to %s.\n" % pcresult[ 'Status' ]
            for spr in spresult:
              policyMessage += " %s Status->%s\n" % ( spr[ 'PolicyName' ].ljust(25), spr[ 'Status' ] )

          mailMessage += policyMessage
          adminMail += policyMessage

          # NOTE(review): sendMail result is not checked; also the registry
          # lookup assumes [ 'Value' ][ 0 ][ 2 ] is the email address -- TODO confirm
          self.noClient.sendMail( self.rmClient.getUserRegistryCache( user )[ 'Value' ][ 0 ][ 2 ],
                                  'Token for %s is expiring' % name, mailMessage )

      if adminMail != '':
        #FIXME: 'ubeda' is not generic ;p
        self.noClient.sendMail( self.rmClient.getUserRegistryCache( 'ubeda' )[ 'Value' ][ 0 ][ 2 ],
                                "Token's summary", adminMail )

      return S_OK()

    except Exception:
      errorStr = "TokenAgent execution"
      self.log.exception( errorStr )
      return S_ERROR( errorStr )

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
class TokenAgent(AgentModule):

  #############################################################################

  def initialize(self):
    """ TokenAgent initialization: create the DB and notification clients and
        read the 'notifyHours' advance-warning option (default 10 hours).
    """
    self.ELEMENTS = ['Site', 'StorageElementRead', 'StorageElementWrite']
    self.notifyHours = self.am_getOption('notifyHours', 10)

    try:
      self.rsDB = ResourceStatusDB()
      self.nc = NotificationClient()
      self.VOExt = getExt()
    except Exception:
      gLogger.exception("TokenAgent initialization")
      return S_ERROR("TokenAgent initialization")

    return S_OK()

  #############################################################################

  def execute(self):
    """ The main TokenAgent execution method.

        Checks for tokens owned by users that are expiring, and notifies those
        users. Calls rsDB.setToken() to set 'RS_SVC' as owner for those tokens
        that expired.
    """
    adminMail = ''

    try:
      farFuture = datetime.datetime(9999, 12, 31, 23, 59, 59)

      # Step 1: hand every already-expired token back to RS_SVC.
      for element in self.ELEMENTS:
        expired = self.rsDB.getTokens(element, None, datetime.datetime.utcnow())
        if not expired:
          continue
        adminMail += '\nLIST OF EXPIRED TOKENS\n'
        for tok in expired:
          tokenName, tokenUser = tok[0], tok[1]
          self.rsDB.setToken(element, tokenName, 'RS_SVC', farFuture)
          adminMail += ' %s %s\n' % (tokenUser.ljust(20), tokenName)

      # Step 2: warn owners whose tokens expire within the next notifyHours.
      deadline = datetime.datetime.utcnow() + datetime.timedelta(hours=self.notifyHours)
      for element in self.ELEMENTS:
        expiring = self.rsDB.getTokens(element, None, deadline)
        if not expiring:
          continue
        adminMail += '\nLIST OF EXPIRING TOKENS\n'
        for tok in expiring:
          tokenName, tokenUser = tok[0], tok[1]
          adminMail += '\n %s %s\n' % (tokenUser.ljust(20), tokenName)

          # RS_SVC-owned tokens are re-acquired by step 1 once they expire.
          if tokenUser == 'RS_SVC':
            continue

          decision = PDP(self.VOExt, granularity=element, name=tokenName).takeDecision()
          combined = decision['PolicyCombinedResult']
          singles = decision['SinglePolicyResults']

          mailMessage = ("The token for %s %s " % (element, tokenName)
                         + "will expire on %s\n\n" % tok[2]
                         + "You can renew it with command 'dirac-rss-renew-token'.\n"
                         + "If you don't take any action, RSS will take control of the resource.\n\n")

          policyMessage = ''
          if combined:
            policyMessage += " Policies applied will set status to %s.\n" % combined['Status']
            for single in singles:
              policyMessage += " %s Status->%s\n" % (single['PolicyName'].ljust(25),
                                                     single['Status'])

          mailMessage += policyMessage
          adminMail += policyMessage

          self.nc.sendMail(getMailForUser(tokenUser)['Value'][0],
                           'Token for %s is expiring' % tokenName, mailMessage)

      # Step 3: one summary mail to the administrator, if anything happened.
      if adminMail != '':
        self.nc.sendMail(getMailForUser('ubeda')['Value'][0], "Token's summary", adminMail)

      return S_OK()

    except Exception:
      gLogger.exception("TokenAgent execution")
      return S_ERROR("TokenAgent execution")

  #############################################################################
class TopErrorMessagesReporter( AgentModule ):
  """
  Agent that periodically mails a report with the most common error
  messages found in the SystemLoggingDB, throttled so that at most one
  report is sent per QueryPeriod.
  """

  def initialize( self ):
    """
    Build the DB/notification clients and resolve the list of mail
    recipients from the MailList/Reviewer options (falling back to the
    Operations EMail/Logging value).

    :return: S_OK on success, S_ERROR when no valid recipient is found.
    """
    self.systemLoggingDB = SystemLoggingDB()
    self.agentName = self.am_getModuleParam( 'fullName' )
    self.notification = NotificationClient()

    mailList = self.am_getOption( "MailList", [] )
    userList = self.am_getOption( "Reviewer", [] )
    self.log.debug( "Users to be notified:", ', '.join( userList ) )
    # Resolve each reviewer user name to a mail address via the Registry.
    for user in userList:
      mail = getUserOption( user, 'Email', '' )
      if not mail:
        self.log.warn( "Could not get user's mail", user )
      else:
        mailList.append( mail )

    if not mailList:
      mailList = Operations().getValue( 'EMail/Logging', [] )
    if not len( mailList ):
      errString = "There are no valid users in the list"
      varString = "[" + ','.join( userList ) + "]"
      self.log.error( errString, varString )
      return S_ERROR( errString + varString )

    self.log.info( "List of mails to be notified", ','.join( mailList ) )
    self._mailAddress = mailList
    self._threshold = int( self.am_getOption( 'Threshold', 10 ) )

    self.__days = self.am_getOption( 'QueryPeriod', 7 )
    self._period = int( self.__days ) * day
    self._limit = int( self.am_getOption( 'NumberOfErrors', 10 ) )

    string = "The %i most common errors in the SystemLoggingDB" % self._limit
    self._subject = string + " for the last %s days" % self.__days
    return S_OK()

  def execute( self ):
    """ The main agent execution method

        Queries the most frequent error strings of the last QueryPeriod
        days and mails them, recording the send date in the agent table
        so the report is sent at most once per period.
    """
    limitDate = date() - self._period

    tableList = [ "MessageRepository", "FixedTextMessages", "Systems", "SubSystems" ]
    columnsList = [ "SystemName", "SubSystemName", "count(*) as entries", "FixedTextString" ]
    # Values interpolated here are internal (dates / ints), not user input.
    cmd = "SELECT " + ', '.join( columnsList ) + " FROM " \
          + " NATURAL JOIN ".join( tableList ) \
          + " WHERE MessageTime > '%s'" % limitDate \
          + " GROUP BY FixedTextString HAVING entries > %s" % self._threshold \
          + " ORDER BY entries DESC LIMIT %i;" % self._limit

    result = self.systemLoggingDB._query( cmd )
    if not result['OK']:
      return result

    messageList = result['Value']
    if messageList == 'None' or messageList == ():
      self.log.warn( 'The DB query returned an empty result' )
      return S_OK()

    mailBody = '\n'
    for message in messageList:
      mailBody = mailBody + "Count: " + str( message[2] ) + "\tError: '" \
                 + message[3] + "'\tSystem: '" + message[0] \
                 + "'\tSubsystem: '" + message[1] + "'\n"
    mailBody = mailBody + "\n\n-------------------------------------------------------\n" \
               + "Please do not reply to this mail. It was automatically\n" \
               + "generated by a Dirac Agent.\n"

    result = self.systemLoggingDB._getDataFromAgentTable( self.agentName )
    self.log.debug( result )
    if not result['OK']:
      errorString = "Could not get the date when the last mail was sent"
      self.log.error( errorString )
      return S_ERROR( errorString )
    else:
      if len( result['Value'] ):
        # NOTE(review): the stored value appears to be quoted, hence the
        # [1:-1] slice before parsing — confirm the agent-table format.
        self.log.debug( "date value: %s" % fromString( result['Value'][0][0][1:-1] ) )
        lastMailSentDate = fromString( result['Value'][0][0][1:-1] )
      else:
        # No record yet: pretend the last mail predates the period so the
        # first report goes out immediately, and seed the agent table.
        lastMailSentDate = limitDate - 1 * day
        result = self.systemLoggingDB._insertDataIntoAgentTable( self.agentName, lastMailSentDate )
        if not result['OK']:
          errorString = "Could not insert data into the DB"
          self.log.error( errorString, result['Message'] )
          return S_ERROR( errorString + ": " + result['Message'] )

    self.log.debug( "limitDate: %s\t" % limitDate \
                    + "lastMailSentDate: %s\n" % lastMailSentDate )
    if lastMailSentDate > limitDate:
      self.log.info( "The previous report was sent less " \
                     + " than %s days ago" % self.__days )
      return S_OK()

    # Bug fix: the send date used to be written to the DB (and a success
    # message logged) BEFORE sendMail() was attempted, so a failed mail was
    # permanently recorded as sent and skipped for a whole period.
    # Send first; record the date only once the mail went out.
    result = self.notification.sendMail( self._mailAddress, self._subject, mailBody )
    if not result[ 'OK' ]:
      self.log.warn( "The notification could not be sent" )
      return S_OK()

    dateSent = toString( date() )
    result = self.systemLoggingDB._insertDataIntoAgentTable( self.agentName, dateSent )
    if not result['OK']:
      errorString = "Could not insert data into the DB"
      self.log.error( errorString, result['Message'] )
      return S_ERROR( errorString + ": " + result['Message'] )

    self.log.info( "The list with the top errors has been sent" )
    return S_OK( "The list with the top errors has been sent" )
class ErrorMessageMonitor(AgentModule):
  """
  Agent that mails newly-arrived (not yet reviewed) error strings from the
  SystemLoggingDB to the configured reviewers, then flags them as reviewed.
  """

  def initialize(self):
    """
    Build the DB/notification clients and resolve reviewer mail addresses
    from the Registry (falling back to /Operations/EMail/Logging).

    :return: S_OK on success, S_ERROR when no valid recipient is found.
    """
    self.SystemLoggingDB = SystemLoggingDB()
    self.notification = NotificationClient()

    userString = self.am_getOption( "Reviewer", 'mseco' )
    self.log.debug( "Users to be notified", ": " + userString )
    userList = List.fromChar( userString, "," )

    mailList = []
    for user in userList:
      retval = gConfig.getOption( "/Registry/Users/" + user + "/Email" )
      if not retval['OK']:
        self.log.warn( "Could not get user's mail", retval['Message'] )
      else:
        mailList.append( retval['Value'] )

    if not mailList:
      mailList = gConfig.getValue( '/Operations/EMail/Logging', [] )
    if not len( mailList ):
      errString = "There are no valid users in the list"
      varString = "[" + ','.join( userList ) + "]"
      self.log.error( errString, varString )
      return S_ERROR( errString + varString )

    self.log.info( "List of mails to be notified", ','.join( mailList ) )
    self._mailAddress = mailList
    self._subject = 'New error messages were entered in the SystemLoggingDB'
    return S_OK()

  def execute(self):
    """ The main agent execution method

        Mails the list of not-yet-reviewed error strings, then marks each
        of them as reviewed in the DB.
    """
    cmd = "SELECT count(*) FROM FixedTextMessages WHERE ReviewedMessage=0"
    result = self.SystemLoggingDB._query( cmd )
    if not result['OK']:
      return result
    recordsToReview = result['Value'][0][0]

    if recordsToReview == 0:
      self.log.info( 'No messages need review' )
      return S_OK( 'No messages need review' )

    conds = { 'ReviewedMessage': '0' }
    returnFields = [ 'FixedTextID', 'FixedTextString', 'SystemName', 'SubSystemName' ]
    result = self.SystemLoggingDB._queryDB( showFieldList = returnFields,
                                            groupColumn = 'FixedTextString',
                                            condDict = conds )
    if not result['OK']:
      self.log.error( 'Failed to obtain the non reviewed Strings', result['Message'] )
      return S_OK()

    messageList = result['Value']
    if messageList == 'None' or messageList == ():
      self.log.error( 'The DB query returned an empty result' )
      return S_OK()

    mailBody = 'These new messages have arrived to the Logging Service\n'
    for message in messageList:
      mailBody = mailBody + "String: '" + message[1] + "'\tSystem: '" \
                 + message[2] + "'\tSubsystem: '" + message[3] + "'\n"

    result = self.notification.sendMail( self._mailAddress, self._subject, mailBody )
    if not result[ 'OK' ]:
      # Nothing is flagged as reviewed when the mail fails, so the same
      # messages will be retried on the next cycle.
      self.log.warn( "The mail could not be sent" )
      return S_OK()

    for message in messageList:
      cmd = "UPDATE LOW_PRIORITY FixedTextMessages SET ReviewedMessage=1"
      cond = " WHERE FixedTextID=%s" % message[0]
      result = self.SystemLoggingDB._update( cmd + cond )
      # Bug fix: the success message used to be logged before the result was
      # checked, so failed updates were still reported as "updated".
      if not result['OK']:
        self.log.error( 'Could not update status of Message', message[1] )
        return S_OK()
      self.log.verbose( 'Message Status updated', '(%d, %s)' % ( message[0], message[1] ) )

    self.log.info( "The messages have been sent for review",
                   "There are %s new descriptions" % recordsToReview )
    return S_OK( "%s Messages have been sent for review" % recordsToReview )
class ProxyDB( DB ): NOTIFICATION_TIMES = [ 2592000, 1296000 ] def __init__( self, useMyProxy = False ): DB.__init__( self, 'ProxyDB', 'Framework/ProxyDB' ) random.seed() self.__defaultRequestLifetime = 300 # 5min self.__defaultTokenLifetime = 86400 * 7 # 1 week self.__defaultTokenMaxUses = 50 self.__useMyProxy = useMyProxy self._minSecsToAllowStore = 3600 self.__notifClient = NotificationClient() retVal = self.__initializeDB() if not retVal[ 'OK' ]: raise Exception( "Can't create tables: %s" % retVal[ 'Message' ] ) self.purgeExpiredProxies( sendNotifications = False ) self.__checkDBVersion() def getMyProxyServer( self ): return gConfig.getValue( "/DIRAC/VOPolicy/MyProxyServer" , "myproxy.cern.ch" ) def getMyProxyMaxLifeTime( self ): return gConfig.getValue( "/DIRAC/VOPolicy/MyProxyMaxDelegationTime", 168 ) * 3600 def getFromAddr( self ): """ Get the From address to use in proxy expiry e-mails. """ cs_path = getDatabaseSection( self.fullname ) opt_path = "/%s/%s" % ( cs_path, "FromAddr" ) return gConfig.getValue( opt_path, "*****@*****.**" ) def __initializeDB( self ): """ Create the tables """ retVal = self._query( "show tables" ) if not retVal[ 'OK' ]: return retVal tablesInDB = [ t[0] for t in retVal[ 'Value' ] ] tablesD = {} if 'ProxyDB_Requests' not in tablesInDB: tablesD[ 'ProxyDB_Requests' ] = { 'Fields' : { 'Id' : 'INTEGER AUTO_INCREMENT NOT NULL', 'UserDN' : 'VARCHAR(255) NOT NULL', 'Pem' : 'BLOB', 'ExpirationTime' : 'DATETIME' }, 'PrimaryKey' : 'Id' } if 'ProxyDB_Proxies' not in tablesInDB: tablesD[ 'ProxyDB_Proxies' ] = { 'Fields' : { 'UserName' : 'VARCHAR(64) NOT NULL', 'UserDN' : 'VARCHAR(255) NOT NULL', 'UserGroup' : 'VARCHAR(255) NOT NULL', 'Pem' : 'BLOB', 'ExpirationTime' : 'DATETIME', 'PersistentFlag' : 'ENUM ("True","False") NOT NULL DEFAULT "True"', }, 'PrimaryKey' : [ 'UserDN', 'UserGroup' ] } if 'ProxyDB_VOMSProxies' not in tablesInDB: tablesD[ 'ProxyDB_VOMSProxies' ] = { 'Fields' : { 'UserName' : 'VARCHAR(64) NOT NULL', 'UserDN' : 
'VARCHAR(255) NOT NULL', 'UserGroup' : 'VARCHAR(255) NOT NULL', 'VOMSAttr' : 'VARCHAR(255) NOT NULL', 'Pem' : 'BLOB', 'ExpirationTime' : 'DATETIME', }, 'PrimaryKey' : [ 'UserDN', 'UserGroup', 'vomsAttr' ] } if 'ProxyDB_Log' not in tablesInDB: tablesD[ 'ProxyDB_Log' ] = { 'Fields' : { 'ID': 'BIGINT NOT NULL AUTO_INCREMENT', 'IssuerDN' : 'VARCHAR(255) NOT NULL', 'IssuerGroup' : 'VARCHAR(255) NOT NULL', 'TargetDN' : 'VARCHAR(255) NOT NULL', 'TargetGroup' : 'VARCHAR(255) NOT NULL', 'Action' : 'VARCHAR(128) NOT NULL', 'Timestamp' : 'DATETIME', }, 'PrimaryKey': 'ID', 'Indexes' : { 'Timestamp' : [ 'Timestamp' ]} } if 'ProxyDB_Tokens' not in tablesInDB: tablesD[ 'ProxyDB_Tokens' ] = { 'Fields' : { 'Token' : 'VARCHAR(64) NOT NULL', 'RequesterDN' : 'VARCHAR(255) NOT NULL', 'RequesterGroup' : 'VARCHAR(255) NOT NULL', 'ExpirationTime' : 'DATETIME NOT NULL', 'UsesLeft' : 'SMALLINT UNSIGNED DEFAULT 1', }, 'PrimaryKey' : 'Token' } if 'ProxyDB_ExpNotifs' not in tablesInDB: tablesD[ 'ProxyDB_ExpNotifs' ] = { 'Fields' : { 'UserDN' : 'VARCHAR(255) NOT NULL', 'UserGroup' : 'VARCHAR(255) NOT NULL', 'LifeLimit' : 'INTEGER UNSIGNED DEFAULT 0', 'ExpirationTime' : 'DATETIME NOT NULL', }, 'PrimaryKey' : [ 'UserDN', 'UserGroup' ] } return self._createTables( tablesD ) def __addUserNameToTable( self, tableName ): result = self._update( "ALTER TABLE `%s` ADD COLUMN UserName VARCHAR(64) NOT NULL" % tableName ) if not result[ 'OK' ]: return result result = self._query( "SELECT DISTINCT UserName, UserDN FROM `%s`" % tableName ) if not result[ 'OK' ]: return result data = result[ 'Value' ] for userName, userDN in data: if not userName: result = Registry.getUsernameForDN( userDN ) if not result[ 'OK' ]: self.log.error( "Could not retrieve username for DN", userDN ) continue userName = result[ 'Value' ] try: userName = self._escapeString( userName )[ 'Value' ] userDN = self._escapeString( userDN )[ 'Value' ] except KeyError: self.log.error( "Could not escape username or DN", "%s %s" % ( userName, 
userDN ) ) continue userName = result[ 'Value' ] result = self._update( "UPDATE `%s` SET UserName=%s WHERE UserDN=%s" % ( tableName, userName, userDN ) ) if not result[ 'OK' ]: self.log.error( "Could update username for DN", "%s: %s" % ( userDN, result[ 'Message' ] ) ) continue self.log.info( "UserDN %s has user %s" % ( userDN, userName ) ) return S_OK() def __checkDBVersion( self ): for tableName in ( "ProxyDB_Proxies", "ProxyDB_VOMSProxies" ): result = self._query( "describe `%s`" % tableName ) if not result[ 'OK' ]: return result if 'UserName' not in [ row[0] for row in result[ 'Value' ] ]: self.log.notice( "Username missing in table %s schema. Adding it" % tableName ) result = self.__addUserNameToTable( tableName ) if not result[ 'OK' ]: return result def generateDelegationRequest( self, proxyChain, userDN ): """ Generate a request and store it for a given proxy Chain """ retVal = self._getConnection() if not retVal[ 'OK' ]: return retVal connObj = retVal[ 'Value' ] retVal = proxyChain.generateProxyRequest() if not retVal[ 'OK' ]: return retVal request = retVal[ 'Value' ] retVal = request.dumpRequest() if not retVal[ 'OK' ]: return retVal reqStr = retVal[ 'Value' ] retVal = request.dumpPKey() if not retVal[ 'OK' ]: return retVal allStr = reqStr + retVal[ 'Value' ] try: sUserDN = self._escapeString( userDN )[ 'Value' ] sAllStr = self._escapeString( allStr )[ 'Value' ] except KeyError: return S_ERROR( "Cannot escape DN" ) cmd = "INSERT INTO `ProxyDB_Requests` ( Id, UserDN, Pem, ExpirationTime )" cmd += " VALUES ( 0, %s, %s, TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() ) )" % ( sUserDN, sAllStr, int( self.__defaultRequestLifetime ) ) retVal = self._update( cmd, conn = connObj ) if not retVal[ 'OK' ]: return retVal #99% of the times we will stop here if 'lastRowId' in retVal: return S_OK( { 'id' : retVal['lastRowId'], 'request' : reqStr } ) #If the lastRowId hack does not work. 
Get it by hand retVal = self._query( "SELECT Id FROM `ProxyDB_Requests` WHERE Pem='%s'" % reqStr ) if not retVal[ 'OK' ]: return retVal data = retVal[ 'Value' ] if len( data ) == 0: return S_ERROR( "Insertion of the request in the db didn't work as expected" ) retVal = proxyChain.getDIRACGroup() if retVal[ 'OK' ] and retVal[ 'Value' ]: userGroup = retVal[ 'Value' ] else: userGroup = "unset" self.logAction( "request upload", userDN, userGroup, userDN, "any" ) #Here we go! return S_OK( { 'id' : data[0][0], 'request' : reqStr } ) def retrieveDelegationRequest( self, requestId, userDN ): """ Retrieve a request from the DB """ try: sUserDN = self._escapeString( userDN )[ 'Value' ] except KeyError: return S_ERROR( "Cannot escape DN" ) cmd = "SELECT Pem FROM `ProxyDB_Requests` WHERE Id = %s AND UserDN = %s" % ( requestId, sUserDN ) retVal = self._query( cmd ) if not retVal[ 'OK' ]: return retVal data = retVal[ 'Value' ] if len( data ) == 0: return S_ERROR( "No requests with id %s" % requestId ) request = X509Request() retVal = request.loadAllFromString( data[0][0] ) if not retVal[ 'OK' ]: return retVal return S_OK( request ) def purgeExpiredRequests( self ): """ Purge expired requests from the db """ cmd = "DELETE FROM `ProxyDB_Requests` WHERE ExpirationTime < UTC_TIMESTAMP()" return self._update( cmd ) def deleteRequest( self, requestId ): """ Delete a request from the db """ cmd = "DELETE FROM `ProxyDB_Requests` WHERE Id=%s" % requestId return self._update( cmd ) def completeDelegation( self, requestId, userDN, delegatedPem ): """ Complete a delegation and store it in the db """ retVal = self.retrieveDelegationRequest( requestId, userDN ) if not retVal[ 'OK' ]: return retVal request = retVal[ 'Value' ] chain = X509Chain( keyObj = request.getPKey() ) retVal = chain.loadChainFromString( delegatedPem ) if not retVal[ 'OK' ]: return retVal retVal = chain.isValidProxy( ignoreDefault = True ) noGroupFlag = False if not retVal[ 'OK' ]: if DErrno.cmpError( retVal, 
DErrno.ENOGROUP ): noGroupFlag = True else: return retVal result = chain.isVOMS() if result[ 'OK' ] and result[ 'Value' ]: return S_ERROR( "Proxies with VOMS extensions are not allowed to be uploaded" ) retVal = request.checkChain( chain ) if not retVal[ 'OK' ]: return retVal if not retVal[ 'Value' ]: return S_ERROR( "Received chain does not match request: %s" % retVal[ 'Message' ] ) retVal = chain.getDIRACGroup() if not retVal[ 'OK' ]: return retVal userGroup = retVal[ 'Value' ] if not userGroup: userGroup = Registry.getDefaultUserGroup() retVal = Registry.getGroupsForDN( userDN ) if not retVal[ 'OK' ]: return retVal if not userGroup in retVal[ 'Value' ]: return S_ERROR( "%s group is not valid for %s" % ( userGroup, userDN ) ) # For proxies without embedded DIRAC group only one default is allowed # Cleaning all the proxies for this DN if any before uploading the new one. if noGroupFlag: retVal = self.deleteProxy( userDN ) if not retVal[ 'OK' ]: return retVal retVal = self.storeProxy( userDN, userGroup, chain ) if not retVal[ 'OK' ]: return retVal retVal = self.deleteRequest( requestId ) if not retVal[ 'OK' ]: return retVal return S_OK() def storeProxy( self, userDN, userGroup, chain ): """ Store user proxy into the Proxy repository for a user specified by his DN and group. 
""" retVal = Registry.getUsernameForDN( userDN ) if not retVal[ 'OK' ]: return retVal userName = retVal[ 'Value' ] #Get remaining secs retVal = chain.getRemainingSecs() if not retVal[ 'OK' ]: return retVal remainingSecs = retVal[ 'Value' ] if remainingSecs < self._minSecsToAllowStore: return S_ERROR( "Cannot store proxy, remaining secs %s is less than %s" % ( remainingSecs, self._minSecsToAllowStore ) ) #Compare the DNs retVal = chain.getIssuerCert() if not retVal[ 'OK' ]: return retVal proxyIdentityDN = retVal[ 'Value' ].getSubjectDN()[ 'Value' ] if not userDN == proxyIdentityDN: msg = "Mismatch in the user DN" vMsg = "Proxy says %s and credentials are %s" % ( proxyIdentityDN, userDN ) self.log.error( msg, vMsg ) return S_ERROR( "%s. %s" % ( msg, vMsg ) ) #Check the groups retVal = chain.getDIRACGroup() if not retVal[ 'OK' ]: return retVal proxyGroup = retVal[ 'Value' ] if not proxyGroup: proxyGroup = Registry.getDefaultUserGroup() if not userGroup == proxyGroup: msg = "Mismatch in the user group" vMsg = "Proxy says %s and credentials are %s" % ( proxyGroup, userGroup ) self.log.error( msg, vMsg ) return S_ERROR( "%s. 
%s" % ( msg, vMsg ) ) #Check if its limited if chain.isLimitedProxy()['Value']: return S_ERROR( "Limited proxies are not allowed to be stored" ) dLeft = remainingSecs / 86400 hLeft = remainingSecs / 3600 - dLeft * 24 mLeft = remainingSecs / 60 - hLeft * 60 - dLeft * 1440 sLeft = remainingSecs - hLeft * 3600 - mLeft * 60 - dLeft * 86400 self.log.info( "Storing proxy for credentials %s (%d:%02d:%02d:%02d left)" % ( proxyIdentityDN, dLeft, hLeft, mLeft, sLeft ) ) try: sUserDN = self._escapeString( userDN )[ 'Value' ] sUserGroup = self._escapeString( userGroup )[ 'Value' ] except KeyError: return S_ERROR( "Cannot escape DN" ) # Check what we have already got in the repository cmd = "SELECT TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ), Pem FROM `ProxyDB_Proxies` WHERE UserDN=%s AND UserGroup=%s" % ( sUserDN, sUserGroup ) result = self._query( cmd ) if not result['OK']: return result # check if there is a previous ticket for the DN data = result[ 'Value' ] sqlInsert = True if len( data ) > 0: sqlInsert = False pem = data[0][1] if pem: remainingSecsInDB = data[0][0] if remainingSecs <= remainingSecsInDB: self.log.info( "Proxy stored is longer than uploaded, omitting.", "%s in uploaded, %s in db" % ( remainingSecs, remainingSecsInDB ) ) return S_OK() pemChain = chain.dumpAllToString()['Value'] dValues = { 'UserName' : self._escapeString( userName )[ 'Value' ], 'UserDN' : sUserDN, 'UserGroup' : sUserGroup, 'Pem' : self._escapeString( pemChain )[ 'Value' ], 'ExpirationTime' : 'TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() )' % int( remainingSecs ), 'PersistentFlag' : "'False'" } if sqlInsert: sqlFields = [] sqlValues = [] for key in dValues: sqlFields.append( key ) sqlValues.append( dValues[ key ] ) cmd = "INSERT INTO `ProxyDB_Proxies` ( %s ) VALUES ( %s )" % ( ", ".join( sqlFields ), ", ".join( sqlValues ) ) else: sqlSet = [] sqlWhere = [] for k in dValues: if k in ( 'UserDN', 'UserGroup' ): sqlWhere.append( "%s = %s" % ( k, dValues[k] ) ) else: sqlSet.append( "%s = 
%s" % ( k, dValues[k] ) ) cmd = "UPDATE `ProxyDB_Proxies` SET %s WHERE %s" % ( ", ".join( sqlSet ), " AND ".join( sqlWhere ) ) self.logAction( "store proxy", userDN, userGroup, userDN, userGroup ) return self._update( cmd ) def purgeExpiredProxies( self, sendNotifications = True ): """ Purge expired requests from the db """ purged = 0 for tableName in ( "ProxyDB_Proxies", "ProxyDB_VOMSProxies" ): cmd = "DELETE FROM `%s` WHERE ExpirationTime < UTC_TIMESTAMP()" % tableName result = self._update( cmd ) if not result[ 'OK' ]: return result purged += result[ 'Value' ] self.log.info( "Purged %s expired proxies from %s" % ( result[ 'Value' ], tableName ) ) if sendNotifications: result = self.sendExpirationNotifications() if not result[ 'OK' ]: return result return S_OK( purged ) def deleteProxy( self, userDN, userGroup='any' ): """ Remove proxy of the given user from the repository """ try: userDN = self._escapeString( userDN )[ 'Value' ] if userGroup != 'any': userGroup = self._escapeString( userGroup )[ 'Value' ] except KeyError: return S_ERROR( "Invalid DN or group" ) req = "DELETE FROM `%%s` WHERE UserDN=%s" % userDN if userGroup != 'any': req += " AND UserGroup=%s" % userGroup for db in [ 'ProxyDB_Proxies', 'ProxyDB_VOMSProxies' ]: result = self._update( req % db ) return result def __getPemAndTimeLeft( self, userDN, userGroup = False, vomsAttr = False ): try: sUserDN = self._escapeString( userDN )[ 'Value' ] if userGroup: sUserGroup = self._escapeString( userGroup )[ 'Value' ] if vomsAttr: sVomsAttr = self._escapeString( vomsAttr )[ 'Value' ] except KeyError: return S_ERROR( "Invalid DN or group" ) if not vomsAttr: table = "`ProxyDB_Proxies`" else: table = "`ProxyDB_VOMSProxies`" cmd = "SELECT Pem, TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) from %s" % table cmd += "WHERE UserDN=%s AND TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) > 0" % ( sUserDN ) if userGroup: cmd += " AND UserGroup=%s" % sUserGroup if vomsAttr: cmd += " AND VOMSAttr=%s" % 
sVomsAttr retVal = self._query( cmd ) if not retVal['OK']: return retVal data = retVal[ 'Value' ] for record in data: if record[0]: return S_OK( ( record[0], record[1] ) ) if userGroup: userMask = "%s@%s" % ( userDN, userGroup ) else: userMask = userDN return S_ERROR( "%s has no proxy registered" % userMask ) def renewFromMyProxy( self, userDN, userGroup, lifeTime = False, chain = False ): if not lifeTime: lifeTime = 43200 if not self.__useMyProxy: return S_ERROR( "myproxy is disabled" ) #Get the chain if not chain: retVal = self.__getPemAndTimeLeft( userDN, userGroup ) if not retVal[ 'OK' ]: return retVal pemData = retVal[ 'Value' ][0] chain = X509Chain() retVal = chain.loadProxyFromString( pemData ) if not retVal[ 'OK' ]: return retVal originChainLifeTime = chain.getRemainingSecs()[ 'Value' ] maxMyProxyLifeTime = self.getMyProxyMaxLifeTime() #If we have a chain that's 0.8 of max mplifetime don't ask to mp if originChainLifeTime > maxMyProxyLifeTime * 0.8: self.log.error( "Skipping myproxy download", "user %s %s chain has %s secs and requested %s secs" % ( userDN, userGroup, originChainLifeTime, maxMyProxyLifeTime ) ) return S_OK( chain ) lifeTime *= 1.3 if lifeTime > maxMyProxyLifeTime: lifeTime = maxMyProxyLifeTime self.log.error( "Renewing proxy from myproxy", "user %s %s for %s secs" % ( userDN, userGroup, lifeTime ) ) myProxy = MyProxy( server = self.getMyProxyServer() ) retVal = myProxy.getDelegatedProxy( chain, lifeTime ) if not retVal[ 'OK' ]: return retVal mpChain = retVal[ 'Value' ] retVal = mpChain.getRemainingSecs() if not retVal[ 'OK' ]: return S_ERROR( "Can't retrieve remaining secs from renewed proxy: %s" % retVal[ 'Message' ] ) mpChainSecsLeft = retVal['Value'] if mpChainSecsLeft < originChainLifeTime: self.log.info( "Chain downloaded from myproxy has less lifetime than the one stored in the db", "\n Downloaded from myproxy: %s secs\n Stored in DB: %s secs" % ( mpChainSecsLeft, originChainLifeTime ) ) return S_OK( chain ) retVal = 
mpChain.getDIRACGroup() if not retVal[ 'OK' ]: return S_ERROR( "Can't retrieve DIRAC Group from renewed proxy: %s" % retVal[ 'Message' ] ) chainGroup = retVal['Value'] if chainGroup != userGroup: return S_ERROR( "Mismatch between renewed proxy group and expected: %s vs %s" % ( userGroup, chainGroup ) ) retVal = self.storeProxy( userDN, userGroup, mpChain ) if not retVal[ 'OK' ]: self.log.error( "Cannot store proxy after renewal", retVal[ 'Message' ] ) retVal = myProxy.getServiceDN() if not retVal[ 'OK' ]: hostDN = userDN else: hostDN = retVal[ 'Value' ] self.logAction( "myproxy renewal", hostDN, "host", userDN, userGroup ) return S_OK( mpChain ) def __getPUSProxy( self, userDN, userGroup, requiredLifetime, requestedVOMSAttr = None ): result = Registry.getGroupsForDN( userDN ) if not result['OK']: return result validGroups = result['Value'] if not userGroup in validGroups: return S_ERROR( 'Invalid group %s for user' % userGroup ) voName = Registry.getVOForGroup( userGroup ) if not voName: return S_ERROR( 'Can not determine VO for group %s' % userGroup ) retVal = self.__getVOMSAttribute( userGroup, requestedVOMSAttr ) if not retVal[ 'OK' ]: return retVal vomsAttribute = retVal[ 'Value' ][ 'attribute' ] vomsVO = retVal[ 'Value' ][ 'VOMSVO' ] puspServiceURL = Registry.getVOOption( voName, 'PUSPServiceURL' ) if not puspServiceURL: return S_ERROR( 'Can not determine PUSP service URL for VO %s' % voName ) user = userDN.split(":")[-1] puspURL = "%s?voms=%s:%s&proxy-renewal=false&disable-voms-proxy=false" \ "&rfc-proxy=true&cn-label=user:%s" % ( puspServiceURL, vomsVO, vomsAttribute, user ) try: proxy = urllib.urlopen( puspURL ).read() except Exception as e: return S_ERROR( 'Failed to get proxy from the PUSP server' ) chain = X509Chain() chain.loadChainFromString( proxy ) chain.loadKeyFromString( proxy ) result = chain.getCredentials() if not result['OK']: return S_ERROR( 'Failed to get a valid PUSP proxy' ) credDict = result['Value'] if credDict['identity'] != userDN: 
return S_ERROR( 'Requested DN does not match the obtained one in the PUSP proxy' ) timeLeft = credDict['secondsLeft'] result = chain.generateProxyToString( lifeTime = timeLeft, diracGroup = userGroup ) if not result['OK']: return result proxyString = result['Value'] return S_OK( ( proxyString, timeLeft ) ) def getProxy( self, userDN, userGroup, requiredLifeTime = False ): """ Get proxy string from the Proxy Repository for use with userDN in the userGroup """ # Get the Per User SubProxy if one is requested if isPUSPdn( userDN ): result = self.__getPUSProxy( userDN, userGroup, requiredLifeTime ) if not result['OK']: return result pemData = result[ 'Value' ][0] timeLeft = result[ 'Value' ][1] chain = X509Chain() result = chain.loadProxyFromString( pemData ) if not result[ 'OK' ]: return result return S_OK( ( chain, timeLeft ) ) # Standard proxy is requested retVal = self.__getPemAndTimeLeft( userDN, userGroup ) if not retVal[ 'OK' ]: return retVal pemData = retVal[ 'Value' ][0] timeLeft = retVal[ 'Value' ][1] chain = X509Chain() retVal = chain.loadProxyFromString( pemData ) if not retVal[ 'OK' ]: return retVal if requiredLifeTime: if timeLeft < requiredLifeTime: retVal = self.renewFromMyProxy( userDN, userGroup, lifeTime = requiredLifeTime, chain = chain ) if not retVal[ 'OK' ]: return S_ERROR( "Can't get a proxy for %s seconds: %s" % ( requiredLifeTime, retVal[ 'Message' ] ) ) chain = retVal[ 'Value' ] #Proxy is invalid for some reason, let's delete it if not chain.isValidProxy()['Value']: self.deleteProxy( userDN, userGroup ) return S_ERROR( "%s@%s has no proxy registered" % ( userDN, userGroup ) ) return S_OK( ( chain, timeLeft ) ) def __getVOMSAttribute( self, userGroup, requiredVOMSAttribute = False ): if requiredVOMSAttribute: return S_OK( { 'attribute' : requiredVOMSAttribute, 'VOMSVO' : Registry.getVOMSVOForGroup( userGroup ) } ) csVOMSMapping = Registry.getVOMSAttributeForGroup( userGroup ) if not csVOMSMapping: return S_ERROR( "No mapping defined for group 
%s in the CS" % userGroup ) return S_OK( { 'attribute' : csVOMSMapping, 'VOMSVO' : Registry.getVOMSVOForGroup( userGroup ) } ) def getVOMSProxy( self, userDN, userGroup, requiredLifeTime = False, requestedVOMSAttr = False ): """ Get proxy string from the Proxy Repository for use with userDN in the userGroup and VOMS attr """ retVal = self.__getVOMSAttribute( userGroup, requestedVOMSAttr ) if not retVal[ 'OK' ]: return retVal vomsAttr = retVal[ 'Value' ][ 'attribute' ] vomsVO = retVal[ 'Value' ][ 'VOMSVO' ] #Look in the cache retVal = self.__getPemAndTimeLeft( userDN, userGroup, vomsAttr ) if retVal[ 'OK' ]: pemData = retVal[ 'Value' ][0] vomsTime = retVal[ 'Value' ][1] chain = X509Chain() retVal = chain.loadProxyFromString( pemData ) if retVal[ 'OK' ]: retVal = chain.getRemainingSecs() if retVal[ 'OK' ]: remainingSecs = retVal[ 'Value' ] if requiredLifeTime and requiredLifeTime <= vomsTime and requiredLifeTime <= remainingSecs: return S_OK( ( chain, min( vomsTime, remainingSecs ) ) ) if isPUSPdn( userDN ): # Get the Per User SubProxy if one is requested result = self.__getPUSProxy( userDN, userGroup, requiredLifeTime, requestedVOMSAttr ) if not result['OK']: return result pemData = result[ 'Value' ][0] chain = X509Chain() result = chain.loadProxyFromString( pemData ) if not result[ 'OK' ]: return result else: # Get the stored proxy and dress it with the VOMS extension retVal = self.getProxy( userDN, userGroup, requiredLifeTime ) if not retVal[ 'OK' ]: return retVal chain, secsLeft = retVal[ 'Value' ] if requiredLifeTime and requiredLifeTime > secsLeft: return S_ERROR( "Stored proxy is not long lived enough" ) vomsMgr = VOMS() retVal = vomsMgr.getVOMSAttributes( chain ) if retVal[ 'OK' ]: attrs = retVal[ 'Value' ] if len( attrs ) > 0: if attrs[0] != vomsAttr: return S_ERROR( "Stored proxy has already a different VOMS attribute %s than requested %s" % ( vomsAttr, attrs[0] ) ) else: result = self.__storeVOMSProxy( userDN, userGroup, vomsAttr, chain ) if not result[ 
'OK' ]: return result secsLeft = result[ 'Value' ] if requiredLifeTime and requiredLifeTime <= secsLeft: return S_OK( ( chain, secsLeft ) ) return S_ERROR( "Stored proxy has already a different VOMS attribute and is not long lived enough" ) retVal = vomsMgr.setVOMSAttributes( chain , vomsAttr, vo = vomsVO ) if not retVal[ 'OK' ]: return S_ERROR( "Cannot append voms extension: %s" % retVal[ 'Message' ] ) chain = retVal[ 'Value' ] # We have got the VOMS proxy, store it into the cache result = self.__storeVOMSProxy( userDN, userGroup, vomsAttr, chain ) if not result[ 'OK' ]: return result secsLeft = result[ 'Value' ] return S_OK( ( chain, secsLeft ) ) def __storeVOMSProxy( self, userDN, userGroup, vomsAttr, chain ): retVal = self._getConnection() if not retVal[ 'OK' ]: return retVal connObj = retVal[ 'Value' ] retVal1 = VOMS().getVOMSProxyInfo( chain, 'actimeleft' ) retVal2 = VOMS().getVOMSProxyInfo( chain, 'timeleft' ) if not retVal1[ 'OK' ]: return retVal1 if not retVal2[ 'OK' ]: return retVal2 try: vomsSecsLeft1 = int( retVal1[ 'Value' ].strip() ) vomsSecsLeft2 = int( retVal2[ 'Value' ].strip() ) vomsSecsLeft = min( vomsSecsLeft1, vomsSecsLeft2 ) except Exception as e: return S_ERROR( "Can't parse VOMS time left: %s" % str( e ) ) secsLeft = min( vomsSecsLeft, chain.getRemainingSecs()[ 'Value' ] ) pemData = chain.dumpAllToString()[ 'Value' ] result = Registry.getUsernameForDN( userDN ) if not result[ 'OK' ]: userName = "" else: userName = result[ 'Value' ] try: sUserName = self._escapeString( userName )[ 'Value' ] sUserDN = self._escapeString( userDN )[ 'Value' ] sUserGroup = self._escapeString( userGroup )[ 'Value' ] sVomsAttr = self._escapeString( vomsAttr )[ 'Value' ] sPemData = self._escapeString( pemData )[ 'Value' ] except KeyError: return S_ERROR( "Could not escape some data" ) cmd = "REPLACE INTO `ProxyDB_VOMSProxies` ( UserName, UserDN, UserGroup, VOMSAttr, Pem, ExpirationTime ) VALUES " cmd += "( %s, %s, %s, %s, %s, TIMESTAMPADD( SECOND, %d, 
UTC_TIMESTAMP() ) )" % ( sUserName, sUserDN, sUserGroup, sVomsAttr, sPemData, secsLeft ) result = self._update( cmd, conn = connObj ) if not result[ 'OK' ]: return result return S_OK( secsLeft ) def getRemainingTime( self, userDN, userGroup ): """ Returns the remaining time the proxy is valid """ try: userDN = self._escapeString( userDN )[ 'Value' ] userGroup = self._escapeString( userGroup )[ 'Value' ] except KeyError: return S_ERROR( "Invalid DN or group" ) cmd = "SELECT TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) FROM `ProxyDB_Proxies`" retVal = self._query( "%s WHERE UserDN = %s AND UserGroup = %s" % ( cmd, userDN, userGroup ) ) if not retVal[ 'OK' ]: return retVal data = retVal[ 'Value' ] if not data: return S_OK( 0 ) return S_OK( int( data[0][0] ) ) def getUsers( self, validSecondsLeft = 0, dnMask = False, groupMask = False, userMask = False ): """ Get all the distinct users from the Proxy Repository. Optionally, only users with valid proxies within the given validity period expressed in seconds """ cmd = "SELECT UserName, UserDN, UserGroup, ExpirationTime, PersistentFlag FROM `ProxyDB_Proxies`" sqlCond = [] if validSecondsLeft: try: validSecondsLeft = int( validSecondsLeft ) except ValueError: return S_ERROR( "Seconds left has to be an integer" ) sqlCond.append( "TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) > %d" % validSecondsLeft ) for field, mask in ( ( 'UserDN', dnMask ), ( 'UserGroup', groupMask ), ( 'UserName', userMask ) ): if not mask: continue if type( mask ) not in ( types.ListType, types.TupleType ): mask = [ mask ] mask = [ self._escapeString( entry )[ 'Value' ] for entry in mask ] sqlCond.append( "%s in ( %s )" % ( field, ", ".join( mask ) ) ) if sqlCond: cmd += " WHERE %s" % " AND ".join( sqlCond ) retVal = self._query( cmd ) if not retVal[ 'OK' ]: return retVal data = [] for record in retVal[ 'Value' ]: data.append( { 'Name': record[0], 'DN' : record[1], 'group' : record[2], 'expirationtime' : record[3], 'persistent' : 
record[4] == 'True' } ) return S_OK( data ) def getCredentialsAboutToExpire( self, requiredSecondsLeft, onlyPersistent = True ): cmd = "SELECT UserDN, UserGroup, ExpirationTime, PersistentFlag FROM `ProxyDB_Proxies`" cmd += " WHERE TIMESTAMPDIFF( SECOND, ExpirationTime, UTC_TIMESTAMP() ) < %d and TIMESTAMPDIFF( SECOND, ExpirationTime, UTC_TIMESTAMP() ) > 0" % requiredSecondsLeft if onlyPersistent: cmd += " AND PersistentFlag = 'True'" return self._query( cmd ) def setPersistencyFlag( self, userDN, userGroup, persistent = True ): """ Set the proxy PersistentFlag to the flag value """ try: sUserDN = self._escapeString( userDN )[ 'Value' ] sUserGroup = self._escapeString( userGroup )[ 'Value' ] except KeyError: return S_ERROR( "Can't escape something" ) if persistent: sqlFlag = "True" else: sqlFlag = "False" retVal = self._query( "SELECT PersistentFlag FROM `ProxyDB_Proxies` WHERE UserDN=%s AND UserGroup=%s" % ( sUserDN, sUserGroup ) ) sqlInsert = True if retVal[ 'OK' ]: data = retVal[ 'Value' ] if len( data ) > 0: sqlInsert = False if data[0][0] == sqlFlag: return S_OK() if sqlInsert: #If it's not in the db and we're removing the persistency then do nothing if not persistent: return S_OK() cmd = "INSERT INTO `ProxyDB_Proxies` ( UserDN, UserGroup, Pem, ExpirationTime, PersistentFlag ) VALUES " cmd += "( %s, %s, '', UTC_TIMESTAMP(), 'True' )" % ( sUserDN, sUserGroup ) else: cmd = "UPDATE `ProxyDB_Proxies` SET PersistentFlag='%s' WHERE UserDN=%s AND UserGroup=%s" % ( sqlFlag, sUserDN, sUserGroup ) retVal = self._update( cmd ) if not retVal[ 'OK' ]: return retVal return S_OK() def getProxiesContent( self, selDict, sortList, start = 0, limit = 0 ): """ Function to get the contents of the db parameters are a filter to the db """ fields = ( "UserName", "UserDN", "UserGroup", "ExpirationTime", "PersistentFlag" ) cmd = "SELECT %s FROM `ProxyDB_Proxies`" % ", ".join( fields ) sqlWhere = [ "Pem is not NULL" ] for field in selDict: if field not in fields: continue fVal = 
selDict[field] if type( fVal ) in ( types.DictType, types.TupleType, types.ListType ): sqlWhere.append( "%s in (%s)" % ( field, ", ".join( [ self._escapeString( str( value ) )[ 'Value' ] for value in fVal ] ) ) ) else: sqlWhere.append( "%s = %s" % ( field, self._escapeString( str( fVal ) )[ 'Value' ] ) ) sqlOrder = [] if sortList: for sort in sortList: if len( sort ) == 1: sort = ( sort, "DESC" ) elif len( sort ) > 2: return S_ERROR( "Invalid sort %s" % sort ) if sort[0] not in fields: return S_ERROR( "Invalid sorting field %s" % sort[0] ) if sort[1].upper() not in ( "ASC", "DESC" ): return S_ERROR( "Invalid sorting order %s" % sort[1] ) sqlOrder.append( "%s %s" % ( sort[0], sort[1] ) ) if sqlWhere: cmd = "%s WHERE %s" % ( cmd, " AND ".join( sqlWhere ) ) if sqlOrder: cmd = "%s ORDER BY %s" % ( cmd, ", ".join( sqlOrder ) ) if limit: try: start = int( start ) limit = int( limit ) except ValueError: return S_ERROR( "start and limit have to be integers" ) cmd += " LIMIT %d,%d" % ( start, limit ) retVal = self._query( cmd ) if not retVal[ 'OK' ]: return retVal data = [] for record in retVal[ 'Value' ]: record = list( record ) if record[4] == 'True': record[4] = True else: record[4] = False data.append( record ) totalRecords = len( data ) cmd = "SELECT COUNT( UserGroup ) FROM `ProxyDB_Proxies`" if sqlWhere: cmd = "%s WHERE %s" % ( cmd, " AND ".join( sqlWhere ) ) retVal = self._query( cmd ) if retVal[ 'OK' ]: totalRecords = retVal[ 'Value' ][0][0] return S_OK( { 'ParameterNames' : fields, 'Records' : data, 'TotalRecords' : totalRecords } ) def logAction( self, action, issuerDN, issuerGroup, targetDN, targetGroup ): """ Add an action to the log """ try: sAction = self._escapeString( action )[ 'Value' ] sIssuerDN = self._escapeString( issuerDN )[ 'Value' ] sIssuerGroup = self._escapeString( issuerGroup )[ 'Value' ] sTargetDN = self._escapeString( targetDN )[ 'Value' ] sTargetGroup = self._escapeString( targetGroup )[ 'Value' ] except KeyError: return S_ERROR( "Can't escape 
from death" ) cmd = "INSERT INTO `ProxyDB_Log` ( Action, IssuerDN, IssuerGroup, TargetDN, TargetGroup, Timestamp ) VALUES " cmd += "( %s, %s, %s, %s, %s, UTC_TIMESTAMP() )" % ( sAction, sIssuerDN, sIssuerGroup, sTargetDN, sTargetGroup ) retVal = self._update( cmd ) if not retVal[ 'OK' ]: self.log.error( "Can't add a proxy action log: ", retVal[ 'Message' ] ) def purgeLogs( self ): """ Purge expired requests from the db """ cmd = "DELETE FROM `ProxyDB_Log` WHERE TIMESTAMPDIFF( SECOND, Timestamp, UTC_TIMESTAMP() ) > 15552000" return self._update( cmd ) def getLogsContent( self, selDict, sortList, start = 0, limit = 0 ): """ Function to get the contents of the logs table parameters are a filter to the db """ fields = ( "Action", "IssuerDN", "IssuerGroup", "TargetDN", "TargetGroup", "Timestamp" ) cmd = "SELECT %s FROM `ProxyDB_Log`" % ", ".join( fields ) if selDict: qr = [] if 'beforeDate' in selDict: qr.append( "Timestamp < %s" % self._escapeString( selDict[ 'beforeDate' ] )[ 'Value' ] ) del selDict[ 'beforeDate' ] if 'afterDate' in selDict: qr.append( "Timestamp > %s" % self._escapeString( selDict[ 'afterDate' ] )[ 'Value' ] ) del selDict[ 'afterDate' ] for field in selDict: qr.append( "(%s)" % " OR ".join( [ "%s=%s" % ( field, self._escapeString( str( value ) )[ 'Value' ] ) for value in selDict[field] ] ) ) whereStr = " WHERE %s" % " AND ".join( qr ) cmd += whereStr else: whereStr = "" if sortList: cmd += " ORDER BY %s" % ", ".join( [ "%s %s" % ( sort[0], sort[1] ) for sort in sortList ] ) if limit: cmd += " LIMIT %d,%d" % ( start, limit ) retVal = self._query( cmd ) if not retVal[ 'OK' ]: return retVal data = retVal[ 'Value' ] totalRecords = len( data ) cmd = "SELECT COUNT( Timestamp ) FROM `ProxyDB_Log`" cmd += whereStr retVal = self._query( cmd ) if retVal[ 'OK' ]: totalRecords = retVal[ 'Value' ][0][0] return S_OK( { 'ParameterNames' : fields, 'Records' : data, 'TotalRecords' : totalRecords } ) def generateToken( self, requesterDN, requesterGroup, numUses = 1, 
lifeTime = 0, retries = 10 ): """ Generate and return a token and the number of uses for the token """ if not lifeTime: lifeTime = gConfig.getValue( "/DIRAC/VOPolicy/TokenLifeTime", self.__defaultTokenLifetime ) maxUses = gConfig.getValue( "/DIRAC/VOPolicy/TokenMaxUses", self.__defaultTokenMaxUses ) numUses = max( 1, min( numUses, maxUses ) ) m = hashlib.md5() rndData = "%s.%s.%s.%s" % ( time.time(), random.random(), numUses, lifeTime ) m.update( rndData ) token = m.hexdigest() fieldsSQL = ", ".join( ( "Token", "RequesterDN", "RequesterGroup", "ExpirationTime", "UsesLeft" ) ) valuesSQL = ", ".join( ( self._escapeString( token )['Value'], self._escapeString( requesterDN )['Value'], self._escapeString( requesterGroup )['Value'], "TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() )" % int( lifeTime ), str( numUses ) ) ) insertSQL = "INSERT INTO `ProxyDB_Tokens` ( %s ) VALUES ( %s )" % ( fieldsSQL, valuesSQL ) result = self._update( insertSQL ) if result[ 'OK' ]: return S_OK( ( token, numUses ) ) if result[ 'Message' ].find( "uplicate entry" ) > -1: if retries: return self.generateToken( numUses, lifeTime, retries - 1 ) return S_ERROR( "Max retries reached for token generation. 
Aborting" ) return result def purgeExpiredTokens( self ): delSQL = "DELETE FROM `ProxyDB_Tokens` WHERE ExpirationTime < UTC_TIMESTAMP() OR UsesLeft < 1" return self._update( delSQL ) def useToken( self, token, requesterDN, requesterGroup ): sqlCond = " AND ".join( ( "UsesLeft > 0", "Token=%s" % self._escapeString( token )['Value'], "RequesterDN=%s" % self._escapeString( requesterDN )['Value'], "RequesterGroup=%s" % self._escapeString( requesterGroup )['Value'], "ExpirationTime >= UTC_TIMESTAMP()" ) ) updateSQL = "UPDATE `ProxyDB_Tokens` SET UsesLeft = UsesLeft - 1 WHERE %s" % sqlCond result = self._update( updateSQL ) if not result[ 'OK' ]: return result return S_OK( result[ 'Value' ] > 0 ) def __cleanExpNotifs( self ): cmd = "DELETE FROM `ProxyDB_ExpNotifs` WHERE ExpirationTime < UTC_TIMESTAMP()" return self._update( cmd ) def sendExpirationNotifications( self ): result = self.__cleanExpNotifs() if not result[ 'OK' ]: return result cmd = "SELECT UserDN, UserGroup, LifeLimit FROM `ProxyDB_ExpNotifs`" result = self._query( cmd ) if not result[ 'OK' ]: return result notifDone = dict( [ ( ( row[0], row[1] ), row[2] ) for row in result[ 'Value' ] ] ) notifLimits = sorted( [ int( x ) for x in self.getCSOption( "NotificationTimes", ProxyDB.NOTIFICATION_TIMES ) ] ) sqlSel = "UserDN, UserGroup, TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime )" sqlCond = "TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) < %d" % max( notifLimits ) cmd = "SELECT %s FROM `ProxyDB_Proxies` WHERE %s" % ( sqlSel, sqlCond ) result = self._query( cmd ) if not result[ 'OK' ]: return result pilotProps = ( Properties.GENERIC_PILOT, Properties.PILOT ) data = result[ 'Value' ] sent = [] for row in data: userDN, group, lTime = row #If it's a pilot proxy, skip it if Registry.groupHasProperties( group, pilotProps ): continue #IF it dosn't hace the auto upload proxy, skip it if not Registry.getGroupOption( group, "AutoUploadProxy", False ): continue notKey = ( userDN, group ) for notifLimit 
in notifLimits: if notifLimit < lTime: #Not yet in this notification limit continue if notKey in notifDone and notifDone[ notKey ] <= notifLimit: #Already notified for this notification limit break if not self._notifyProxyAboutToExpire( userDN, group, lTime, notifLimit ): #Cannot send notification, retry later break try: sUserDN = self._escapeString( userDN )[ 'Value' ] sGroup = self._escapeString( group )[ 'Value' ] except KeyError: return S_ERROR( "OOPS" ) if notKey not in notifDone: values = "( %s, %s, %d, TIMESTAMPADD( SECOND, %s, UTC_TIMESTAMP() ) )" % ( sUserDN, sGroup, notifLimit, lTime ) cmd = "INSERT INTO `ProxyDB_ExpNotifs` ( UserDN, UserGroup, LifeLimit, ExpirationTime ) VALUES %s" % values result = self._update( cmd ) if not result[ 'OK' ]: gLogger.error( "Could not mark notification as sent", result[ 'Message' ] ) else: values = "LifeLimit = %d, ExpirationTime = TIMESTAMPADD( SECOND, %s, UTC_TIMESTAMP() )" % ( notifLimit, lTime ) cmd = "UPDATE `ProxyDB_ExpNotifs` SET %s WHERE UserDN = %s AND UserGroup = %s" % ( values, sUserDN, sGroup ) result = self._update( cmd ) if not result[ 'OK' ]: gLogger.error( "Could not mark notification as sent", result[ 'Message' ] ) sent.append( ( userDN, group, lTime ) ) notifDone[ notKey ] = notifLimit return S_OK( sent ) def _notifyProxyAboutToExpire( self, userDN, userGroup, lTime, notifLimit ): result = Registry.getUsernameForDN( userDN ) if not result[ 'OK' ]: return False userName = result[ 'Value' ] userEMail = Registry.getUserOption( userName, "Email", "" ) if not userEMail: gLogger.error( "Could not discover user email", userName ) return False daysLeft = int( lTime / 86400 ) msgSubject = "Your proxy uploaded to DIRAC will expire in %d days" % daysLeft msgBody = """\ Dear %s, The proxy you uploaded to DIRAC will expire in aproximately %d days. 
The proxy information is: DN: %s Group: %s If you plan on keep using this credentials please upload a newer proxy to DIRAC by executing: $ dirac-proxy-init -P -g %s --rfc If you have been issued different certificate, please make sure you have a proxy uploaded with that certificate. Cheers, DIRAC's Proxy Manager """ % ( userName, daysLeft, userDN, userGroup, userGroup ) fromAddr = self.getFromAddr() result = self.__notifClient.sendMail( userEMail, msgSubject, msgBody, fromAddress = fromAddr ) if not result[ 'OK' ]: gLogger.error( "Could not send email", result[ 'Message' ] ) return False return True
class TokenAgent(AgentModule):
  """ Agent that reclaims expired RSS tokens and warns owners of tokens
      that are close to expiring.
  """

  def initialize(self):
    """ Set up the ResourceStatus DB client and the notification client. """
    try:
      self.rsDB = ResourceStatusDB()
      self.nc = NotificationClient()
      return S_OK()
    except Exception:
      errMsg = "TokenAgent initialization"
      gLogger.exception(errMsg)
      return S_ERROR(errMsg)

  def execute(self):
    """ The main TokenAgent execution method.

        Checks for tokens owned by users that are expiring, and notifies
        those users. Calls rsDB.setToken() to set 'RS_SVC' as owner for
        those tokens that expired.
    """
    try:
      granularities = ('Site', 'StorageElement')
      neverExpires = datetime.datetime(9999, 12, 31, 23, 59, 59)

      # Hand every already-expired token back to the service account
      for granularity in granularities:
        for expired in self.rsDB.getTokens(granularity, None, datetime.datetime.utcnow()):
          self.rsDB.setToken(granularity, expired[0], 'RS_SVC', neverExpires)

      # Warn owners whose tokens expire within the next two hours
      in2Hours = datetime.datetime.utcnow() + datetime.timedelta(hours=2)
      for granularity in granularities:
        for expiring in self.rsDB.getTokens(granularity, None, in2Hours):
          name, owner, expiration = expiring[0], expiring[1], expiring[2]
          if owner == 'RS_SVC':
            # Service-owned tokens need no warning
            continue
          mailMessage = ("The token for %s %s " % (granularity, name)
                         + "will expire on %s\n\n" % expiration
                         + "You can renew it with command 'dirac-rss-renew-token'.\n"
                         + "If you don't take any action, RSS will take control of the resource.")
          self.nc.sendMail(getMailForUser(owner)['Value'][0],
                           'Token for %s is expiring' % name,
                           mailMessage)

      return S_OK()
    except Exception:
      errMsg = "TokenAgent execution"
      gLogger.exception(errMsg)
      return S_ERROR(errMsg)
def registerUser(self, paramcopy):
  """ Process a user registration request submitted from the web form.

      paramcopy is the dict of submitted form fields. Returns a JSON-style
      dict: {"success": "true"/"false", "result"/"error": message}.
  """
  gLogger.info("Start processing a registration request")
  # Unfortunately there is no way to get rid of empty text values in JS,
  # so the placeholder defaults have to be hardcoded on the server side.
  default_values = ["John Smith", "jsmith", "*****@*****.**", "+33 9 10 00 10 00"]
  default_values.append("Select preferred virtual organization(s)")
  default_values.append("Select your country")
  default_values.append("Any additional information you want to provide to administrators")

  # Check for having a DN but no username
  dn = getUserDN()
  username = getUsername()
  gLogger.debug("User's DN: %s and DIRAC username: %s" % (dn, username))
  if not username == "anonymous":
    error = "You are already registered in DIRAC with username: %s" % username
    gLogger.debug("Service response: %s" % error)
    return {"success": "false", "error": error}
  if not dn:
    error = "Certificate is not loaded to a browser or DN is absent"
    gLogger.debug("Service response: %s" % error)
    return {"success": "false", "error": error}

  body = ""
  userMail = False
  vo = []
  # Check for user's email, creating mail body
  gLogger.debug("Request's body:")
  for i in paramcopy:
    gLogger.debug("%s - %s" % (i, paramcopy[i]))
    if not paramcopy[i] in default_values:
      if i == "email":
        userMail = paramcopy[i]
      if i == "vo":
        vo = paramcopy[i].split(",")
      body = body + str(i) + ' - "' + str(paramcopy[i]) + '"\n'
  if not userMail:
    error = "Can not get your email address from the request"
    gLogger.debug("Service response: %s" % error)
    return {"success": "false", "error": error}
  gLogger.info("User want to be register in VO(s): %s" % vo)

  # TODO Check for previous requests

  # Get admin mail based on requested VO i.e. mail of VO admin
  mails = list()
  gLogger.debug("Trying to get admin username to take care about request")
  for i in vo:
    gLogger.debug("VOAdmin for VO: %s" % i)
    i = i.strip()
    voadm = gConfig.getValue("/Registry/VO/%s/VOAdmin" % i, [])
    gLogger.debug("/Registry/VO/%s/VOAdmin - %s" % (i, voadm))
    for user in voadm:
      mails.append(user)

  # If no VOAdmin - try to get admin mails based on group properties
  if not mails:
    gLogger.debug("No VO admins found. Trying to get something based on group property")
    groupList = list()
    groups = gConfig.getSections("/Registry/Groups")
    gLogger.debug("Group response: %s" % groups)
    if groups["OK"]:
      allGroups = groups["Value"]
      gLogger.debug("Looking for UserAdministrator property")
      for j in allGroups:
        props = getProperties(j)
        gLogger.debug("%s properties: %s" % (j, props))
        if "UserAdministrator" in props:  # property which is used for user administration
          groupList.append(j)
      groupList = uniqueElements(groupList)
      gLogger.debug("Chosen group(s): %s" % groupList)
      if len(groupList) > 0:
        for i in groupList:
          users = gConfig.getValue("/Registry/Groups/%s/Users" % i, [])
          gLogger.debug("%s users: %s" % (i, users))
          for user in users:
            mails.append(user)

  # Last stand - Failsafe option
  if not mails:
    gLogger.debug("No suitable groups found. Trying failsafe")
    regAdmin = gConfig.getValue("/Website/UserRegistrationAdmin", [])
    gLogger.debug("/Website/UserRegistrationAdmin - %s" % regAdmin)
    for user in regAdmin:
      mails.append(user)

  mails = uniqueElements(mails)
  gLogger.info("Chosen admin(s): %s" % mails)

  # Final check of usernames
  if not mails:
    error = "Can't get in contact with administrators about your request\n"
    error = error + "Most likely this DIRAC instance is not configured yet"
    gLogger.debug("Service response: %s" % error)
    return {"success": "false", "error": error}

  # Convert usernames to { e-mail : full name }
  gLogger.debug("Trying to get admin's mail and associated name")
  sendDict = dict()
  for user in mails:
    email = gConfig.getValue("/Registry/Users/%s/Email" % user, "")
    gLogger.debug("/Registry/Users/%s/Email - '%s'" % (user, email))
    # BUGFIX: the stripped value used to be assigned to a misspelled variable
    # ("emil"), so the unstripped value was checked and used below.
    email = email.strip()
    if not email:
      gLogger.error("Can't find value for option /Registry/Users/%s/Email" % user)
      continue
    fname = gConfig.getValue("/Registry/Users/%s/FullName" % user, "")
    gLogger.debug("/Registry/Users/%s/FullName - '%s'" % (user, fname))
    fname = fname.strip()
    if not fname:
      fname = user
      gLogger.debug("FullName is absent, name to be used: %s" % fname)
    sendDict[email] = fname

  # Final check of mails
  gLogger.debug("Final dictionary with mails to be used %s" % sendDict)
  if not sendDict:
    error = "Can't get in contact with administrators about your request\n"
    error = error + "Most likely this DIRAC instance is not configured yet"
    gLogger.debug("Service response: %s" % error)
    return {"success": "false", "error": error}

  # Sending a mail
  sentSuccess = list()
  sentFailed = list()
  gLogger.debug("Initializing Notification client")
  ntc = NotificationClient(lambda x, timeout: getRPCClient(x, timeout=timeout, static = True))
  gLogger.debug("Sending messages")
  for email, name in sendDict.iteritems():
    gLogger.debug("ntc.sendMail(%s,New user has registered,%s,%s,False" % (email, body, userMail))
    result = ntc.sendMail(email, "New user has registered", body, userMail, False)
    if not result["OK"]:
      error = name + ": " + result["Message"]
      sentFailed.append(error)
      gLogger.error("Sent failure: ", error)
    else:
      gLogger.info("Successfully sent to %s" % name)
      sentSuccess.append(name)

  # Returning results
  sName = ", ".join(sentSuccess)
  gLogger.info("End of processing of a registration request")
  gLogger.debug("Service response sent to a user:")
  # NOTE(review): the first branch below was reconstructed from a redacted
  # source span; it mirrors the structure of the subsequent branches.
  if sentSuccess and sentFailed:
    result = "Your registration request were sent successfully to: "
    result = result + sName + "\n\nFailed to sent it to:\n"
    result = result + "\n".join(sentFailed)
    gLogger.debug(result)
    return {"success": "true", "result": result}
  elif sentSuccess and not sentFailed:
    result = "Your registration request were sent successfully to: %s" % sName
    gLogger.debug(result)
    return {"success": "true", "result": result}
  elif not sentSuccess and sentFailed:
    result = "Failed to sent your request to:\n"
    result = result + "\n".join(sentFailed)
    gLogger.debug(result)
    return {"success": "false", "error": result}
  else:
    result = "No messages were sent to administrator due technical failure"
    gLogger.debug(result)
    return {"success": "false", "error": result}
# Script snippet: manually renew RSS tokens for the given elements and send a
# confirmation e-mail to the caller. Requires an lhcb_prod or diracAdmin proxy.
from DIRAC.Core.Security.Misc import getProxyInfo
from DIRAC import gConfig, gLogger
from DIRAC.Core.DISET.RPCClient import RPCClient
from DIRAC.ResourceStatusSystem.Utilities.CS import getMailForUser

nc = NotificationClient()
s = RPCClient("ResourceStatus/ResourceStatus")

# Identify the caller from the active proxy
res = getProxyInfo()
if not res["OK"]:
  gLogger.error("Failed to get proxy information", res["Message"])
  DIRAC.exit(2)
userName = res["Value"]["username"]
group = res["Value"]["group"]
# Only production managers / admins may extend tokens
if group not in ("diracAdmin", "lhcb_prod"):
  gLogger.error("You must be lhcb_prod or diracAdmin to execute this script")
  gLogger.info("Please issue 'lhcb-proxy-init -g lhcb_prod' or 'lhcb-proxy-init -g diracAdmin'")
  DIRAC.exit(2)

# NOTE(review): 'args' (positional arguments) and 'hours' are assumed to be
# defined earlier in the script -- not visible in this chunk; confirm.
for arg in args:
  # Ask the service what kind of element this is, then extend its token
  g = s.whatIs(arg)
  res = s.extendToken(g, arg, hours)
  if not res["OK"]:
    gLogger.error("Problem with extending: %s" % res["Message"])
    DIRAC.exit(2)
  mailMessage = "The token for %s %s has been successfully renewed for others %i hours" % (g, arg, hours)
  nc.sendMail(getMailForUser(userName)["Value"][0], "Token for %s renewed" % arg, mailMessage)
DIRAC.exit(0)
def main():
    """ Scan every host of the current setup, collect which DIRAC components and
        databases are installed on each, and register them through
        MonitoringUtilities.monitorInstallation(). Hosts that cannot be contacted
        are excluded from the scan and reported by e-mail.
    """
    global excludedHosts
    Script.registerSwitch(
        "e:", "exclude=",
        "Comma separated list of hosts to be excluded from the scanning process",
        setExcludedHosts)
    Script.parseCommandLine(ignoreErrors=False)
    componentType = ''

    # Get my setup
    mySetup = gConfig.getValue('DIRAC/Setup')

    # Retrieve information from all the hosts
    client = SystemAdministratorIntegrator(exclude=excludedHosts)
    resultAll = client.getOverallStatus()

    # Retrieve user installing the component
    result = getProxyInfo()
    if result['OK']:
        user = result['Value']['username']
    else:
        DIRACexit(-1)
    if not user:
        # NOTE(review): this fallback literal was redacted in the source
        # ('******'); presumably a placeholder such as 'unknown' -- confirm.
        user = '******'

    notificationClient = NotificationClient()
    for host in resultAll['Value']:
        if not resultAll['Value'][host]['OK']:
            # If the host cannot be contacted, exclude it and send message
            excludedHosts.append(host)
            result = notificationClient.sendMail(
                Operations().getValue('EMail/Production', []),
                'Unreachable host',
                '\ndirac-populate-component-db: Could not fill the database with the components from unreachable host %s\n' % host)
            if not result['OK']:
                gLogger.error(
                    'Can not send unreachable host notification mail: %s' % result['Message'])

    if not resultAll['OK']:
        gLogger.error(resultAll['Message'])
        DIRACexit(-1)
    resultHosts = client.getHostInfo()
    if not resultHosts['OK']:
        gLogger.error(resultHosts['Message'])
        DIRACexit(-1)
    resultInfo = client.getInfo()
    if not resultInfo['OK']:
        gLogger.error(resultInfo['Message'])
        DIRACexit(-1)
    resultMySQL = client.getMySQLStatus()
    if not resultMySQL['OK']:
        gLogger.error(resultMySQL['Message'])
        DIRACexit(-1)
    resultAllDB = client.getDatabases()
    if not resultAllDB['OK']:
        gLogger.error(resultAllDB['Message'])
        DIRACexit(-1)
    resultAvailableDB = client.getAvailableDatabases()
    if not resultAvailableDB['OK']:
        gLogger.error(resultAvailableDB['Message'])
        DIRACexit(-1)

    records = []
    # Hosts that answered (scanned set minus excluded ones)
    finalSet = list(set(resultAll['Value']) - set(excludedHosts))
    for host in finalSet:
        hasMySQL = True
        result = resultAll['Value'][host]
        hostResult = resultHosts['Value'][host]
        infoResult = resultInfo['Value'][host]
        mySQLResult = resultMySQL['Value'][host]
        allDBResult = resultAllDB['Value'][host]
        availableDBResult = resultAvailableDB['Value'][host]
        if not result['OK']:
            gLogger.error('Host %s: %s' % (host, result['Message']))
            continue
        if not hostResult['OK']:
            gLogger.error('Host %s: %s' % (host, hostResult['Message']))
            continue
        if not infoResult['OK']:
            gLogger.error('Host %s: %s' % (host, infoResult['Message']))
            continue
        if mySQLResult['OK']:
            if not allDBResult['OK']:
                gLogger.error('Host %s: %s' % (host, allDBResult['Message']))
                continue
            if not availableDBResult['OK']:
                gLogger.error('Host %s: %s' % (host, availableDBResult['Message']))
                continue
        else:
            # No MySQL on this host: skip the database section later on
            hasMySQL = False
        setup = infoResult['Value']['Setup']
        if setup != mySetup:
            # Only record components belonging to the current setup
            continue
        cpu = hostResult['Value']['CPUModel'].strip()
        rDict = result['Value']
        # Components other than databases
        for compType in rDict:
            if componentType and componentType != compType:
                continue
            for system in rDict[compType]:
                components = sorted(rDict[compType][system])
                for component in components:
                    record = {'Installation': {}, 'Component': {}, 'Host': {}}
                    if rDict[compType][system][component]['Installed'] and \
                            component != 'ComponentMonitoring':
                        runitStatus = \
                            str(rDict[compType][system][component]['RunitStatus'])
                        if runitStatus != 'Unknown':
                            module = \
                                str(rDict[compType][system][component]['Module'])
                            record['Component']['System'] = system
                            record['Component']['Module'] = module
                            # Transform 'Services' into 'service', 'Agents' into 'agent' ...
                            record['Component']['Type'] = compType.lower()[:-1]
                            record['Host']['HostName'] = host
                            record['Host']['CPU'] = cpu
                            record['Installation']['Instance'] = component
                            record['Installation'][
                                'InstallationTime'] = datetime.utcnow()
                            record['Installation']['InstalledBy'] = user
                            records.append(record)
        # Databases
        csClient = CSAPI()
        cfg = csClient.getCurrentCFG()['Value']
        if hasMySQL:
            allDB = allDBResult['Value']
            availableDB = availableDBResult['Value']
            for db in allDB:
                # Check for DIRAC only databases
                if db in availableDB and db != 'InstalledComponentsDB':
                    # Check for 'installed' databases
                    isSection = cfg.isSection(
                        'Systems/' + availableDB[db]['System'] + '/' +
                        cfg.getOption('DIRAC/Setups/' + setup + '/' +
                                      availableDB[db]['System']) +
                        '/Databases/' + db + '/')
                    if isSection:
                        record = {
                            'Installation': {},
                            'Component': {},
                            'Host': {}
                        }
                        record['Component']['System'] = availableDB[db][
                            'System']
                        record['Component']['Module'] = db
                        record['Component']['Type'] = 'DB'
                        record['Host']['HostName'] = host
                        record['Host']['CPU'] = cpu
                        record['Installation']['Instance'] = db
                        record['Installation'][
                            'InstallationTime'] = datetime.utcnow()
                        record['Installation']['InstalledBy'] = user
                        records.append(record)

    # NOTE(review): monitoringClient is created but not used in the visible
    # code -- possibly a leftover; confirm before removing.
    monitoringClient = ComponentMonitoringClient()
    # Add the installations to the database
    for record in records:
        result = MonitoringUtilities.monitorInstallation(
            record['Component']['Type'], record['Component']['System'],
            record['Installation']['Instance'], record['Component']['Module'],
            record['Host']['CPU'], record['Host']['HostName'])
        if not result['OK']:
            gLogger.error(result['Message'])
def sendMail(self, sendDict=None, title=None, body=None, fromAddress=None):
    """
    Sending an email using sendDict: { e-mail : name } as addressbook
    title and body is the e-mail's Subject and Body
    fromAddress is an email address in behalf of whom the message is sent
    Return success/failure JSON structure
    """
    if not sendDict:
        # BUGFIX: this branch used to return an empty error string; now it
        # reports the missing argument like the sibling checks below.
        result = "sendDict argument is missing"
        gLogger.debug(result)
        return {"success": "false", "error": result}
    if not title:
        result = "title argument is missing"
        gLogger.debug(result)
        return {"success": "false", "error": result}
    if not body:
        result = "body argument is missing"
        gLogger.debug(result)
        return {"success": "false", "error": result}
    if not fromAddress:
        result = "fromAddress argument is missing"
        gLogger.debug(result)
        return {"success": "false", "error": result}

    sentSuccess = list()
    sentFailed = list()
    gLogger.debug("Initializing Notification client")
    ntc = NotificationClient(
        lambda x, timeout: RPCClient(x, timeout=timeout, static=True))

    # Send one message per recipient, collecting successes and failures
    for email, name in sendDict.iteritems():
        result = ntc.sendMail(email, title, body, fromAddress, False)
        if not result["OK"]:
            error = name + ": " + result["Message"]
            sentFailed.append(error)
            gLogger.error("Sent failure: ", error)
        else:
            gLogger.info("Successfully sent to %s" % name)
            sentSuccess.append(name)

    # Build the combined response depending on the outcome mix
    success = ", ".join(sentSuccess)
    failure = "\n".join(sentFailed)
    if success and failure:
        result = "Successfully sent e-mail to: "
        result = result + success + "\n\nFailed to send e-mail to:\n" + failure
        gLogger.debug(result)
        return {"success": "true", "result": result}
    elif success and len(failure) < 1:
        result = "Successfully sent e-mail to: %s" % success
        gLogger.debug(result)
        return {"success": "true", "result": result}
    elif len(success) < 1 and failure:
        result = "Failed to sent email to:\n%s" % failure
        gLogger.debug(result)
        return {"success": "false", "error": result}
    result = "No messages were sent due technical failure"
    gLogger.debug(result)
    return {"success": "false", "error": result}
class ProxyDB(DB):
    """
    Database for user proxies: storage, delegation requests, VOMS-extended
    proxies, usage tokens, an action log, and expiration notifications.
    """

    # Seconds before proxy expiration at which reminder mails are sent
    # (30 days and 15 days)
    NOTIFICATION_TIMES = [2592000, 1296000]

    def __init__(self, useMyProxy=False):
        DB.__init__(self, 'ProxyDB', 'Framework/ProxyDB')
        random.seed()
        self.__defaultRequestLifetime = 300  # 5min
        self.__defaultTokenLifetime = 86400 * 7  # 1 week
        self.__defaultTokenMaxUses = 50
        self.__useMyProxy = useMyProxy
        # Minimum remaining lifetime (secs) a proxy must have to be stored
        self._minSecsToAllowStore = 3600
        self.__notifClient = NotificationClient()
        retVal = self.__initializeDB()
        if not retVal['OK']:
            raise Exception("Can't create tables: %s" % retVal['Message'])
        # Clean up on startup; expiration notifications are sent separately
        self.purgeExpiredProxies(sendNotifications=False)
        self.__checkDBVersion()

    def getMyProxyServer(self):
        # MyProxy endpoint, configurable per installation
        return gConfig.getValue("/DIRAC/VOPolicy/MyProxyServer", "myproxy.cern.ch")

    def getMyProxyMaxLifeTime(self):
        # CS value is in hours; converted to seconds here
        return gConfig.getValue("/DIRAC/VOPolicy/MyProxyMaxDelegationTime", 168) * 3600

    def __initializeDB(self):
        """
        Create the tables
        """
        retVal = self._query("show tables")
        if not retVal['OK']:
            return retVal
        tablesInDB = [t[0] for t in retVal['Value']]
        tablesD = {}
        # Pending delegation requests (short lived, see __defaultRequestLifetime)
        if 'ProxyDB_Requests' not in tablesInDB:
            tablesD['ProxyDB_Requests'] = {
                'Fields': {
                    'Id': 'INTEGER AUTO_INCREMENT NOT NULL',
                    'UserDN': 'VARCHAR(255) NOT NULL',
                    'Pem': 'BLOB',
                    'ExpirationTime': 'DATETIME'
                },
                'PrimaryKey': 'Id'
            }
        # Plain (non-VOMS) uploaded proxies, one per (DN, group)
        if 'ProxyDB_Proxies' not in tablesInDB:
            tablesD['ProxyDB_Proxies'] = {
                'Fields': {
                    'UserName': '******',
                    'UserDN': 'VARCHAR(255) NOT NULL',
                    'UserGroup': 'VARCHAR(255) NOT NULL',
                    'Pem': 'BLOB',
                    'ExpirationTime': 'DATETIME',
                    'PersistentFlag': 'ENUM ("True","False") NOT NULL DEFAULT "True"',
                },
                'PrimaryKey': ['UserDN', 'UserGroup']
            }
        # Cache of VOMS-dressed proxies, keyed also by the VOMS attribute
        if 'ProxyDB_VOMSProxies' not in tablesInDB:
            tablesD['ProxyDB_VOMSProxies'] = {
                'Fields': {
                    'UserName': '******',
                    'UserDN': 'VARCHAR(255) NOT NULL',
                    'UserGroup': 'VARCHAR(255) NOT NULL',
                    'VOMSAttr': 'VARCHAR(255) NOT NULL',
                    'Pem': 'BLOB',
                    'ExpirationTime': 'DATETIME',
                },
                # NOTE(review): key says 'vomsAttr' while the field is 'VOMSAttr';
                # MySQL identifiers are case-insensitive so this works — verify
                'PrimaryKey': ['UserDN', 'UserGroup', 'vomsAttr']
            }
        # Audit log of proxy actions (see logAction)
        if 'ProxyDB_Log' not in tablesInDB:
            tablesD['ProxyDB_Log'] = {
                'Fields': {
                    'ID': 'BIGINT NOT NULL AUTO_INCREMENT',
                    'IssuerDN': 'VARCHAR(255) NOT NULL',
                    'IssuerGroup': 'VARCHAR(255) NOT NULL',
                    'TargetDN': 'VARCHAR(255) NOT NULL',
                    'TargetGroup': 'VARCHAR(255) NOT NULL',
                    'Action': 'VARCHAR(128) NOT NULL',
                    'Timestamp': 'DATETIME',
                },
                'PrimaryKey': 'ID',
                'Indexes': {
                    'Timestamp': ['Timestamp']
                }
            }
        # Limited-use tokens for proxy retrieval on behalf of a requester
        if 'ProxyDB_Tokens' not in tablesInDB:
            tablesD['ProxyDB_Tokens'] = {
                'Fields': {
                    'Token': 'VARCHAR(64) NOT NULL',
                    'RequesterDN': 'VARCHAR(255) NOT NULL',
                    'RequesterGroup': 'VARCHAR(255) NOT NULL',
                    'ExpirationTime': 'DATETIME NOT NULL',
                    'UsesLeft': 'SMALLINT UNSIGNED DEFAULT 1',
                },
                'PrimaryKey': 'Token'
            }
        # Bookkeeping of which expiration notifications were already sent
        if 'ProxyDB_ExpNotifs' not in tablesInDB:
            tablesD['ProxyDB_ExpNotifs'] = {
                'Fields': {
                    'UserDN': 'VARCHAR(255) NOT NULL',
                    'UserGroup': 'VARCHAR(255) NOT NULL',
                    'LifeLimit': 'INTEGER UNSIGNED DEFAULT 0',
                    'ExpirationTime': 'DATETIME NOT NULL',
                },
                'PrimaryKey': ['UserDN', 'UserGroup']
            }
        return self._createTables(tablesD)

    def __addUserNameToTable(self, tableName):
        # Schema migration: add a UserName column and backfill it from the
        # Registry, matching on UserDN
        result = self._update(
            "ALTER TABLE `%s` ADD COLUMN UserName VARCHAR(64) NOT NULL" % tableName)
        if not result['OK']:
            return result
        result = self._query("SELECT DISTINCT UserName, UserDN FROM `%s`" % tableName)
        if not result['OK']:
            return result
        data = result['Value']
        for userName, userDN in data:
            if not userName:
                result = Registry.getUsernameForDN(userDN)
                if not result['OK']:
                    self.log.error("Could not retrieve username for DN", userDN)
                    continue
                userName = result['Value']
            try:
                userName = self._escapeString(userName)['Value']
                userDN = self._escapeString(userDN)['Value']
            except KeyError:
                self.log.error("Could not escape username or DN",
                               "%s %s" % (userName, userDN))
                continue
            # NOTE(review): this reassignment overwrites the just-escaped
            # userName with result['Value'] — which may even be the earlier
            # SELECT's row list when the Registry branch was not taken.
            # Looks like a latent bug; verify before relying on this path.
            userName = result['Value']
            result = self._update(
                "UPDATE `%s` SET UserName=%s WHERE UserDN=%s" %
                (tableName, userName, userDN))
            if not result['OK']:
                self.log.error("Could update username for DN",
                               "%s: %s" % (userDN, result['Message']))
                continue
            self.log.info("UserDN %s has user %s" % (userDN, userName))
        return S_OK()

    def __checkDBVersion(self):
        # Detect pre-UserName schemas and migrate them in place
        for tableName in ("ProxyDB_Proxies", "ProxyDB_VOMSProxies"):
            result = self._query("describe `%s`" % tableName)
            if not result['OK']:
                return result
            if 'UserName' not in [row[0] for row in result['Value']]:
                self.log.notice(
                    "Username missing in table %s schema. Adding it" % tableName)
                result = self.__addUserNameToTable(tableName)
                if not result['OK']:
                    return result

    def generateDelegationRequest(self, proxyChain, userDN):
        """ Generate a request and store it for a given proxy Chain """
        retVal = self._getConnection()
        if not retVal['OK']:
            return retVal
        connObj = retVal['Value']
        retVal = proxyChain.generateProxyRequest()
        if not retVal['OK']:
            return retVal
        request = retVal['Value']
        retVal = request.dumpRequest()
        if not retVal['OK']:
            return retVal
        reqStr = retVal['Value']
        retVal = request.dumpPKey()
        if not retVal['OK']:
            return retVal
        # Request PEM and private key are stored together in one blob
        allStr = reqStr + retVal['Value']
        try:
            sUserDN = self._escapeString(userDN)['Value']
            sAllStr = self._escapeString(allStr)['Value']
        except KeyError:
            return S_ERROR("Cannot escape DN")
        cmd = "INSERT INTO `ProxyDB_Requests` ( Id, UserDN, Pem, ExpirationTime )"
        cmd += " VALUES ( 0, %s, %s, TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() ) )" % (
            sUserDN, sAllStr, int(self.__defaultRequestLifetime))
        retVal = self._update(cmd, conn=connObj)
        if not retVal['OK']:
            return retVal
        #99% of the times we will stop here
        if 'lastRowId' in retVal:
            return S_OK({'id': retVal['lastRowId'], 'request': reqStr})
        #If the lastRowId hack does not work. Get it by hand
        retVal = self._query(
            "SELECT Id FROM `ProxyDB_Requests` WHERE Pem='%s'" % reqStr)
        if not retVal['OK']:
            return retVal
        data = retVal['Value']
        if len(data) == 0:
            return S_ERROR(
                "Insertion of the request in the db didn't work as expected")
        retVal = proxyChain.getDIRACGroup()
        if retVal['OK'] and retVal['Value']:
            userGroup = retVal['Value']
        else:
            userGroup = "unset"
        self.logAction("request upload", userDN, userGroup, userDN, "any")
        #Here we go!
        return S_OK({'id': data[0][0], 'request': reqStr})

    def retrieveDelegationRequest(self, requestId, userDN):
        """ Retrieve a request from the DB """
        try:
            sUserDN = self._escapeString(userDN)['Value']
        except KeyError:
            return S_ERROR("Cannot escape DN")
        cmd = "SELECT Pem FROM `ProxyDB_Requests` WHERE Id = %s AND UserDN = %s" % (
            requestId, sUserDN)
        retVal = self._query(cmd)
        if not retVal['OK']:
            return retVal
        data = retVal['Value']
        if len(data) == 0:
            return S_ERROR("No requests with id %s" % requestId)
        request = X509Request()
        retVal = request.loadAllFromString(data[0][0])
        if not retVal['OK']:
            return retVal
        return S_OK(request)

    def purgeExpiredRequests(self):
        """ Purge expired requests from the db """
        cmd = "DELETE FROM `ProxyDB_Requests` WHERE ExpirationTime < UTC_TIMESTAMP()"
        return self._update(cmd)

    def deleteRequest(self, requestId):
        """ Delete a request from the db """
        cmd = "DELETE FROM `ProxyDB_Requests` WHERE Id=%s" % requestId
        return self._update(cmd)

    def completeDelegation(self, requestId, userDN, delegatedPem):
        """ Complete a delegation and store it in the db """
        retVal = self.retrieveDelegationRequest(requestId, userDN)
        if not retVal['OK']:
            return retVal
        request = retVal['Value']
        chain = X509Chain(keyObj=request.getPKey())
        retVal = chain.loadChainFromString(delegatedPem)
        if not retVal['OK']:
            return retVal
        retVal = chain.isValidProxy(ignoreDefault=True)
        noGroupFlag = False
        if not retVal['OK']:
            # A proxy without an explicit DIRAC group is tolerated and
            # handled specially below
            if retVal['Message'] == "Proxy does not have an explicit group":
                noGroupFlag = True
            else:
                return retVal
        result = chain.isVOMS()
        if result['OK'] and result['Value']:
            return S_ERROR(
                "Proxies with VOMS extensions are not allowed to be uploaded")
        retVal = request.checkChain(chain)
        if not retVal['OK']:
            return retVal
        if not retVal['Value']:
            return S_ERROR("Received chain does not match request: %s" %
                           retVal['Message'])
        retVal = chain.getDIRACGroup()
        if not retVal['OK']:
            return retVal
        userGroup = retVal['Value']
        if not userGroup:
            userGroup = Registry.getDefaultUserGroup()
        retVal = Registry.getGroupsForDN(userDN)
        if not retVal['OK']:
            return retVal
        if not userGroup in retVal['Value']:
            return S_ERROR("%s group is not valid for %s" % (userGroup, userDN))
        # For proxies without embedded DIRAC group only one default is allowed
        # Cleaning all the proxies for this DN if any before uploading the new one.
        if noGroupFlag:
            retVal = self.deleteProxy(userDN)
            if not retVal['OK']:
                return retVal
        retVal = self.storeProxy(userDN, userGroup, chain)
        if not retVal['OK']:
            return retVal
        retVal = self.deleteRequest(requestId)
        if not retVal['OK']:
            return retVal
        return S_OK()

    def storeProxy(self, userDN, userGroup, chain):
        """ Store user proxy into the Proxy repository for a user specified by his
            DN and group.
        """
        retVal = Registry.getUsernameForDN(userDN)
        if not retVal['OK']:
            return retVal
        userName = retVal['Value']
        #Get remaining secs
        retVal = chain.getRemainingSecs()
        if not retVal['OK']:
            return retVal
        remainingSecs = retVal['Value']
        if remainingSecs < self._minSecsToAllowStore:
            return S_ERROR(
                "Cannot store proxy, remaining secs %s is less than %s" %
                (remainingSecs, self._minSecsToAllowStore))
        #Compare the DNs
        retVal = chain.getIssuerCert()
        if not retVal['OK']:
            return retVal
        proxyIdentityDN = retVal['Value'].getSubjectDN()['Value']
        if not userDN == proxyIdentityDN:
            msg = "Mismatch in the user DN"
            vMsg = "Proxy says %s and credentials are %s" % (proxyIdentityDN,
                                                             userDN)
            self.log.error(msg, vMsg)
            return S_ERROR("%s. %s" % (msg, vMsg))
        #Check the groups
        retVal = chain.getDIRACGroup()
        if not retVal['OK']:
            return retVal
        proxyGroup = retVal['Value']
        if not proxyGroup:
            proxyGroup = Registry.getDefaultUserGroup()
        if not userGroup == proxyGroup:
            msg = "Mismatch in the user group"
            vMsg = "Proxy says %s and credentials are %s" % (proxyGroup,
                                                             userGroup)
            self.log.error(msg, vMsg)
            return S_ERROR("%s. %s" % (msg, vMsg))
        #Check if its limited
        if chain.isLimitedProxy()['Value']:
            return S_ERROR("Limited proxies are not allowed to be stored")
        # Python 2 integer division: break remaining secs into d:h:m:s for the log
        dLeft = remainingSecs / 86400
        hLeft = remainingSecs / 3600 - dLeft * 24
        mLeft = remainingSecs / 60 - hLeft * 60 - dLeft * 1440
        sLeft = remainingSecs - hLeft * 3600 - mLeft * 60 - dLeft * 86400
        self.log.info(
            "Storing proxy for credentials %s (%d:%02d:%02d:%02d left)" %
            (proxyIdentityDN, dLeft, hLeft, mLeft, sLeft))
        try:
            sUserDN = self._escapeString(userDN)['Value']
            sUserGroup = self._escapeString(userGroup)['Value']
        except KeyError:
            return S_ERROR("Cannot escape DN")
        # Check what we have already got in the repository
        cmd = "SELECT TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ), Pem FROM `ProxyDB_Proxies` WHERE UserDN=%s AND UserGroup=%s" % (
            sUserDN, sUserGroup)
        result = self._query(cmd)
        if not result['OK']:
            return result
        # check if there is a previous ticket for the DN
        data = result['Value']
        sqlInsert = True
        if len(data) > 0:
            sqlInsert = False
            pem = data[0][1]
            if pem:
                remainingSecsInDB = data[0][0]
                # Keep the stored proxy if it lives longer than the uploaded one
                if remainingSecs <= remainingSecsInDB:
                    self.log.info(
                        "Proxy stored is longer than uploaded, omitting.",
                        "%s in uploaded, %s in db" % (remainingSecs,
                                                      remainingSecsInDB))
                    return S_OK()
        pemChain = chain.dumpAllToString()['Value']
        dValues = {
            'UserName': self._escapeString(userName)['Value'],
            'UserDN': sUserDN,
            'UserGroup': sUserGroup,
            'Pem': self._escapeString(pemChain)['Value'],
            'ExpirationTime':
            'TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() )' % int(remainingSecs),
            'PersistentFlag': "'False'"
        }
        if sqlInsert:
            sqlFields = []
            sqlValues = []
            for key in dValues:
                sqlFields.append(key)
                sqlValues.append(dValues[key])
            cmd = "INSERT INTO `ProxyDB_Proxies` ( %s ) VALUES ( %s )" % (
                ", ".join(sqlFields), ", ".join(sqlValues))
        else:
            sqlSet = []
            sqlWhere = []
            for k in dValues:
                if k in ('UserDN', 'UserGroup'):
                    sqlWhere.append("%s = %s" % (k, dValues[k]))
                else:
                    sqlSet.append("%s = %s" % (k, dValues[k]))
            cmd = "UPDATE `ProxyDB_Proxies` SET %s WHERE %s" % (
                ", ".join(sqlSet), " AND ".join(sqlWhere))
        self.logAction("store proxy", userDN, userGroup, userDN, userGroup)
        return self._update(cmd)

    def purgeExpiredProxies(self, sendNotifications=True):
        """ Purge expired requests from the db """
        purged = 0
        for tableName in ("ProxyDB_Proxies", "ProxyDB_VOMSProxies"):
            cmd = "DELETE FROM `%s` WHERE ExpirationTime < UTC_TIMESTAMP()" % tableName
            result = self._update(cmd)
            if not result['OK']:
                return result
            purged += result['Value']
            self.log.info("Purged %s expired proxies from %s" %
                          (result['Value'], tableName))
        if sendNotifications:
            result = self.sendExpirationNotifications()
            if not result['OK']:
                return result
        return S_OK(purged)

    def deleteProxy(self, userDN, userGroup='any'):
        """ Remove proxy of the given user from the repository """
        try:
            userDN = self._escapeString(userDN)['Value']
            if userGroup != 'any':
                userGroup = self._escapeString(userGroup)['Value']
        except KeyError:
            return S_ERROR("Invalid DN or group")
        # %%s survives the first % formatting and becomes the table placeholder
        req = "DELETE FROM `%%s` WHERE UserDN=%s" % userDN
        if userGroup != 'any':
            req += " AND UserGroup=%s" % userGroup
        # NOTE(review): only the result of the LAST table's delete is returned;
        # a failure on the first table is silently discarded — verify intent
        for db in ['ProxyDB_Proxies', 'ProxyDB_VOMSProxies']:
            result = self._update(req % db)
        return result

    def __getPemAndTimeLeft(self, userDN, userGroup=False, vomsAttr=False):
        # Return (pem, secondsLeft) for the first non-empty matching proxy
        try:
            sUserDN = self._escapeString(userDN)['Value']
            if userGroup:
                sUserGroup = self._escapeString(userGroup)['Value']
            if vomsAttr:
                sVomsAttr = self._escapeString(vomsAttr)['Value']
        except KeyError:
            return S_ERROR("Invalid DN or group")
        if not vomsAttr:
            table = "`ProxyDB_Proxies`"
        else:
            table = "`ProxyDB_VOMSProxies`"
        # NOTE(review): no space between "%s" and the appended "WHERE" — the
        # backtick-quoted table name makes MySQL tokenize this anyway; verify
        cmd = "SELECT Pem, TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) from %s" % table
        cmd += "WHERE UserDN=%s AND TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) > 0" % (
            sUserDN)
        if userGroup:
            cmd += " AND UserGroup=%s" % sUserGroup
        if vomsAttr:
            cmd += " AND VOMSAttr=%s" % sVomsAttr
        retVal = self._query(cmd)
        if not retVal['OK']:
            return retVal
        data = retVal['Value']
        for record in data:
            if record[0]:
                return S_OK((record[0], record[1]))
        if userGroup:
            userMask = "%s@%s" % (userDN, userGroup)
        else:
            userMask = userDN
        return S_ERROR("%s has no proxy registered" % userMask)

    def renewFromMyProxy(self, userDN, userGroup, lifeTime=False, chain=False):
        # Renew a stored proxy through the MyProxy service (if enabled)
        if not lifeTime:
            lifeTime = 43200
        if not self.__useMyProxy:
            return S_ERROR("myproxy is disabled")
        #Get the chain
        if not chain:
            retVal = self.__getPemAndTimeLeft(userDN, userGroup)
            if not retVal['OK']:
                return retVal
            pemData = retVal['Value'][0]
            chain = X509Chain()
            retVal = chain.loadProxyFromString(pemData)
            if not retVal['OK']:
                return retVal
        originChainLifeTime = chain.getRemainingSecs()['Value']
        maxMyProxyLifeTime = self.getMyProxyMaxLifeTime()
        #If we have a chain that's 0.8 of max mplifetime don't ask to mp
        if originChainLifeTime > maxMyProxyLifeTime * 0.8:
            self.log.error(
                "Skipping myproxy download",
                "user %s %s chain has %s secs and requested %s secs" %
                (userDN, userGroup, originChainLifeTime, maxMyProxyLifeTime))
            return S_OK(chain)
        # Ask for some margin over the requested lifetime, capped at the max
        lifeTime *= 1.3
        if lifeTime > maxMyProxyLifeTime:
            lifeTime = maxMyProxyLifeTime
        # NOTE(review): logged at error level although it is an info message
        self.log.error(
            "Renewing proxy from myproxy",
            "user %s %s for %s secs" % (userDN, userGroup, lifeTime))
        myProxy = MyProxy(server=self.getMyProxyServer())
        retVal = myProxy.getDelegatedProxy(chain, lifeTime)
        if not retVal['OK']:
            return retVal
        mpChain = retVal['Value']
        retVal = mpChain.getRemainingSecs()
        if not retVal['OK']:
            return S_ERROR(
                "Can't retrieve remaining secs from renewed proxy: %s" %
                retVal['Message'])
        mpChainSecsLeft = retVal['Value']
        if mpChainSecsLeft < originChainLifeTime:
            self.log.info(
                "Chain downloaded from myproxy has less lifetime than the one stored in the db",
                "\n Downloaded from myproxy: %s secs\n Stored in DB: %s secs" %
                (mpChainSecsLeft, originChainLifeTime))
            return S_OK(chain)
        retVal = mpChain.getDIRACGroup()
        if not retVal['OK']:
            return S_ERROR(
                "Can't retrieve DIRAC Group from renewed proxy: %s" %
                retVal['Message'])
        chainGroup = retVal['Value']
        if chainGroup != userGroup:
            return S_ERROR(
                "Mismatch between renewed proxy group and expected: %s vs %s" %
                (userGroup, chainGroup))
        retVal = self.storeProxy(userDN, userGroup, mpChain)
        if not retVal['OK']:
            # Best effort: renewal succeeded even if the DB store failed
            self.log.error("Cannot store proxy after renewal",
                           retVal['Message'])
        retVal = myProxy.getServiceDN()
        if not retVal['OK']:
            hostDN = userDN
        else:
            hostDN = retVal['Value']
        self.logAction("myproxy renewal", hostDN, "host", userDN, userGroup)
        return S_OK(mpChain)

    def __getPUSProxy(self, userDN, userGroup, requiredLifetime, requestedVOMSAttr=None):
        # Fetch a Per User SubProxy from the VO's PUSP service over HTTP
        result = Registry.getGroupsForDN(userDN)
        if not result['OK']:
            return result
        validGroups = result['Value']
        if not userGroup in validGroups:
            return S_ERROR('Invalid group %s for user' % userGroup)
        voName = Registry.getVOForGroup(userGroup)
        if not voName:
            return S_ERROR('Can not determine VO for group %s' % userGroup)
        retVal = self.__getVOMSAttribute(userGroup, requestedVOMSAttr)
        if not retVal['OK']:
            return retVal
        vomsAttribute = retVal['Value']['attribute']
        vomsVO = retVal['Value']['VOMSVO']
        puspServiceURL = Registry.getVOOption(voName, 'PUSPServiceURL')
        if not puspServiceURL:
            return S_ERROR('Can not determine PUSP service URL for VO %s' %
                           voName)
        # PUSP DNs carry the user label after the last ':'
        user = userDN.split(":")[-1]
        puspURL = "%s?voms=%s:%s&proxy-renewal=false&disable-voms-proxy=false" \
                  "&rfc-proxy=true&cn-label=user:%s" % (puspServiceURL, vomsVO,
                                                        vomsAttribute, user)
        try:
            proxy = urllib.urlopen(puspURL).read()
        except Exception as e:
            return S_ERROR('Failed to get proxy from the PUSP server')
        chain = X509Chain()
        chain.loadChainFromString(proxy)
        chain.loadKeyFromString(proxy)
        result = chain.getCredentials()
        if not result['OK']:
            return S_ERROR('Failed to get a valid PUSP proxy')
        credDict = result['Value']
        if credDict['identity'] != userDN:
            return S_ERROR(
                'Requested DN does not match the obtained one in the PUSP proxy'
            )
        timeLeft = credDict['secondsLeft']
        result = chain.generateProxyToString(lifeTime=timeLeft,
                                             diracGroup=userGroup)
        if not result['OK']:
            return result
        proxyString = result['Value']
        return S_OK((proxyString, timeLeft))

    def getProxy(self, userDN, userGroup, requiredLifeTime=False):
        """ Get proxy string from the Proxy Repository for use with userDN
            in the userGroup
        """
        # Get the Per User SubProxy if one is requested
        if isPUSPdn(userDN):
            result = self.__getPUSProxy(userDN, userGroup, requiredLifeTime)
            if not result['OK']:
                return result
            pemData = result['Value'][0]
            timeLeft = result['Value'][1]
            chain = X509Chain()
            result = chain.loadProxyFromString(pemData)
            if not result['OK']:
                return result
            return S_OK((chain, timeLeft))
        # Standard proxy is requested
        retVal = self.__getPemAndTimeLeft(userDN, userGroup)
        if not retVal['OK']:
            return retVal
        pemData = retVal['Value'][0]
        timeLeft = retVal['Value'][1]
        chain = X509Chain()
        retVal = chain.loadProxyFromString(pemData)
        if not retVal['OK']:
            return retVal
        if requiredLifeTime:
            if timeLeft < requiredLifeTime:
                # Try to extend it through MyProxy before giving up
                retVal = self.renewFromMyProxy(userDN,
                                               userGroup,
                                               lifeTime=requiredLifeTime,
                                               chain=chain)
                if not retVal['OK']:
                    return S_ERROR("Can't get a proxy for %s seconds: %s" %
                                   (requiredLifeTime, retVal['Message']))
                chain = retVal['Value']
        #Proxy is invalid for some reason, let's delete it
        if not chain.isValidProxy()['Value']:
            self.deleteProxy(userDN, userGroup)
            return S_ERROR("%s@%s has no proxy registered" % (userDN,
                                                              userGroup))
        return S_OK((chain, timeLeft))

    def __getVOMSAttribute(self, userGroup, requiredVOMSAttribute=False):
        # Resolve the VOMS attribute for a group, preferring an explicit request
        if requiredVOMSAttribute:
            return S_OK({
                'attribute': requiredVOMSAttribute,
                'VOMSVO': Registry.getVOMSVOForGroup(userGroup)
            })
        csVOMSMapping = Registry.getVOMSAttributeForGroup(userGroup)
        if not csVOMSMapping:
            return S_ERROR("No mapping defined for group %s in the CS" %
                           userGroup)
        return S_OK({
            'attribute': csVOMSMapping,
            'VOMSVO': Registry.getVOMSVOForGroup(userGroup)
        })

    def getVOMSProxy(self, userDN, userGroup, requiredLifeTime=False, requestedVOMSAttr=False):
        """ Get proxy string from the Proxy Repository for use with userDN
            in the userGroup and VOMS attr
        """
        retVal = self.__getVOMSAttribute(userGroup, requestedVOMSAttr)
        if not retVal['OK']:
            return retVal
        vomsAttr = retVal['Value']['attribute']
        vomsVO = retVal['Value']['VOMSVO']
        #Look in the cache
        retVal = self.__getPemAndTimeLeft(userDN, userGroup, vomsAttr)
        if retVal['OK']:
            pemData = retVal['Value'][0]
            vomsTime = retVal['Value'][1]
            chain = X509Chain()
            retVal = chain.loadProxyFromString(pemData)
            if retVal['OK']:
                retVal = chain.getRemainingSecs()
                if retVal['OK']:
                    remainingSecs = retVal['Value']
                    # Cache hit only if both the VOMS extension and the chain
                    # itself satisfy the requested lifetime
                    if requiredLifeTime and requiredLifeTime <= vomsTime and requiredLifeTime <= remainingSecs:
                        return S_OK((chain, min(vomsTime, remainingSecs)))
        if isPUSPdn(userDN):
            # Get the Per User SubProxy if one is requested
            result = self.__getPUSProxy(userDN, userGroup, requiredLifeTime,
                                        requestedVOMSAttr)
            if not result['OK']:
                return result
            pemData = result['Value'][0]
            chain = X509Chain()
            result = chain.loadProxyFromString(pemData)
            if not result['OK']:
                return result
        else:
            # Get the stored proxy and dress it with the VOMS extension
            retVal = self.getProxy(userDN, userGroup, requiredLifeTime)
            if not retVal['OK']:
                return retVal
            chain, secsLeft = retVal['Value']
            if requiredLifeTime and requiredLifeTime > secsLeft:
                return S_ERROR("Stored proxy is not long lived enough")
        vomsMgr = VOMS()
        retVal = vomsMgr.getVOMSAttributes(chain)
        if retVal['OK']:
            attrs = retVal['Value']
            if len(attrs) > 0:
                if attrs[0] != vomsAttr:
                    return S_ERROR(
                        "Stored proxy has already a different VOMS attribute %s than requested %s"
                        % (vomsAttr, attrs[0]))
                else:
                    # Already has the right attribute: just cache and return it
                    result = self.__storeVOMSProxy(userDN, userGroup,
                                                   vomsAttr, chain)
                    if not result['OK']:
                        return result
                    secsLeft = result['Value']
                    if requiredLifeTime and requiredLifeTime <= secsLeft:
                        return S_OK((chain, secsLeft))
                    return S_ERROR(
                        "Stored proxy has already a different VOMS attribute and is not long lived enough"
                    )
        retVal = vomsMgr.setVOMSAttributes(chain, vomsAttr, vo=vomsVO)
        if not retVal['OK']:
            return S_ERROR("Cannot append voms extension: %s" %
                           retVal['Message'])
        chain = retVal['Value']
        # We have got the VOMS proxy, store it into the cache
        result = self.__storeVOMSProxy(userDN, userGroup, vomsAttr, chain)
        if not result['OK']:
            return result
        secsLeft = result['Value']
        return S_OK((chain, secsLeft))

    def __storeVOMSProxy(self, userDN, userGroup, vomsAttr, chain):
        # Cache a VOMS-dressed proxy; returns the effective seconds left
        retVal = self._getConnection()
        if not retVal['OK']:
            return retVal
        connObj = retVal['Value']
        retVal1 = VOMS().getVOMSProxyInfo(chain, 'actimeleft')
        retVal2 = VOMS().getVOMSProxyInfo(chain, 'timeleft')
        if not retVal1['OK']:
            return retVal1
        if not retVal2['OK']:
            return retVal2
        try:
            vomsSecsLeft1 = int(retVal1['Value'].strip())
            vomsSecsLeft2 = int(retVal2['Value'].strip())
            vomsSecsLeft = min(vomsSecsLeft1, vomsSecsLeft2)
        except Exception as e:
            return S_ERROR("Can't parse VOMS time left: %s" % str(e))
        # Effective lifetime is the shorter of VOMS AC and chain lifetimes
        secsLeft = min(vomsSecsLeft, chain.getRemainingSecs()['Value'])
        pemData = chain.dumpAllToString()['Value']
        result = Registry.getUsernameForDN(userDN)
        if not result['OK']:
            userName = ""
        else:
            userName = result['Value']
        try:
            sUserName = self._escapeString(userName)['Value']
            sUserDN = self._escapeString(userDN)['Value']
            sUserGroup = self._escapeString(userGroup)['Value']
            sVomsAttr = self._escapeString(vomsAttr)['Value']
            sPemData = self._escapeString(pemData)['Value']
        except KeyError:
            return S_ERROR("Could not escape some data")
        cmd = "REPLACE INTO `ProxyDB_VOMSProxies` ( UserName, UserDN, UserGroup, VOMSAttr, Pem, ExpirationTime ) VALUES "
        cmd += "( %s, %s, %s, %s, %s, TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() ) )" % (
            sUserName, sUserDN, sUserGroup, sVomsAttr, sPemData, secsLeft)
        result = self._update(cmd, conn=connObj)
        if not result['OK']:
            return result
        return S_OK(secsLeft)

    def getRemainingTime(self, userDN, userGroup):
        """ Returns the remaining time the proxy is valid """
        try:
            userDN = self._escapeString(userDN)['Value']
            userGroup = self._escapeString(userGroup)['Value']
        except KeyError:
            return S_ERROR("Invalid DN or group")
        cmd = "SELECT TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) FROM `ProxyDB_Proxies`"
        retVal = self._query("%s WHERE UserDN = %s AND UserGroup = %s" %
                             (cmd, userDN, userGroup))
        if not retVal['OK']:
            return retVal
        data = retVal['Value']
        if not data:
            # No proxy registered counts as zero seconds left
            return S_OK(0)
        return S_OK(int(data[0][0]))

    def getUsers(self, validSecondsLeft=0, dnMask=False, groupMask=False, userMask=False):
        """ Get all the distinct users from the Proxy Repository. Optionally, only users
            with valid proxies within the given validity period expressed in seconds
        """
        cmd = "SELECT UserName, UserDN, UserGroup, ExpirationTime, PersistentFlag FROM `ProxyDB_Proxies`"
        sqlCond = []
        if validSecondsLeft:
            try:
                validSecondsLeft = int(validSecondsLeft)
            except ValueError:
                return S_ERROR("Seconds left has to be an integer")
            sqlCond.append(
                "TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) > %d"
                % validSecondsLeft)
        # Each mask may be a scalar or a list of values to match
        for field, mask in (('UserDN', dnMask), ('UserGroup', groupMask),
                            ('UserName', userMask)):
            if not mask:
                continue
            if type(mask) not in (types.ListType, types.TupleType):
                mask = [mask]
            mask = [self._escapeString(entry)['Value'] for entry in mask]
            sqlCond.append("%s in ( %s )" % (field, ", ".join(mask)))
        if sqlCond:
            cmd += " WHERE %s" % " AND ".join(sqlCond)
        retVal = self._query(cmd)
        if not retVal['OK']:
            return retVal
        data = []
        for record in retVal['Value']:
            data.append({
                'Name': record[0],
                'DN': record[1],
                'group': record[2],
                'expirationtime': record[3],
                'persistent': record[4] == 'True'
            })
        return S_OK(data)

    def getCredentialsAboutToExpire(self, requiredSecondsLeft, onlyPersistent=True):
        # Proxies whose expiration falls within the next requiredSecondsLeft
        cmd = "SELECT UserDN, UserGroup, ExpirationTime, PersistentFlag FROM `ProxyDB_Proxies`"
        cmd += " WHERE TIMESTAMPDIFF( SECOND, ExpirationTime, UTC_TIMESTAMP() ) < %d and TIMESTAMPDIFF( SECOND, ExpirationTime, UTC_TIMESTAMP() ) > 0" % requiredSecondsLeft
        if onlyPersistent:
            cmd += " AND PersistentFlag = 'True'"
        return self._query(cmd)

    def setPersistencyFlag(self, userDN, userGroup, persistent=True):
        """ Set the proxy PersistentFlag to the flag value """
        try:
            sUserDN = self._escapeString(userDN)['Value']
            sUserGroup = self._escapeString(userGroup)['Value']
        except KeyError:
            return S_ERROR("Can't escape something")
        if persistent:
            sqlFlag = "True"
        else:
            sqlFlag = "False"
        retVal = self._query(
            "SELECT PersistentFlag FROM `ProxyDB_Proxies` WHERE UserDN=%s AND UserGroup=%s"
            % (sUserDN, sUserGroup))
        sqlInsert = True
        if retVal['OK']:
            data = retVal['Value']
            if len(data) > 0:
                sqlInsert = False
                if data[0][0] == sqlFlag:
                    # Flag already has the requested value: nothing to do
                    return S_OK()
        if sqlInsert:
            #If it's not in the db and we're removing the persistency then do nothing
            if not persistent:
                return S_OK()
            # Placeholder row with empty Pem; real proxy may be uploaded later
            cmd = "INSERT INTO `ProxyDB_Proxies` ( UserDN, UserGroup, Pem, ExpirationTime, PersistentFlag ) VALUES "
            cmd += "( %s, %s, '', UTC_TIMESTAMP(), 'True' )" % (sUserDN,
                                                                sUserGroup)
        else:
            cmd = "UPDATE `ProxyDB_Proxies` SET PersistentFlag='%s' WHERE UserDN=%s AND UserGroup=%s" % (
                sqlFlag, sUserDN, sUserGroup)
        retVal = self._update(cmd)
        if not retVal['OK']:
            return retVal
        return S_OK()

    def getProxiesContent(self, selDict, sortList, start=0, limit=0):
        """ Function to get the contents of the db
            parameters are a filter to the db
        """
        fields = ("UserName", "UserDN", "UserGroup", "ExpirationTime",
                  "PersistentFlag")
        cmd = "SELECT %s FROM `ProxyDB_Proxies`" % ", ".join(fields)
        # Placeholder rows (empty Pem) are excluded
        sqlWhere = ["Pem is not NULL"]
        for field in selDict:
            if field not in fields:
                continue
            fVal = selDict[field]
            if type(fVal) in (types.DictType, types.TupleType,
                              types.ListType):
                sqlWhere.append("%s in (%s)" % (field, ", ".join([
                    self._escapeString(str(value))['Value'] for value in fVal
                ])))
            else:
                sqlWhere.append(
                    "%s = %s" %
                    (field, self._escapeString(str(fVal))['Value']))
        sqlOrder = []
        if sortList:
            for sort in sortList:
                if len(sort) == 1:
                    sort = (sort, "DESC")
                elif len(sort) > 2:
                    return S_ERROR("Invalid sort %s" % sort)
                if sort[0] not in fields:
                    return S_ERROR("Invalid sorting field %s" % sort[0])
                if sort[1].upper() not in ("ASC", "DESC"):
                    return S_ERROR("Invalid sorting order %s" % sort[1])
                sqlOrder.append("%s %s" % (sort[0], sort[1]))
        if sqlWhere:
            cmd = "%s WHERE %s" % (cmd, " AND ".join(sqlWhere))
        if sqlOrder:
            cmd = "%s ORDER BY %s" % (cmd, ", ".join(sqlOrder))
        if limit:
            try:
                start = int(start)
                limit = int(limit)
            except ValueError:
                return S_ERROR("start and limit have to be integers")
            cmd += " LIMIT %d,%d" % (start, limit)
        retVal = self._query(cmd)
        if not retVal['OK']:
            return retVal
        data = []
        for record in retVal['Value']:
            record = list(record)
            # Convert the PersistentFlag column to a real boolean
            if record[4] == 'True':
                record[4] = True
            else:
                record[4] = False
            data.append(record)
        totalRecords = len(data)
        cmd = "SELECT COUNT( UserGroup ) FROM `ProxyDB_Proxies`"
        if sqlWhere:
            cmd = "%s WHERE %s" % (cmd, " AND ".join(sqlWhere))
        retVal = self._query(cmd)
        if retVal['OK']:
            totalRecords = retVal['Value'][0][0]
        return S_OK({
            'ParameterNames': fields,
            'Records': data,
            'TotalRecords': totalRecords
        })

    def logAction(self, action, issuerDN, issuerGroup, targetDN, targetGroup):
        """ Add an action to the log """
        try:
            sAction = self._escapeString(action)['Value']
            sIssuerDN = self._escapeString(issuerDN)['Value']
            sIssuerGroup = self._escapeString(issuerGroup)['Value']
            sTargetDN = self._escapeString(targetDN)['Value']
            sTargetGroup = self._escapeString(targetGroup)['Value']
        except KeyError:
            return S_ERROR("Can't escape from death")
        cmd = "INSERT INTO `ProxyDB_Log` ( Action, IssuerDN, IssuerGroup, TargetDN, TargetGroup, Timestamp ) VALUES "
        cmd += "( %s, %s, %s, %s, %s, UTC_TIMESTAMP() )" % (
            sAction, sIssuerDN, sIssuerGroup, sTargetDN, sTargetGroup)
        retVal = self._update(cmd)
        if not retVal['OK']:
            # Logging is best-effort; failures are reported but not propagated
            self.log.error("Can't add a proxy action log: ",
                           retVal['Message'])

    def purgeLogs(self):
        """ Purge expired requests from the db """
        # Keep 180 days (15552000 secs) of action logs
        cmd = "DELETE FROM `ProxyDB_Log` WHERE TIMESTAMPDIFF( SECOND, Timestamp, UTC_TIMESTAMP() ) > 15552000"
        return self._update(cmd)

    def getLogsContent(self, selDict, sortList, start=0, limit=0):
        """ Function to get the contents of the logs table
            parameters are a filter to the db
        """
        fields = ("Action", "IssuerDN", "IssuerGroup", "TargetDN",
                  "TargetGroup", "Timestamp")
        cmd = "SELECT %s FROM `ProxyDB_Log`" % ", ".join(fields)
        if selDict:
            qr = []
            # beforeDate / afterDate are special range filters, not columns
            if 'beforeDate' in selDict:
                qr.append("Timestamp < %s" %
                          self._escapeString(selDict['beforeDate'])['Value'])
                del selDict['beforeDate']
            if 'afterDate' in selDict:
                qr.append("Timestamp > %s" %
                          self._escapeString(selDict['afterDate'])['Value'])
                del selDict['afterDate']
            for field in selDict:
                qr.append("(%s)" % " OR ".join([
                    "%s=%s" % (field, self._escapeString(str(value))['Value'])
                    for value in selDict[field]
                ]))
            whereStr = " WHERE %s" % " AND ".join(qr)
            cmd += whereStr
        else:
            whereStr = ""
        if sortList:
            cmd += " ORDER BY %s" % ", ".join(
                ["%s %s" % (sort[0], sort[1]) for sort in sortList])
        if limit:
            cmd += " LIMIT %d,%d" % (start, limit)
        retVal = self._query(cmd)
        if not retVal['OK']:
            return retVal
        data = retVal['Value']
        totalRecords = len(data)
        cmd = "SELECT COUNT( Timestamp ) FROM `ProxyDB_Log`"
        cmd += whereStr
        retVal = self._query(cmd)
        if retVal['OK']:
            totalRecords = retVal['Value'][0][0]
        return S_OK({
            'ParameterNames': fields,
            'Records': data,
            'TotalRecords': totalRecords
        })

    def generateToken(self, requesterDN, requesterGroup, numUses=1, lifeTime=0, retries=10):
        """ Generate and return a token and the number of uses for the token """
        if not lifeTime:
            lifeTime = gConfig.getValue("/DIRAC/VOPolicy/TokenLifeTime",
                                        self.__defaultTokenLifetime)
        maxUses = gConfig.getValue("/DIRAC/VOPolicy/TokenMaxUses",
                                   self.__defaultTokenMaxUses)
        # Clamp the requested number of uses to [1, maxUses]
        numUses = max(1, min(numUses, maxUses))
        m = hashlib.md5()
        rndData = "%s.%s.%s.%s" % (time.time(), random.random(), numUses,
                                   lifeTime)
        m.update(rndData)
        token = m.hexdigest()
        fieldsSQL = ", ".join(("Token", "RequesterDN", "RequesterGroup",
                               "ExpirationTime", "UsesLeft"))
        valuesSQL = ", ".join(
            (self._escapeString(token)['Value'],
             self._escapeString(requesterDN)['Value'],
             self._escapeString(requesterGroup)['Value'],
             "TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() )" % int(lifeTime),
             str(numUses)))
        insertSQL = "INSERT INTO `ProxyDB_Tokens` ( %s ) VALUES ( %s )" % (
            fieldsSQL, valuesSQL)
        result = self._update(insertSQL)
        if result['OK']:
            return S_OK((token, numUses))
        # Retry only on duplicate-key collisions of the generated token
        if result['Message'].find("uplicate entry") > -1:
            if retries:
                # NOTE(review): this retry call drops requesterDN/requesterGroup
                # and passes numUses/lifeTime in their place — looks like a bug;
                # verify against the intended signature
                return self.generateToken(numUses, lifeTime, retries - 1)
            return S_ERROR(
                "Max retries reached for token generation. Aborting")
        return result

    def purgeExpiredTokens(self):
        # Drop tokens that expired or ran out of uses
        delSQL = "DELETE FROM `ProxyDB_Tokens` WHERE ExpirationTime < UTC_TIMESTAMP() OR UsesLeft < 1"
        return self._update(delSQL)

    def useToken(self, token, requesterDN, requesterGroup):
        # Atomically consume one use; S_OK(True) iff a row was decremented
        sqlCond = " AND ".join(
            ("UsesLeft > 0",
             "Token=%s" % self._escapeString(token)['Value'],
             "RequesterDN=%s" % self._escapeString(requesterDN)['Value'],
             "RequesterGroup=%s" % self._escapeString(requesterGroup)['Value'],
             "ExpirationTime >= UTC_TIMESTAMP()"))
        updateSQL = "UPDATE `ProxyDB_Tokens` SET UsesLeft = UsesLeft - 1 WHERE %s" % sqlCond
        result = self._update(updateSQL)
        if not result['OK']:
            return result
        return S_OK(result['Value'] > 0)

    def __cleanExpNotifs(self):
        # Forget notification records for proxies that already expired
        cmd = "DELETE FROM `ProxyDB_ExpNotifs` WHERE ExpirationTime < UTC_TIMESTAMP()"
        return self._update(cmd)

    def sendExpirationNotifications(self):
        # Mail users whose proxies cross a configured notification threshold,
        # recording each notification so it is not repeated
        result = self.__cleanExpNotifs()
        if not result['OK']:
            return result
        cmd = "SELECT UserDN, UserGroup, LifeLimit FROM `ProxyDB_ExpNotifs`"
        result = self._query(cmd)
        if not result['OK']:
            return result
        # (DN, group) -> smallest limit already notified for
        notifDone = dict([((row[0], row[1]), row[2])
                          for row in result['Value']])
        notifLimits = sorted([
            int(x) for x in self.getCSOption("NotificationTimes",
                                             ProxyDB.NOTIFICATION_TIMES)
        ])
        sqlSel = "UserDN, UserGroup, TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime )"
        sqlCond = "TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) < %d" % max(
            notifLimits)
        cmd = "SELECT %s FROM `ProxyDB_Proxies` WHERE %s" % (sqlSel, sqlCond)
        result = self._query(cmd)
        if not result['OK']:
            return result
        pilotProps = (Properties.GENERIC_PILOT, Properties.PILOT)
        data = result['Value']
        sent = []
        for row in data:
            userDN, group, lTime = row
            #If it's a pilot proxy, skip it
            if Registry.groupHasProperties(group, pilotProps):
                continue
            #If it doesn't have the auto upload proxy, skip it
            if not Registry.getGroupOption(group, "AutoUploadProxy", False):
                continue
            notKey = (userDN, group)
            for notifLimit in notifLimits:
                if notifLimit < lTime:
                    #Not yet in this notification limit
                    continue
                if notKey in notifDone and notifDone[notKey] <= notifLimit:
                    #Already notified for this notification limit
                    break
                if not self._notifyProxyAboutToExpire(userDN, group, lTime,
                                                      notifLimit):
                    #Cannot send notification, retry later
                    break
                try:
                    sUserDN = self._escapeString(userDN)['Value']
                    sGroup = self._escapeString(group)['Value']
                except KeyError:
                    return S_ERROR("OOPS")
                if notKey not in notifDone:
                    values = "( %s, %s, %d, TIMESTAMPADD( SECOND, %s, UTC_TIMESTAMP() ) )" % (
                        sUserDN, sGroup, notifLimit, lTime)
                    cmd = "INSERT INTO `ProxyDB_ExpNotifs` ( UserDN, UserGroup, LifeLimit, ExpirationTime ) VALUES %s" % values
                    result = self._update(cmd)
                    if not result['OK']:
                        gLogger.error("Could not mark notification as sent",
                                      result['Message'])
                else:
                    values = "LifeLimit = %d, ExpirationTime = TIMESTAMPADD( SECOND, %s, UTC_TIMESTAMP() )" % (
                        notifLimit, lTime)
                    cmd = "UPDATE `ProxyDB_ExpNotifs` SET %s WHERE UserDN = %s AND UserGroup = %s" % (
                        values, sUserDN, sGroup)
                    result = self._update(cmd)
                    if not result['OK']:
                        gLogger.error("Could not mark notification as sent",
                                      result['Message'])
                sent.append((userDN, group, lTime))
                notifDone[notKey] = notifLimit
        return S_OK(sent)

    def _notifyProxyAboutToExpire(self, userDN, userGroup, lTime, notifLimit):
        # Returns True on successful mail delivery, False otherwise
        # (callers use the boolean to decide whether to record the notification)
        result = Registry.getUsernameForDN(userDN)
        if not result['OK']:
            return False
        userName = result['Value']
        userEMail = Registry.getUserOption(userName, "Email", "")
        if not userEMail:
            gLogger.error("Could not discover user email", userName)
            return False
        daysLeft = int(lTime / 86400)
        msgSubject = "Your proxy uploaded to DIRAC will expire in %d days" % daysLeft
        msgBody = """\
Dear %s,

  The proxy you uploaded to DIRAC will expire in aproximately %d days. The proxy information is:

 DN:    %s
 Group: %s

If you plan on keep using this credentials please upload a newer proxy to DIRAC by executing:

$ dirac-proxy-init -P -g %s --rfc

If you have been issued different certificate, please make sure you have a proxy uploaded with that certificate.

Cheers,
 DIRAC's Proxy Manager
""" % (userName, daysLeft, userDN, userGroup, userGroup)
        result = self.__notifClient.sendMail(
            userEMail, msgSubject, msgBody, fromAddress='*****@*****.**')
        if not result['OK']:
            gLogger.error("Could not send email", result['Message'])
            return False
        return True
def __sendAMail( self, sendDict = None, body = None, sendTo = None ):
    """Send a "new user has registered" e-mail to every address in sendDict.

    :param dict sendDict: { e-mail : name } map used as the source of recipients
    :param str body: e-mail body
    :param str sendTo: address passed as the "from" argument of sendMail
    :return: JSON-like dict { "success": "true"/"false", "result"/"error": text }
    """
    if not sendDict:
        # FIX: previously returned an empty error string, leaving the caller
        # with no hint about which argument was missing
        result = "sendDict argument is missing"
        gLogger.debug( result )
        return { "success" : "false" , "error" : result }
    if not body:
        result = "body argument is missing"
        gLogger.debug( result )
        return { "success" : "false" , "error" : result }
    if not sendTo:
        result = "sendTo argument is missing"
        gLogger.debug( result )
        return { "success" : "false" , "error" : result }

    sentSuccess = []
    sentFailed = []
    gLogger.debug( "Initializing Notification client" )
    ntc = NotificationClient( lambda x, timeout: getRPCClient( x, timeout = timeout, static = True ) )

    # Prefer a fully-qualified host name for the mail title
    if socket.gethostname().find( '.' ) >= 0:
        hostname = socket.gethostname()
    else:
        hostname = socket.gethostbyaddr( socket.gethostname() )[ 0 ]
    title = "New user has sent registration request to %s" % hostname

    for email, name in sendDict.iteritems():
        result = ntc.sendMail( email, title, body, sendTo, False )
        if not result[ "OK" ]:
            error = name + ": " + result[ "Message" ]
            sentFailed.append( error )
            gLogger.error( "Sent failure: " , error )
        else:
            gLogger.info( "Successfully sent to %s" % name )
            sentSuccess.append( name )

    success = ", ".join( sentSuccess )
    failure = "\n".join( sentFailed )

    if success and failure:
        result = "Your registration request were sent successfully to: "
        result = result + success + "\n\nFailed to sent it to:\n" + failure
        gLogger.debug( result )
        return { "success" : "true" , "result" : result }
    elif success:
        result = "Your registration request were sent successfully to: %s" % success
        gLogger.debug( result )
        return { "success" : "true" , "result" : result }
    elif failure:
        result = "Failed to sent your request to:\n%s" % failure
        gLogger.debug( result )
        return { "success" : "false" , "error" : result }
    result = "No messages were sent to administrator due technical failure"
    gLogger.debug( result )
    return { "success" : "false" , "error" : result }
class FileStatusTransformationAgent(AgentModule):
    """ FileStatusTransformationAgent

    Periodically inspects the files of Replication/Moving transformations,
    cross-checks their presence in the File Catalog and on the source/target
    storage elements, and repairs their transformation status (Processed /
    Deleted / Unused / retried request) accordingly. Results and errors are
    mailed to the configured addresses.
    """

    def __init__(self, *args, **kwargs):
        AgentModule.__init__(self, *args, **kwargs)
        self.name = 'FileStatusTransformationAgent'
        # When False the agent only reports; no statuses or requests are changed
        self.enabled = False
        self.shifterProxy = 'DataManager'
        self.transformationTypes = ["Replication"]
        self.transformationStatuses = ["Active"]
        self.transformationFileStatuses = ["Assigned", "Problematic", "Processed", "Unused"]
        self.addressTo = ["*****@*****.**"]
        self.addressFrom = "*****@*****.**"
        self.emailSubject = "FileStatusTransformationAgent"
        # action name -> list of per-file accounting dicts, reported by mail
        self.accounting = defaultdict(list)
        self.errors = []
        self.fcClient = FileCatalogClient()
        self.tClient = TransformationClient()
        self.reqClient = ReqClient()
        self.nClient = NotificationClient()

    def checkFileStatusFuncExists(self, status):
        """ returns True/False if a function to check transformation files with a given status exists or not """
        # Statuses are dispatched to methods named check_<status>_files
        checkFileStatusFuncName = "check_%s_files" % (status.lower())
        if not (hasattr(self, checkFileStatusFuncName) and callable(getattr(self, checkFileStatusFuncName))):
            self.log.warn("Unable to process transformation files with status ", status)
            return False
        return True

    def beginExecution(self):
        """ Reload the configurations before every cycle """
        self.enabled = self.am_getOption('EnableFlag', False)
        # NOTE(review): am_setOption (not am_getOption) — looks deliberate, to
        # force the shifter proxy option; confirm against AgentModule semantics
        self.shifterProxy = self.am_setOption('shifterProxy', 'DataManager')
        self.transformationTypes = self.am_getOption('TransformationTypes', ["Replication"])
        self.transformationStatuses = self.am_getOption('TransformationStatuses', ["Active"])
        self.transformationFileStatuses = self.am_getOption(
            'TransformationFileStatuses', ["Assigned", "Problematic", "Processed", "Unused"])
        self.addressTo = self.am_getOption('MailTo', ["*****@*****.**"])
        self.addressFrom = self.am_getOption('MailFrom', "*****@*****.**")
        # Keep only statuses we have a check_<status>_files handler for
        # (py2 filter: returns a list)
        self.transformationFileStatuses = filter(self.checkFileStatusFuncExists, self.transformationFileStatuses)
        self.accounting.clear()
        return S_OK()

    def sendNotification(self, transID, transType=None, sourceSEs=None, targetSEs=None):
        """ sends email notification about accounting information of a transformation

        Clears self.errors and self.accounting after sending.
        """
        # Nothing accumulated -> nothing to send
        if not(self.errors or self.accounting):
            return S_OK()
        emailBody = "Transformation ID: %s\n" % transID
        if transType:
            emailBody += "Transformation Type: %s\n" % transType
        if sourceSEs:
            emailBody += "Source SE: %s\n" % (" ".join(str(source) for source in sourceSEs))
        if targetSEs:
            emailBody += "Target SE: %s\n\n" % (" ".join(str(target) for target in targetSEs))
        rows = []
        for action, transFiles in self.accounting.iteritems():
            emailBody += "Total number of files with action %s: %s\n" % (action, len(transFiles))
            for transFile in transFiles:
                rows.append([[transFile['LFN']], [str(transFile['AvailableOnSource'])],
                             [str(transFile['AvailableOnTarget'])], [transFile['Status']], [action]])
        if rows:
            columns = ["LFN", "Source", "Target", "Old Status", "Action"]
            emailBody += printTable(columns, rows, printOut=False, numbering=False, columnSeparator=' | ')
        if self.errors:
            emailBody += "\n\nErrors:"
            emailBody += "\n".join(self.errors)
        self.log.notice(emailBody)
        subject = "%s: %s" % (self.emailSubject, transID)
        for address in self.addressTo:
            res = self.nClient.sendMail(address, subject, emailBody, self.addressFrom, localAttempt=False)
            if not res['OK']:
                self.log.error("Failure to send Email notification to ", address)
                continue
        self.errors = []
        self.accounting.clear()
        return S_OK()

    def logError(self, errStr, varMsg=''):
        # Log and also accumulate for the notification mail
        self.log.error(errStr, varMsg)
        self.errors.append(errStr + varMsg)

    def execute(self):
        """ main execution loop of Agent """
        res = self.getTransformations()
        if not res['OK']:
            self.log.error('Failure to get transformations', res['Message'])
            return S_ERROR("Failure to get transformations")
        transformations = res['Value']
        if not transformations:
            self.log.notice('No transformations found with Status %s and Type %s ' %
                            (self.transformationStatuses, self.transformationTypes))
            return S_OK()
        self.log.notice('Will treat %d transformations' % len(transformations))
        self.log.notice('Transformations: %s' %
                        ",".join([str(transformation['TransformationID']) for transformation in transformations]))
        for trans in transformations:
            transID = trans['TransformationID']
            # Each missing-parameter case still sends a notification with
            # whatever errors were accumulated so far
            if 'SourceSE' not in trans or not trans['SourceSE']:
                self.logError("SourceSE not set for transformation, skip processing, transID: ", "%s" % transID)
                self.sendNotification(transID)
                continue
            if 'TargetSE' not in trans or not trans['TargetSE']:
                self.logError("TargetSE not set for transformation, skip processing, transID: ", "%s" % transID)
                self.sendNotification(transID, sourceSEs=trans['SourceSE'])
                continue
            if 'DataTransType' not in trans:
                self.logError("Transformation Type not set for transformation, skip processing, transID: ",
                              "%s" % transID)
                self.sendNotification(transID, sourceSEs=trans['SourceSE'], targetSEs=trans['TargetSE'])
                continue
            res = self.processTransformation(transID, trans['SourceSE'], trans['TargetSE'], trans['DataTransType'])
            if not res['OK']:
                self.log.error('Failure to process transformation with ID:', transID)
                continue
        return S_OK()

    def getTransformations(self, transID=None):
        """ returns transformations of a given type and status """
        res = None
        if transID:
            res = self.tClient.getTransformations(
                condDict={'TransformationID': transID,
                          'Status': self.transformationStatuses,
                          'Type': self.transformationTypes})
        else:
            res = self.tClient.getTransformations(
                condDict={'Status': self.transformationStatuses, 'Type': self.transformationTypes})
        if not res['OK']:
            return res
        result = res['Value']
        for trans in result:
            res = self.tClient.getTransformationParameters(trans['TransformationID'], ['SourceSE', 'TargetSE'])
            if not res['OK']:
                self.log.error('Failure to get SourceSE and TargetSE parameters for Transformation ID:',
                               trans['TransformationID'])
                continue
            # NOTE(review): eval() of CS-stored strings — trusted server data,
            # but still a code-execution risk; ast.literal_eval would be safer
            trans['SourceSE'] = eval(res['Value']['SourceSE'])
            trans['TargetSE'] = eval(res['Value']['TargetSE'])
            res = self.getDataTransformationType(trans['TransformationID'])
            if not res['OK']:
                self.log.error('Failure to determine Data Transformation Type',
                               "%s: %s" % (trans['TransformationID'], res['Message']))
                continue
            trans['DataTransType'] = res['Value']
        return S_OK(result)

    def getRequestStatus(self, transID, taskIDs):
        """ returns request statuses for a given list of task IDs """
        res = self.tClient.getTransformationTasks(condDict={'TransformationID': transID, 'TaskID': taskIDs})
        if not res['OK']:
            self.log.error('Failure to get Transformation Tasks for Transformation ID:', transID)
            return res
        result = res['Value']
        requestStatus = {}
        for task in result:
            requestStatus[task['TaskID']] = {'RequestStatus': task['ExternalStatus'],
                                             'RequestID': long(task['ExternalID'])}
        return S_OK(requestStatus)

    def getDataTransformationType(self, transID):
        """ returns transformation types Replication/Moving/Unknown for a given transformation """
        res = self.tClient.getTransformationParameters(transID, 'Body')
        if not res['OK']:
            return res
        # if body is empty then we assume that it is a replication transformation
        if not res['Value']:
            return S_OK(REPLICATION_TRANS)
        replication = False
        rmReplica = False
        try:
            # Modern bodies are JSON lists of operations
            body = json.loads(res['Value'])
            for operation in body:
                if 'ReplicateAndRegister' in operation:
                    replication = True
                if 'RemoveReplica' in operation:
                    rmReplica = True
        except ValueError:
            # Legacy plain-string bodies: fall back to substring matching
            if 'ReplicateAndRegister' in res['Value']:
                replication = True
            if 'RemoveReplica' in res['Value']:
                rmReplica = True
        # Replication + removal == Moving; replication alone == Replication
        if rmReplica and replication:
            return S_OK(MOVING_TRANS)
        if replication:
            return S_OK(REPLICATION_TRANS)
        return S_ERROR("Unknown Transformation Type '%r'" % res['Value'])

    def setFileStatus(self, transID, transFiles, status):
        """ sets transformation file status """
        lfns = [transFile['LFN'] for transFile in transFiles]
        lfnStatuses = {lfn: status for lfn in lfns}
        if lfnStatuses:
            # Only touch the DB when the agent is enabled; accounting is
            # recorded either way so the dry-run mail shows what would happen
            if self.enabled:
                res = self.tClient.setFileStatusForTransformation(transID, newLFNsStatus=lfnStatuses, force=True)
                if not res['OK']:
                    self.logError('Failed to set statuses for LFNs ', "%s" % res['Message'])
                    return res
            for transFile in transFiles:
                self.accounting[status].append({'LFN': transFile['LFN'],
                                                'Status': transFile['Status'],
                                                'AvailableOnSource': transFile['AvailableOnSource'],
                                                'AvailableOnTarget': transFile['AvailableOnTarget']})
        return S_OK()

    def selectFailedRequests(self, transFile):
        """ returns True if transformation file has a failed request otherwise returns False """
        res = self.getRequestStatus(transFile['TransformationID'], transFile['TaskID'])
        if not res['OK']:
            self.log.error('Failure to get Request Status for Assigned File')
            return False
        result = res['Value']
        if result[transFile['TaskID']]['RequestStatus'] == 'Failed':
            return True
        return False

    def retryStrategyForFiles(self, transID, transFiles):
        """ returns retryStrategy Reset Request if a request is found in RMS, otherwise returns set file status to unused"""
        taskIDs = [transFile['TaskID'] for transFile in transFiles]
        res = self.getRequestStatus(transID, taskIDs)
        if not res['OK']:
            return res
        result = res['Value']
        retryStrategy = defaultdict(dict)
        for taskID in taskIDs:
            if taskID is None:
                self.log.error("Task ID is None", "Transformation: %s\n Files: %r " % (transID, transFiles))
                retryStrategy[None]['Strategy'] = SET_UNUSED
                continue
            res = self.reqClient.getRequest(requestID=result[taskID]['RequestID'])
            if not res['OK']:
                self.log.notice('Request %s does not exist setting file status to unused' %
                                result[taskID]['RequestID'])
                retryStrategy[taskID]['Strategy'] = SET_UNUSED
            else:
                # NOTE(review): RESET_REQUEST strategy deliberately disabled —
                # always falls back to SET_UNUSED; confirm before re-enabling
                retryStrategy[taskID]['Strategy'] = SET_UNUSED  # RESET_REQUEST
                retryStrategy[taskID]['RequestID'] = result[taskID]['RequestID']
        return S_OK(retryStrategy)

    def check_assigned_files(self, actions, transFiles, transType):
        """ treatment for transformation files with assigned status """
        for transFile in transFiles:
            if transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                if transType == REPLICATION_TRANS:
                    actions[SET_PROCESSED].append(transFile)
                if transType == MOVING_TRANS:
                    actions[RETRY].append(transFile)
            elif transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[RETRY].append(transFile)
            elif not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                actions[SET_PROCESSED].append(transFile)
            else:
                # not on src and target
                actions[SET_DELETED].append(transFile)

    def check_unused_files(self, actions, transFiles, transType):
        """ treatment for transformation files with unused status """
        for transFile in transFiles:
            if not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                actions[SET_PROCESSED].append(transFile)
            if not transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[SET_DELETED].append(transFile)

    def check_processed_files(self, actions, transFiles, transType):
        """ treatment for transformation files with processed status """
        for transFile in transFiles:
            if transFile['AvailableOnSource'] and transFile['AvailableOnTarget'] and transType == MOVING_TRANS:
                actions[RETRY].append(transFile)
            if transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[RETRY].append(transFile)
            if not transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[SET_DELETED].append(transFile)

    def check_problematic_files(self, actions, transFiles, transType):
        """ treatment for transformation files with problematic status """
        for transFile in transFiles:
            if transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                if transType == REPLICATION_TRANS:
                    actions[SET_PROCESSED].append(transFile)
                if transType == MOVING_TRANS:
                    actions[RETRY].append(transFile)
            elif transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[RETRY].append(transFile)
            elif not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                actions[SET_PROCESSED].append(transFile)
            else:
                # not available on source and target
                actions[SET_DELETED].append(transFile)

    def retryFiles(self, transID, transFiles):
        """ resubmits request or sets file status to unused based on the retry strategy of transformation file """
        setFilesUnused = []
        setFilesAssigned = []
        res = self.retryStrategyForFiles(transID, transFiles)
        if not res['OK']:
            self.logError('Failure to determine retry strategy (unused / reset request) for files ',
                          "%s" % res['Message'])
            return res
        retryStrategy = res['Value']
        for transFile in transFiles:
            if retryStrategy[transFile['TaskID']]['Strategy'] != RESET_REQUEST:
                setFilesUnused.append(transFile)
                continue
            requestID = retryStrategy[transFile['TaskID']]['RequestID']
            if self.enabled:
                res = self.reqClient.resetFailedRequest(requestID, allR=True)
                if not res['OK']:
                    self.logError('Failed to reset request ',
                                  'ReqID: %s Error: %s' % (requestID, res['Message']))
                    continue
                if res['Value'] == "Not reset":
                    self.logError('Failed to reset request ', 'ReqID: %s is non-recoverable' % requestID)
                    continue
                setFilesAssigned.append(transFile)
                res = self.tClient.setTaskStatus(transID, transFile['TaskID'], 'Waiting')
                if not res['OK']:
                    self.logError('Failure to set Waiting status for Task ID: ',
                                  "%s %s" % (transFile['TaskID'], res['Message']))
                    continue
            self.accounting[RESET_REQUEST].append({'LFN': transFile['LFN'],
                                                   'Status': transFile['Status'],
                                                   'AvailableOnSource': transFile['AvailableOnSource'],
                                                   'AvailableOnTarget': transFile['AvailableOnTarget']})
        if setFilesUnused:
            self.setFileStatus(transID, setFilesUnused, 'Unused')
        if setFilesAssigned:
            self.setFileStatus(transID, setFilesAssigned, 'Assigned')
        return S_OK()

    def applyActions(self, transID, actions):
        """ sets new file statuses and resets requests """
        for action, transFiles in actions.iteritems():
            if action == SET_PROCESSED and transFiles:
                self.setFileStatus(transID, transFiles, 'Processed')
            if action == SET_DELETED and transFiles:
                self.setFileStatus(transID, transFiles, 'Deleted')
            if action == RETRY and transFiles:
                # if there is a request in RMS then reset request otherwise set file status unused
                self.retryFiles(transID, transFiles)

    def existsInFC(self, storageElements, lfns):
        """ checks if files have replicas registered in File Catalog for all given storageElements """
        res = self.fcClient.getReplicas(lfns)
        if not res['OK']:
            return res
        result = {}
        result['Successful'] = {}
        result['Failed'] = {}
        setOfSEs = set(storageElements)
        for lfn, msg in res['Value']['Failed'].iteritems():
            # Missing from the catalog counts as "known absent", not a failure
            if msg == 'No such file or directory':
                result['Successful'][lfn] = False
            else:
                result['Failed'][lfn] = msg
        # check if all replicas are registered in FC
        filesFoundInFC = res['Value']['Successful']
        for lfn, replicas in filesFoundInFC.iteritems():
            result['Successful'][lfn] = setOfSEs.issubset(replicas.keys())
        return S_OK(result)

    def existsOnSE(self, storageElements, lfns):
        """ checks if the given files exist physically on a list of storage elements"""
        result = {}
        result['Failed'] = {}
        result['Successful'] = {}
        if not lfns:
            return S_OK(result)
        # VO is derived from the first LFN path component
        voName = lfns[0].split('/')[1]
        for se in storageElements:
            res = StorageElement(se, vo=voName).exists(lfns)
            if not res['OK']:
                return res
            for lfn, status in res['Value']['Successful'].iteritems():
                # A file only counts as existing if it exists on ALL SEs:
                # the first result seeds the value, any False overrides it
                if lfn not in result['Successful']:
                    result['Successful'][lfn] = status
                if not status:
                    result['Successful'][lfn] = False
            result['Failed'][se] = res['Value']['Failed']
        return S_OK(result)

    def exists(self, storageElements, lfns):
        """ checks if files exists on both file catalog and storage elements """
        fcRes = self.existsInFC(storageElements, lfns)
        if not fcRes['OK']:
            self.logError('Failure to determine if files exists in File Catalog ', "%s" % fcRes['Message'])
            return fcRes
        if fcRes['Value']['Failed']:
            self.logError("Failed FileCatalog Response ", "%s" % fcRes['Value']['Failed'])
        # check if files found in file catalog also exist on SE
        checkLFNsOnStorage = [lfn for lfn in fcRes['Value']['Successful'] if fcRes['Value']['Successful'][lfn]]
        # no files were found in FC, return the result instead of verifying them on SE
        if not checkLFNsOnStorage:
            return fcRes
        seRes = self.existsOnSE(storageElements, checkLFNsOnStorage)
        if not seRes['OK']:
            self.logError('Failure to determine if files exist on SE ', "%s" % seRes['Message'])
            return seRes
        for se in storageElements:
            if seRes['Value']['Failed'][se]:
                self.logError('Failed to determine if files exist on SE ',
                              "%s %s" % (se, seRes['Value']['Failed'][se]))
                return S_ERROR()
        # Demote FC hits that are physically missing from storage
        fcResult = fcRes['Value']['Successful']
        seResult = seRes['Value']['Successful']
        for lfn in fcResult:
            if fcResult[lfn] and not seResult[lfn]:
                fcRes['Value']['Successful'][lfn] = False
        return fcRes

    def processTransformation(self, transID, sourceSE, targetSEs, transType):
        """ process transformation for a given transformation ID """
        actions = {}
        actions[SET_PROCESSED] = []
        actions[RETRY] = []
        actions[SET_DELETED] = []
        for status in self.transformationFileStatuses:
            res = self.tClient.getTransformationFiles(condDict={'TransformationID': transID, 'Status': status})
            if not res['OK']:
                errStr = 'Failure to get Transformation Files, Status: %s Transformation ID: %s Message: %s' % (
                    status, transID, res['Message'])
                self.logError(errStr)
                continue
            transFiles = res['Value']
            if not transFiles:
                self.log.notice("No Transformation Files found with status %s for Transformation ID %d" %
                                (status, transID))
                continue
            self.log.notice("Processing Transformation Files with status %s for TransformationID %d " %
                            (status, transID))
            # Assigned files are only interesting when their request failed
            if status == 'Assigned':
                transFiles = filter(self.selectFailedRequests, transFiles)
            lfns = [transFile['LFN'] for transFile in transFiles]
            if not lfns:
                continue
            res = self.exists(sourceSE, lfns)
            if not res['OK']:
                continue
            resultSourceSe = res['Value']['Successful']
            res = self.exists(targetSEs, lfns)
            if not res['OK']:
                continue
            resultTargetSEs = res['Value']['Successful']
            for transFile in transFiles:
                lfn = transFile['LFN']
                transFile['AvailableOnSource'] = resultSourceSe[lfn]
                transFile['AvailableOnTarget'] = resultTargetSEs[lfn]
            # Dispatch to the per-status handler (existence checked in
            # beginExecution via checkFileStatusFuncExists)
            checkFilesFuncName = "check_%s_files" % status.lower()
            checkFiles = getattr(self, checkFilesFuncName)
            checkFiles(actions, transFiles, transType)
        self.applyActions(transID, actions)
        self.sendNotification(transID, transType, sourceSE, targetSEs)
        return S_OK()
def run(self):
    """
    Do actions required to notify users.

    Mandatory keyword arguments:
    - Granularity
    Optional keyword arguments:
    - SiteType
    - ServiceType
    - ResourceType
    """
    # Initializing variables
    nc = NotificationClient()
    # raise alarms, right now makes a simple notification
    if 'Granularity' not in self.kw['Params'].keys():
        # py2-only raise syntax, kept as-is
        raise ValueError, "You have to provide a argument Granularity = <desired_granularity>"
    if self.new_status['Action']:
        notif = "%s %s is perceived as" % (self.granularity, self.name)
        notif = notif + " %s. Reason: %s." % (self.new_status['Status'], self.new_status['Reason'])
        # NOTE(review): the message built into `notif` above is never used —
        # the loop variable below immediately shadows it with the per-user
        # notification dict; looks like a latent bug, confirm intent
        users_to_notify = self._getUsersToNotify()
        for notif in users_to_notify:
            for user in notif['Users']:
                if 'Web' in notif['Notifications']:
                    gLogger.info("Sending web notification to user %s" % user)
                    # NOTE(review): passes the whole notification dict, not a
                    # message string — confirm addNotificationForUser contract
                    nc.addNotificationForUser(user, notif)
                if 'Mail' in notif['Notifications']:
                    gLogger.info("Sending mail notification to user %s" % user)
                    # Fetch the latest history entry to report the previous state
                    was = self.rsClient.getElementHistory(
                        self.granularity, elementName=self.name, statusType=self.status_type,
                        meta={"order": "DESC", 'limit': 1,
                              "columns": ['Status', 'Reason', 'DateEffective']})  #[0]
                    if not was['OK']:
                        gLogger.error(was['Message'])
                        continue
                    was = was['Value'][0]
                    mailMessage = """                             ---TESTING---

--------------------------------------------------------------------------------
RSS changed the status of the following resource:

Granularity:\t%s
Name:\t\t%s
New status:\t%s
Reason:\t\t%s
Was:\t\t%s (%s) since %s
Setup:\t\t%s

If you think RSS took the wrong decision, please set the status manually:

Use: dirac-rss-set-status -g <granularity> -n <element_name> -s <desired_status> [-t status_type]
(if you omit the optional last part of the command, all status types are matched.)

This notification has been sent according to those parameters:
%s
""" % (self.granularity, self.name, self.new_status['Status'],
       self.new_status['Reason'], was[0], was[1], was[2], CS.getSetup(),
       str(users_to_notify))
                    # Actually send the mail!
                    # Mail address is read from the user registry cache
                    resUser = self.rmClient.getUserRegistryCache(user)
                    if not resUser['OK']:
                        gLogger.error(resUser['Message'])
                        continue
                    resUser = resUser['Value'][0][2]
                    nc.sendMail(resUser,
                                '[RSS][%s][%s] %s -> %s' % (self.granularity, self.name,
                                                            self.new_status['Status'], was[0]),
                                mailMessage)

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
def execute( self ):
    """General agent execution method.

    Refreshes the CS data, runs ``lcg-infosites`` to collect storage usage
    for the production SEs, logs a usage table, and mails it; SEs more than
    90% full are flagged in the mail subject.
    """
    # Get a "fresh" copy of the CS data; failure here is not fatal
    result = self.csAPI.downloadCSData()
    if not result['OK']:
        self.log.warn( "Could not download a fresh copy of the CS data", result[ 'Message' ] )

    # Execute command to retrieve storage usage information
    cmdTuple = ['lcg-infosites', '--vo', self.voName, 'se']
    self.log.info( "Executing %s" % cmdTuple )
    ret = systemCall( 0, cmdTuple, env = {'LCG_GFAL_INFOSYS': self.bdii} )
    if not ret['OK']:
        return ret
    # ret['Value'][1] is the command's standard output; empty output means the
    # command produced nothing useful (was: the double-negative
    # "not ret['Value'][1] != ''", which is equivalent)
    if not ret['Value'][1]:
        self.log.error( "Error while executing %s" % cmdTuple )
        return S_ERROR()

    # One list of space dictionaries (in TB) per production SE
    sedict = dict( ( SE, [] ) for SE in self.productionSEs )
    fields = ['SE', 'Available(TB)', 'Used(TB)', 'Total(TB)', 'Used(%)']
    records = []
    fullSEList = []

    for line in ret['Value'][1].split( '\n' ):
        tokens = line.split()
        # Expected line format: <available_kB> <used_kB> <...> <SE>
        # — assumed from the parsing below, TODO confirm against lcg-infosites
        if len( tokens ) != 4:
            continue
        SE = tokens[3]
        if SE in self.productionSEs and tokens[0] != 'n.a' and tokens[1] != 'n.a':
            # convert into TB
            available = float( tokens[0] ) / 1e9
            used = float( tokens[1] ) / 1e9
            sedict[SE].append( {'Available': available,
                                'Used': used,
                                'Total': available + used} )

    for SE in self.productionSEs:
        for spacedict in sedict[SE]:
            if not spacedict['Total']:
                # Guard against a zero-division when both values are reported as 0
                self.log.warn( "Total capacity reported as 0 for %s, skipping" % SE )
                continue
            available = '%.1f' % spacedict['Available']
            used = '%.1f' % spacedict['Used']
            total = '%.1f' % spacedict['Total']
            fraction_used = spacedict['Used'] / spacedict['Total'] * 100
            if fraction_used > 90.:
                fullSEList.append( SE )
                self.log.warn( "%s full at %.1f%%" % ( SE, fraction_used ) )
            fraction_used = '%.1f' % fraction_used
            records.append( [SE, available, used, total, fraction_used] )

    body = printTable( fields, records, printOut = False )
    if fullSEList:
        # '%%' in the template yields a literal '%' after formatting
        self.subject = 'CRITICAL storage usage beyond 90%%: %s' % ( ', '.join( fullSEList ) )
    if self.addressTo and self.addressFrom:
        notification = NotificationClient()
        result = notification.sendMail( self.addressTo, self.subject, body,
                                        self.addressFrom, localAttempt = False )
        if not result['OK']:
            self.log.error( 'Can not send notification mail', result['Message'] )
    return S_OK()
try: headers = dict( ( i.strip() , j.strip()) for i , j in ( item.split( ':' ) for item in head.split( '\\n' ) ) ) except: gLogger.error( "Failed to convert string: %s to email headers" % head ) DIRACexit( 4 ) if not "To" in headers: gLogger.error( "Failed to get 'To:' field from headers %s" % head ) DIRACexit( 5 ) to = headers[ "To" ] origin = "%s@%s" %( os.getenv( "LOGNAME" , "dirac" ) , socket.getfqdn() ) if "From" in headers: origin = headers[ "From" ] subject = "Sent from %s" % socket.getfqdn() if "Subject" in headers: subject = headers[ "Subject" ] ntc = NotificationClient() print "sendMail(%s,%s,%s,%s,%s)" % ( to , subject , body , origin , False ) result = ntc.sendMail( to , subject , body , origin , localAttempt = False ) if not result[ "OK" ]: gLogger.error( result[ "Message" ] ) DIRACexit( 6 ) DIRACexit( 0 )
class ErrorMessageMonitor(AgentModule):
    """Agent that mails reviewers about new (unreviewed) error-message strings
    registered in the SystemLoggingDB and then marks them as reviewed."""

    def initialize(self):
        """Create DB/notification clients and resolve the reviewer mail list.

        Falls back to the Operations 'EMail/Logging' list when no configured
        reviewer has a usable e-mail address.
        """
        self.systemLoggingDB = SystemLoggingDB()
        self.notification = NotificationClient()
        userList = self.am_getOption("Reviewer", [])
        self.log.debug("Users to be notified:", ', '.join(userList))
        mailList = []
        for user in userList:
            mail = getUserOption(user, 'Email', '')
            if not mail:
                self.log.warn("Could not get user's mail", user)
            else:
                mailList.append(mail)
        if not mailList:
            mailList = Operations().getValue('EMail/Logging', [])
        # (was "if not len(mailList)": plain truthiness is the idiomatic form)
        if not mailList:
            errString = "There are no valid users in the mailing list"
            varString = "[" + ','.join(userList) + "]"
            self.log.warn(errString, varString)
        self.log.info("List of mails to be notified", ','.join(mailList))
        self._mailAddress = mailList
        self._subject = 'New error messages were entered in the SystemLoggingDB'
        return S_OK()

    def execute(self):
        """ The main agent execution method """
        condDict = {'ReviewedMessage': 0}
        result = self.systemLoggingDB.getCounters('FixedTextMessages', ['ReviewedMessage'], condDict)
        if not result['OK']:
            return result
        if not result['Value']:
            self.log.info('No messages need review')
            return S_OK('No messages need review')
        returnFields = ['FixedTextID', 'FixedTextString', 'SystemName', 'SubSystemName']
        result = self.systemLoggingDB._queryDB(showFieldList=returnFields,
                                               groupColumn='FixedTextString',
                                               condDict=condDict)
        if not result['OK']:
            self.log.error('Failed to obtain the non reviewed Strings', result['Message'])
            return S_OK()
        messageList = result['Value']
        if messageList == 'None' or not messageList:
            self.log.error('The DB query returned an empty result')
            return S_OK()
        # Build one line per new message string for the review mail
        mailBody = 'These new messages have arrived to the Logging Service\n'
        for message in messageList:
            mailBody = mailBody + "String: '" + message[1] + "'\tSystem: '" \
                + message[2] + "'\tSubsystem: '" + message[3] + "'\n"
        if self._mailAddress:
            result = self.notification.sendMail(self._mailAddress, self._subject, mailBody)
            if not result['OK']:
                self.log.warn("The mail could not be sent")
                return S_OK()
        messageIDs = [message[0] for message in messageList]
        condDict = {'FixedTextID': messageIDs}
        result = self.systemLoggingDB.updateFields('FixedTextMessages', ['ReviewedMessage'], [1],
                                                   condDict=condDict)
        if not result['OK']:
            # FIX: DIRAC error dicts carry the text under 'Message';
            # result['ERROR'] raised a KeyError exactly on the failure path
            self.log.error('Could not update message Status', result['Message'])
            return S_OK()
        self.log.verbose('Updated message Status for:', str(messageList))
        self.log.info("The messages have been sent for review",
                      "There are %s new descriptions" % len(messageList))
        return S_OK("%s Messages have been sent for review" % len(messageList))
comment) if not result['OK']: gLogger.error("Error setting comment in CS") #Commit the changes if nothing has failed and the CS has been modified if modifiedCS: result = diracAdmin.csCommitChanges(False) if not result[ 'OK' ]: gLogger.error('Commit failed with message = %s' % (result[ 'Message' ])) dexit(255) else: gLogger.info('Successfully committed changes to CS') notifyClient = NotificationClient() gLogger.info('Sending mail for software installation %s' % (mailadress)) res = getProxyInfo() if not res['OK']: sender = '*****@*****.**' else: #sender = res['Value'][''] if 'username' in res['Value']: sender = getUserOption(res['Value']['username'],'Email') else: sender = '*****@*****.**' res = notifyClient.sendMail(mailadress, subject, msg, sender, localAttempt = False) if not res[ 'OK' ]: gLogger.error('The mail could not be sent') else: gLogger.info('No modifications to CS required') gLogger.notice("All done!") dexit(0)
def __infoFromCE(self):
    """Synchronise site/CE/queue options in the DIRAC CS with the BDII.

    Walks every site under /Resources/Sites, compares Coordinates, Mail and,
    per CE, wnTmpDir / architecture / OS / SI00 / Pilot / CEType / queue
    options against the GLUE attributes published in LDAP, staging any change
    through self.csAPI. On OS changes a summary is accumulated in ``body`` and
    mailed; changes are committed at the end.

    NOTE(review): the bare excepts and the ``continue`` right after
    __checkAlternativeBDIISite (which discards the alternative-site result)
    are kept as in the original — presumably deliberate best-effort behaviour.
    """
    sitesSection = cfgPath('Resources', 'Sites')
    result = gConfig.getSections(sitesSection)
    if not result['OK']:
        return
    grids = result['Value']
    changed = False
    body = ""
    for grid in grids:
        gridSection = cfgPath(sitesSection, grid)
        result = gConfig.getSections(gridSection)
        if not result['OK']:
            return
        sites = result['Value']
        for site in sites:
            siteSection = cfgPath(gridSection, site)
            opt = gConfig.getOptionsDict(siteSection)['Value']
            name = opt.get('Name', '')
            if name:
                coor = opt.get('Coordinates', 'Unknown')
                mail = opt.get('Mail', 'Unknown')
                result = ldapSite(name)
                if not result['OK']:
                    self.log.warn("BDII site %s: %s" % (name, result['Message']))
                    result = self.__checkAlternativeBDIISite(ldapSite, name)
                if result['OK']:
                    bdiiSites = result['Value']
                    if len(bdiiSites) == 0:
                        self.log.warn(name, "Error in BDII: leng = 0")
                    else:
                        if not len(bdiiSites) == 1:
                            self.log.warn(name, "Warning in BDII: leng = %d" % len(bdiiSites))
                        bdiiSite = bdiiSites[0]
                        try:
                            longitude = bdiiSite['GlueSiteLongitude']
                            latitude = bdiiSite['GlueSiteLatitude']
                            newcoor = "%s:%s" % (longitude, latitude)
                        except:
                            self.log.warn("Error in BDII coordinates")
                            newcoor = "Unknown"
                        try:
                            newmail = bdiiSite['GlueSiteSysAdminContact'].split(":")[-1].strip()
                        except:
                            self.log.warn("Error in BDII mail")
                            newmail = "Unknown"
                        self.log.debug("%s %s %s" % (name, newcoor, newmail))
                        if newcoor != coor:
                            self.log.info("%s" % (name), "%s -> %s" % (coor, newcoor))
                            if coor == 'Unknown':
                                self.csAPI.setOption(cfgPath(siteSection, 'Coordinates'), newcoor)
                            else:
                                self.csAPI.modifyValue(cfgPath(siteSection, 'Coordinates'), newcoor)
                            changed = True
                        if newmail != mail:
                            self.log.info("%s" % (name), "%s -> %s" % (mail, newmail))
                            if mail == 'Unknown':
                                self.csAPI.setOption(cfgPath(siteSection, 'Mail'), newmail)
                            else:
                                self.csAPI.modifyValue(cfgPath(siteSection, 'Mail'), newmail)
                            changed = True
            ceList = List.fromChar(opt.get('CE', ''))
            if not ceList:
                self.log.warn(site, 'Empty site list')
                continue
            # result = gConfig.getSections( cfgPath( siteSection,'CEs' )
            # if not result['OK']:
            #   self.log.debug( "Section CEs:", result['Message'] )
            for ce in ceList:
                ceSection = cfgPath(siteSection, 'CEs', ce)
                result = gConfig.getOptionsDict(ceSection)
                if not result['OK']:
                    self.log.debug("Section CE", result['Message'])
                    wnTmpDir = 'Unknown'
                    arch = 'Unknown'
                    os = 'Unknown'
                    si00 = 'Unknown'
                    pilot = 'Unknown'
                    ceType = 'Unknown'
                else:
                    ceopt = result['Value']
                    wnTmpDir = ceopt.get('wnTmpDir', 'Unknown')
                    arch = ceopt.get('architecture', 'Unknown')
                    os = ceopt.get('OS', 'Unknown')
                    si00 = ceopt.get('SI00', 'Unknown')
                    pilot = ceopt.get('Pilot', 'Unknown')
                    ceType = ceopt.get('CEType', 'Unknown')
                result = ldapCE(ce)
                if not result['OK']:
                    self.log.warn('Error in BDII for %s' % ce, result['Message'])
                    result = self.__checkAlternativeBDIISite(ldapCE, ce)
                    continue
                try:
                    bdiiCE = result['Value'][0]
                except:
                    self.log.warn('Error in BDII for %s' % ce, result)
                    bdiiCE = None
                if bdiiCE:
                    try:
                        newWNTmpDir = bdiiCE['GlueSubClusterWNTmpDir']
                    except:
                        newWNTmpDir = 'Unknown'
                    if wnTmpDir != newWNTmpDir and newWNTmpDir != 'Unknown':
                        section = cfgPath(ceSection, 'wnTmpDir')
                        self.log.info(section, " -> ".join((wnTmpDir, newWNTmpDir)))
                        if wnTmpDir == 'Unknown':
                            self.csAPI.setOption(section, newWNTmpDir)
                        else:
                            self.csAPI.modifyValue(section, newWNTmpDir)
                        changed = True
                    try:
                        newArch = bdiiCE['GlueHostArchitecturePlatformType']
                    except:
                        newArch = 'Unknown'
                    if arch != newArch and newArch != 'Unknown':
                        section = cfgPath(ceSection, 'architecture')
                        self.log.info(section, " -> ".join((arch, newArch)))
                        if arch == 'Unknown':
                            self.csAPI.setOption(section, newArch)
                        else:
                            self.csAPI.modifyValue(section, newArch)
                        changed = True
                    try:
                        newOS = '_'.join((bdiiCE['GlueHostOperatingSystemName'],
                                          bdiiCE['GlueHostOperatingSystemVersion'],
                                          bdiiCE['GlueHostOperatingSystemRelease']))
                    except:
                        newOS = 'Unknown'
                    if os != newOS and newOS != 'Unknown':
                        section = cfgPath(ceSection, 'OS')
                        self.log.info(section, " -> ".join((os, newOS)))
                        if os == 'Unknown':
                            self.csAPI.setOption(section, newOS)
                        else:
                            self.csAPI.modifyValue(section, newOS)
                        changed = True
                        # OS changes are reported by mail, hence collected in body.
                        body = body + "OS was changed %s -> %s for %s at %s\n" % (os, newOS, ce, site)
                    try:
                        newSI00 = bdiiCE['GlueHostBenchmarkSI00']
                    except:
                        newSI00 = 'Unknown'
                    if si00 != newSI00 and newSI00 != 'Unknown':
                        section = cfgPath(ceSection, 'SI00')
                        self.log.info(section, " -> ".join((si00, newSI00)))
                        if si00 == 'Unknown':
                            self.csAPI.setOption(section, newSI00)
                        else:
                            self.csAPI.modifyValue(section, newSI00)
                        changed = True
                    try:
                        rte = bdiiCE['GlueHostApplicationSoftwareRunTimeEnvironment']
                        for vo in self.voName:
                            if vo.lower() == 'lhcb':
                                if 'VO-lhcb-pilot' in rte:
                                    newPilot = 'True'
                                else:
                                    newPilot = 'False'
                            else:
                                newPilot = 'Unknown'
                    except:
                        newPilot = 'Unknown'
                    if pilot != newPilot and newPilot != 'Unknown':
                        section = cfgPath(ceSection, 'Pilot')
                        self.log.info(section, " -> ".join((pilot, newPilot)))
                        if pilot == 'Unknown':
                            self.csAPI.setOption(section, newPilot)
                        else:
                            self.csAPI.modifyValue(section, newPilot)
                        changed = True
                newVO = ''
                for vo in self.voName:
                    result = ldapCEState(ce, vo)  #getBDIICEVOView
                    if not result['OK']:
                        self.log.warn('Error in BDII for queue %s' % ce, result['Message'])
                        result = self.__checkAlternativeBDIISite(ldapCEState, ce, vo)
                        continue
                    try:
                        queues = result['Value']
                    except:
                        # BUGFIX: was result['Massage'] — a typo that raised
                        # KeyError inside this except handler.
                        self.log.warn('Error in BDII for queue %s' % ce, result['Message'])
                        continue
                    newCEType = 'Unknown'
                    for queue in queues:
                        try:
                            queueType = queue['GlueCEImplementationName']
                        except:
                            queueType = 'Unknown'
                        if newCEType == 'Unknown':
                            newCEType = queueType
                        else:
                            if queueType != newCEType:
                                self.log.warn('Error in BDII for CE %s ' % ce,
                                              'different CE types %s %s' % (newCEType, queueType))
                    if newCEType == 'ARC-CE':
                        newCEType = 'ARC'
                    if ceType != newCEType and newCEType != 'Unknown':
                        section = cfgPath(ceSection, 'CEType')
                        self.log.info(section, " -> ".join((ceType, newCEType)))
                        if ceType == 'Unknown':
                            self.csAPI.setOption(section, newCEType)
                        else:
                            self.csAPI.modifyValue(section, newCEType)
                        changed = True
                    for queue in queues:
                        try:
                            queueName = queue['GlueCEUniqueID'].split('/')[-1]
                        except:
                            self.log.warn('Error in queueName ', queue)
                            continue
                        try:
                            newMaxCPUTime = queue['GlueCEPolicyMaxCPUTime']
                        except:
                            newMaxCPUTime = None
                        newSI00 = None
                        try:
                            caps = queue['GlueCECapability']
                            if type(caps) == type(''):
                                caps = [caps]
                            for cap in caps:
                                if cap.count('CPUScalingReferenceSI00'):
                                    newSI00 = cap.split('=')[-1]
                        except:
                            newSI00 = None
                        queueSection = cfgPath(ceSection, 'Queues', queueName)
                        result = gConfig.getOptionsDict(queueSection)
                        if not result['OK']:
                            self.log.warn("Section Queues", result['Message'])
                            maxCPUTime = 'Unknown'
                            si00 = 'Unknown'
                            allowedVOs = ['']
                        else:
                            queueOpt = result['Value']
                            maxCPUTime = queueOpt.get('maxCPUTime', 'Unknown')
                            si00 = queueOpt.get('SI00', 'Unknown')
                            if newVO == '':  # Remember previous iteration, if none - read from conf
                                allowedVOs = queueOpt.get('VO', '').split(",")
                            else:  # Else use newVO, as it can contain changes, which aren't in conf yet
                                allowedVOs = newVO.split(",")
                        if newMaxCPUTime and (maxCPUTime != newMaxCPUTime):
                            section = cfgPath(queueSection, 'maxCPUTime')
                            self.log.info(section, " -> ".join((maxCPUTime, newMaxCPUTime)))
                            if maxCPUTime == 'Unknown':
                                self.csAPI.setOption(section, newMaxCPUTime)
                            else:
                                self.csAPI.modifyValue(section, newMaxCPUTime)
                            changed = True
                        if newSI00 and (si00 != newSI00):
                            section = cfgPath(queueSection, 'SI00')
                            self.log.info(section, " -> ".join((si00, newSI00)))
                            if si00 == 'Unknown':
                                self.csAPI.setOption(section, newSI00)
                            else:
                                self.csAPI.modifyValue(section, newSI00)
                            changed = True
                        modifyVO = True  # Flag saying if we need VO option to change
                        newVO = ''
                        if allowedVOs != ['']:
                            for allowedVO in allowedVOs:
                                allowedVO = allowedVO.strip()  # Get rid of spaces
                                newVO += allowedVO
                                if allowedVO == vo:  # Current VO has been already in list
                                    newVO = ''
                                    modifyVO = False  # Don't change anything
                                    break  # Skip next 'if', proceed to next VO
                                newVO += ', '
                        if modifyVO:
                            section = cfgPath(queueSection, 'VO')
                            newVO += vo
                            self.log.info(section, " -> ".join(('%s' % allowedVOs, newVO)))
                            if allowedVOs == ['']:
                                self.csAPI.setOption(section, newVO)
                            else:
                                self.csAPI.modifyValue(section, newVO)
                            changed = True
    if changed:
        self.log.info(body)
        if body and self.addressTo and self.addressFrom:
            notification = NotificationClient()
            result = notification.sendMail(self.addressTo, self.subject, body,
                                           self.addressFrom, localAttempt=False)
        return self.csAPI.commit()
    else:
        self.log.info("No changes found")
        return S_OK()
if __name__== "__main__": cli = Params() cli.registerSwitchs() Script.parseCommandLine( ignoreErrors = True ) from DIRAC import gLogger, exit as DIRACexit from DIRAC.FrameworkSystem.Client.NotificationClient import NotificationClient if not cli.to or not cli.fr or not cli.subject: gLogger.error( "Missing argument" ) DIRACexit( 2 ) if not cli.body and not cli.filename: gLogge.error("Missing body") DIRACexit(2) if cli.filename: cli.body = "".join(file(cli.filename,"r").readlines()) ntc = NotificationClient() gLogger.verbose("Sending:"," ".join([cli.to , cli.subject , cli.body , cli.fr] )) print "sendMail(%s,%s,%s,%s,%s)" % ( cli.to , cli.subject , cli.body , cli.fr , False ) result = ntc.sendMail( cli.to , cli.subject , cli.body , cli.fr , localAttempt = False ) if not result[ "OK" ]: gLogger.error( result[ "Message" ] ) DIRACexit( 6 ) DIRACexit( 0 )
def __lookForCE( self ):
    """Discover CEs published in the BDII that are not yet known to the CS.

    Collects all CEs already configured under /Resources/Sites (plus the
    'BannedCEs' option), queries the BDII for all CE states of self.voName,
    and for every unknown CE gathers its cluster/OS/queue information.  CEs
    with at least one queue in 'Production' are reported by mail together
    with a ready-made 'dirac-admin-add-site' command line.

    NOTE(review): errors from BDII lookups are handled best-effort (warn,
    try the alternative BDII site, then 'continue' — the alternative result
    is discarded for per-CE queries); kept as-is.
    """
    knownces = self.am_getOption( 'BannedCEs', [] )
    result = gConfig.getSections( '/Resources/Sites' )
    if not result['OK']:
        return
    grids = result['Value']
    # Build the list of CEs already present in the CS.
    for grid in grids:
        result = gConfig.getSections( '/Resources/Sites/%s' % grid )
        if not result['OK']:
            return
        sites = result['Value']
        for site in sites:
            opt = gConfig.getOptionsDict( '/Resources/Sites/%s/%s' % ( grid, site ) )['Value']
            ces = List.fromChar( opt.get( 'CE', '' ) )
            knownces += ces
    # Full BDII CE-state dump for this VO.
    response = ldapCEState( '', vo = self.voName )
    if not response['OK']:
        self.log.error( "Error during BDII request", response['Message'] )
        response = self.__checkAlternativeBDIISite( ldapCEState, '', self.voName )
        return response
    # Hostnames seen in BDII but not in the CS (dict used as an ordered-ish set).
    newces = {}
    for queue in response['Value']:
        try:
            queuename = queue['GlueCEUniqueID']
        except:
            continue
        cename = queuename.split( ":" )[0]
        if not cename in knownces:
            newces[cename] = None
            self.log.debug( "newce", cename )
    body = ""
    possibleNewSites = []
    for ce in newces.iterkeys():
        response = ldapCluster( ce )
        if not response['OK']:
            self.log.warn( "Error during BDII request", response['Message'] )
            response = self.__checkAlternativeBDIISite( ldapCluster, ce )
            continue
        clusters = response['Value']
        if len( clusters ) != 1:
            self.log.warn( "Error in cluster length", " CE %s Length %d" % ( ce, len( clusters ) ) )
        if len( clusters ) == 0:
            continue
        cluster = clusters[0]
        # GlueForeignKey may be a single string or a list; the GOCDB site
        # name is encoded as 'GlueSiteUniqueID=<name>'.
        fkey = cluster.get( 'GlueForeignKey', [] )
        if type( fkey ) == type( '' ):
            fkey = [fkey]
        nameBDII = None
        for entry in fkey:
            if entry.count( 'GlueSiteUniqueID' ):
                nameBDII = entry.split( '=' )[1]
                break
        if not nameBDII:
            continue
        cestring = "CE: %s, GOCDB Name: %s" % ( ce, nameBDII )
        self.log.info( cestring )
        response = ldapCE( ce )
        if not response['OK']:
            self.log.warn( "Error during BDII request", response['Message'] )
            response = self.__checkAlternativeBDIISite( ldapCE, ce )
            continue
        ceinfos = response['Value']
        if len( ceinfos ):
            ceinfo = ceinfos[0]
            systemName = ceinfo.get( 'GlueHostOperatingSystemName', 'Unknown' )
            systemVersion = ceinfo.get( 'GlueHostOperatingSystemVersion', 'Unknown' )
            systemRelease = ceinfo.get( 'GlueHostOperatingSystemRelease', 'Unknown' )
        else:
            systemName = "Unknown"
            systemVersion = "Unknown"
            systemRelease = "Unknown"
        osstring = "SystemName: %s, SystemVersion: %s, SystemRelease: %s" % ( systemName, systemVersion, systemRelease )
        self.log.info( osstring )
        response = ldapCEState( ce, vo = self.voName )
        if not response['OK']:
            self.log.warn( "Error during BDII request", response['Message'] )
            response = self.__checkAlternativeBDIISite( ldapCEState, ce, self.voName )
            continue
        newcestring = "\n\n%s\n%s" % ( cestring, osstring )
        usefull = False
        cestates = response['Value']
        for cestate in cestates:
            queuename = cestate.get( 'GlueCEUniqueID', 'UnknownName' )
            queuestatus = cestate.get( 'GlueCEStateStatus', 'UnknownStatus' )
            queuestring = "%s %s" % ( queuename, queuestatus )
            self.log.info( queuestring )
            newcestring += "\n%s" % queuestring
            # Only CEs with at least one Production queue are worth reporting.
            if queuestatus.count( 'Production' ):
                usefull = True
        if usefull:
            body += newcestring
            possibleNewSites.append( 'dirac-admin-add-site DIRACSiteName %s %s' % ( nameBDII, ce ) )
    if body:
        body = "We are glad to inform You about new CE(s) possibly suitable for %s:\n" % self.voName + body
        body += "\n\nTo suppress information about CE add its name to BannedCEs list."
        for possibleNewSite in possibleNewSites:
            body = "%s\n%s" % ( body, possibleNewSite )
        self.log.info( body )
        if self.addressTo and self.addressFrom:
            notification = NotificationClient()
            result = notification.sendMail( self.addressTo, self.subject, body, self.addressFrom, localAttempt = False )
    return S_OK()
class UserStorageQuotaAgent(AgentModule):
    """ .. class:: UserStorageQuotaAgent

    Checks each user's storage usage against the per-user (or default) quota
    from the CS and mails escalating warnings at 90% / 100% / 150% usage.

    :param int deafultQuota: default quota in MB
    :param NotificationClient notificationClient: NotificationClient instance
    :param StorageUsageDB storageUsageDB: StorageUsageDB or RPC client pointing to StorageUsageDB
    """
    # NOTE(review): the class doc says MB while the code logs GB for the same
    # value (defaultQuota = 1000, "Default is 1TB") — inconsistent; confirm
    # the intended unit before relying on it.
    defaultQuota = 1000
    notificationClient = None
    storageUsageDB = None

    def __init__(self, *args, **kwargs):
        ''' c'tor '''
        AgentModule.__init__(self, *args, **kwargs)
        self.notificationClient = NotificationClient()
        try:
            self.storageUsageDB = StorageUsageDB()
        except SystemExit:
            # No direct DB access available: fall back to the RPC service.
            self.storageUsageDB = RPCClient('DataManagement/StorageUsage')
        self.defaultQuota = gConfig.getValue(
            '/Registry/DefaultStorageQuota', self.defaultQuota)  # Default is 1TB

    def initialize(self):
        """ agent initialisation

        :param self: self reference
        """
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')
        self.log.info("initialize: Default quota found to be %d GB" % self.defaultQuota)
        return S_OK()

    def execute(self):
        """ execution of one cycle

        :param self: self reference
        """
        res = self.storageUsageDB.getUserStorageUsage()
        # BUGFIX: the result was dereferenced without checking 'OK', which
        # crashed the cycle with a KeyError whenever the query failed.
        if not res['OK']:
            self.log.error("Failed to get user storage usage", res['Message'])
            return res
        usageDict = res['Value']
        byteToGB = 1000 * 1000 * 1000.0
        managerMsg = ""
        errorMsg = ""
        self.log.info("Determining quota usage for %s users." % len(usageDict))
        for userName in sorted(usageDict):
            usageGB = usageDict[userName] / byteToGB
            res = gConfig.getOptionsDict('/Registry/Users/%s' % userName)
            if not res['OK']:
                msg = "Username not found in the CS: %s using %.2f GB" % (
                    userName, usageGB)
                errorMsg += msg + '\n'
                self.log.error(msg)
                continue
            elif 'Email' not in res['Value']:
                msg = "CS does not contain email information for user %s" % userName
                errorMsg += msg + '\n'
                self.log.error(msg)
                continue
            elif "Quota" not in res['Value']:
                userQuota = float(self.defaultQuota)
            else:
                userQuota = float(res['Value']['Quota'])
            userMail = res['Value']['Email']
            # Different behaviour for 90% exceeded, 110% exceeded and 150% exceeded
            msg = None
            if (1.5 * userQuota) < usageGB:
                msg = "%s is at %d%s of quota %d GB (%.1f GB)." % (
                    userName, (usageGB * 100) / userQuota, '%', userQuota, usageGB)
                self.log.info(msg)
                self.sendBlockedMail(userName, userMail, userQuota, usageGB)
                self.log.info(
                    "!!!!!!!!!!!!!!!!!!!!!!!!REMEMBER TO MODIFY THE ACLs and STATUS HERE!!!!!!!!!!!!!!!!!"
                )
            elif (1.0 * userQuota) < usageGB:
                msg = "%s is at %d%s of quota %d GB (%.1f GB)." % (
                    userName, (usageGB * 100) / userQuota, '%', userQuota, usageGB)
                self.log.info(msg)
                self.sendSecondWarningMail(userName, userMail, userQuota, usageGB)
            elif (0.9 * userQuota) < usageGB:
                msg = "%s is at %d%s of quota %d GB (%.1f GB)." % (
                    userName, (usageGB * 100) / userQuota, '%', userQuota, usageGB)
                self.log.info(msg)
                self.sendFirstWarningMail(userName, userMail, userQuota, usageGB)
            if msg:
                managerMsg += msg + "\n"
        if managerMsg or errorMsg:
            if managerMsg:
                managerMsg = "Mails have been sent to the following list of users being close to or above quota:\n\n" + managerMsg
            if errorMsg:
                managerMsg += "\nThe following errors have been found by the UserStorageQuotaAgent:\n" + errorMsg
            fromAddress = 'LHCb Data Manager <*****@*****.**>'
            toAddress = '*****@*****.**'
            self.notificationClient.sendMail(toAddress, "User quota warnings",
                                             managerMsg, fromAddress)
        return S_OK()

    def sendFirstWarningMail(self, userName, userMail, quota, usage):
        """ first warning email

        :param self: self reference
        :param str userName: DIRAC user name
        :param str userMail: email address
        :param int quota: default quota
        :param float usage: space currently used
        """
        msgbody = """
This mail has been generated automatically.

You have received this mail because you are approaching your Grid storage usage quota of %s GB.

You are currently using %.1f GB.

Please reduce you usage by removing some files. If you have reduced your usage in the last 24 hours please ignore this message.

Explanations can be found at https://twiki.cern.ch/twiki/bin/view/LHCb/GridStorageQuota
""" % (int(quota), usage)
        fromAddress = 'LHCb Data Manager <*****@*****.**>'
        subject = 'Grid storage use near quota (%s)' % userName
        toAddress = userMail
        self.notificationClient.sendMail(toAddress, subject, msgbody, fromAddress)

    def sendSecondWarningMail(self, userName, userMail, quota, usage):
        """ second warning email

        :param self: self reference
        :param str userName: DIRAC user name
        :param str userMail: email address
        :param int quota: default quota
        :param float usage: space currently used
        """
        msgbody = """
This mail has been generated automatically.

You have received this mail because your Grid storage usage has exceeded your quota of %sGB.

You are currently using %.1f GB.

Please reduce you usage by removing some files. If you have reduced your usage in the last 24 hours please ignore this message.

Explanations can be found at https://twiki.cern.ch/twiki/bin/view/LHCb/GridStorageQuota
""" % (int(quota), usage)
        fromAddress = 'LHCb Data Manager <*****@*****.**>'
        subject = 'Grid storage use over quota (%s)' % userName
        toAddress = userMail
        self.notificationClient.sendMail(toAddress, subject, msgbody, fromAddress)

    def sendBlockedMail(self, userName, userMail, quota, usage):
        """ send blocked email

        :param self: self reference
        :param str userName: DIRAC user name
        :param str userMail: email adress
        :param int quota: default quota
        :param float usage: space used
        """
        msgbody = """
This mail has been generated automatically.

You have received this mail because your Grid storage usage has exceeded your quota of %s GB.

You are currently using %.1f GB.

Your account could soon been given a lower priority and your jobs will run at a lower pace if you don't create space. If you have reduced your usage in the last 24 hours please ignore this message.

Explanations can be found at https://twiki.cern.ch/twiki/bin/view/LHCb/GridStorageQuota
""" % (int(quota), usage)
        fromAddress = 'LHCb Data Manager <*****@*****.**>'
        subject = 'Grid storage use blocked (%s)' % userName
        toAddress = userMail
        self.notificationClient.sendMail(toAddress, subject, msgbody, fromAddress)
class TokenAgent(AgentModule):
    '''
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
    '''

    # Too many public methods
    # pylint: disable-msg=R0904

    def initialize(self):
        ''' TokenAgent initialization

        Creates the RSS/RM/Notification clients; 'notifyHours' controls how
        far in advance of expiration owners are warned (default 10 hours).
        '''
        # Attribute defined outside __init__
        # pylint: disable-msg=W0201
        self.notifyHours = self.am_getOption('notifyHours', 10)
        try:
            self.rsClient = ResourceStatusClient()
            self.rmClient = ResourceManagementClient()
            self.noClient = NotificationClient()
            return S_OK()
        except Exception:
            errorStr = "TokenAgent initialization"
            self.log.exception(errorStr)
            return S_ERROR(errorStr)

    def execute(self):
        '''
        The main TokenAgent execution method.
        Checks for tokens owned by users that are expiring, and notifies those users.
        Calls rsClient.setToken() to set 'RS_SVC' as owner for those tokens that expired.
        '''
        # adminMail accumulates a plain-text summary of all expired/expiring
        # tokens and is mailed at the end of the cycle.
        adminMail = ''
        try:
            reason = 'Out of date token'
            #reAssign the token to RS_SVC
            #for g in self.ELEMENTS:
            validElements = RssConfiguration.getValidElements()
            for granularity in validElements:
                tokensExpired = self.rsClient.getTokens(
                    granularity, tokenExpiration=datetime.datetime.utcnow())
                if tokensExpired['Value']:
                    adminMail += '\nLIST OF EXPIRED %s TOKENS\n' % granularity
                    adminMail += '%s|%s|%s\n' % (
                        'user'.ljust(20), 'name'.ljust(15), 'status type')
                    # NOTE(review): token tuple layout assumed from the indexing
                    # below — [1]=name, [2]=statusType, [9]=owner, [10]=expiration;
                    # confirm against the ResourceStatusClient schema.
                    for token in tokensExpired['Value']:
                        name = token[1]
                        stype = token[2]
                        user = token[9]
                        # Hand the expired token back to the service owner with
                        # an effectively infinite expiration date.
                        self.rsClient.setToken(
                            granularity, name, stype, reason, 'RS_SVC',
                            datetime.datetime(9999, 12, 31, 23, 59, 59))
                        adminMail += ' %s %s %s\n' % (user.ljust(20),
                                                      name.ljust(15), stype)
            #notify token owners
            inNHours = datetime.datetime.utcnow() + datetime.timedelta(
                hours=self.notifyHours)
            #for g in self.ELEMENTS:
            for granularity in validElements:
                tokensExpiring = self.rsClient.getTokens(
                    granularity, tokenExpiration=inNHours)
                if tokensExpiring['Value']:
                    adminMail += '\nLIST OF EXPIRING %s TOKENS\n' % granularity
                    adminMail += '%s|%s|%s\n' % (
                        'user'.ljust(20), 'name'.ljust(15), 'status type')
                    for token in tokensExpiring['Value']:
                        name = token[1]
                        stype = token[2]
                        user = token[9]
                        adminMail += '\n %s %s %s\n' % (user.ljust(20),
                                                        name.ljust(15), stype)
                        #If user is RS_SVC, we ignore this, whenever the token is out, this
                        #agent will set again the token to RS_SVC
                        if user == 'RS_SVC':
                            continue
                        # Ask the policy system what would happen when RSS takes over.
                        pdp = PDP(granularity=granularity, name=name, statusType=stype)
                        decision = pdp.takeDecision()
                        pcresult = decision['PolicyCombinedResult']
                        spresult = decision['SinglePolicyResults']
                        expiration = token[10]
                        mailMessage = "The token for %s %s ( %s )" % (granularity, name, stype)
                        mailMessage = mailMessage + " will expire on %s\n\n" % expiration
                        mailMessage = mailMessage + "You can renew it with command 'dirac-rss-renew-token'.\n"
                        mailMessage = mailMessage + "If you don't take any action, RSS will take control of the resource.\n\n"
                        policyMessage = ''
                        if pcresult['Action']:
                            policyMessage += " Policies applied will set status to %s.\n" % pcresult['Status']
                        for spr in spresult:
                            policyMessage += " %s Status->%s\n" % (
                                spr['PolicyName'].ljust(25), spr['Status'])
                        mailMessage += policyMessage
                        adminMail += policyMessage
                        # Owner's email is looked up in the user registry cache.
                        self.noClient.sendMail(
                            self.rmClient.getUserRegistryCache(user)['Value'][0][2],
                            'Token for %s is expiring' % name, mailMessage)
            if adminMail != '':
                #FIXME: 'ubeda' is not generic ;p
                self.noClient.sendMail(
                    self.rmClient.getUserRegistryCache('ubeda')['Value'][0][2],
                    "Token's summary", adminMail)
            return S_OK()
        except Exception:
            errorStr = "TokenAgent execution"
            self.log.exception(errorStr)
            return S_ERROR(errorStr)

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
class ErrorMessageMonitor( AgentModule ):
  """Agent that mails newly arrived (not yet reviewed) SystemLoggingDB error
  descriptions to a reviewer list and then flags them as reviewed.
  """

  def initialize( self ):
    """Resolve the reviewer mail list ('Reviewer' option, falling back to the
    Operations 'EMail/Logging' value) and create the DB/mail clients."""
    self.systemLoggingDB = SystemLoggingDB()
    self.notification = NotificationClient()
    userList = self.am_getOption( "Reviewer", [] )
    self.log.debug( "Users to be notified:", ', '.join( userList ) )
    mailList = []
    for user in userList:
      mail = getUserOption( user, 'Email', '' )
      if not mail:
        self.log.warn( "Could not get user's mail", user )
      else:
        mailList.append( mail )
    if not mailList:
      # No per-user address resolved: use the Operations logging list.
      mailList = Operations().getValue( 'EMail/Logging', [] )
    if not len( mailList ):
      errString = "There are no valid users in the mailing list"
      varString = "[" + ','.join( userList ) + "]"
      self.log.warn( errString, varString )
    self.log.info( "List of mails to be notified", ','.join( mailList ) )
    self._mailAddress = mailList
    self._subject = 'New error messages were entered in the SystemLoggingDB'
    return S_OK()

  def execute( self ):
    """ The main agent execution method """
    condDict = { 'ReviewedMessage': 0 }
    result = self.systemLoggingDB.getCounters( 'FixedTextMessages', ['ReviewedMessage'],
                                               condDict )
    if not result['OK']:
      return result
    if not result['Value']:
      self.log.info( 'No messages need review' )
      return S_OK( 'No messages need review' )
    returnFields = [ 'FixedTextID', 'FixedTextString', 'SystemName', 'SubSystemName' ]
    result = self.systemLoggingDB._queryDB( showFieldList = returnFields,
                                            groupColumn = 'FixedTextString',
                                            condDict = condDict )
    if not result['OK']:
      self.log.error( 'Failed to obtain the non reviewed Strings', result['Message'] )
      return S_OK()
    messageList = result['Value']
    if messageList == 'None' or not messageList:
      self.log.error( 'The DB query returned an empty result' )
      return S_OK()
    # One line per new description in the mail body.
    mailBody = 'These new messages have arrived to the Logging Service\n'
    for message in messageList:
      mailBody = mailBody + "String: '" + message[1] + "'\tSystem: '" \
          + message[2] + "'\tSubsystem: '" + message[3] + "'\n"
    if self._mailAddress:
      result = self.notification.sendMail( self._mailAddress, self._subject, mailBody )
      if not result['OK']:
        self.log.warn( "The mail could not be sent" )
        return S_OK()
    messageIDs = [ message[0] for message in messageList ]
    condDict = { 'FixedTextID': messageIDs }
    result = self.systemLoggingDB.updateFields( 'FixedTextMessages', ['ReviewedMessage'],
                                                [1], condDict = condDict )
    if not result['OK']:
      # BUGFIX: DIRAC error dicts carry the text under 'Message'; the
      # previous result['ERROR'] raised KeyError in this error path.
      self.log.error( 'Could not update message Status', result['Message'] )
      return S_OK()
    self.log.verbose( 'Updated message Status for:', str( messageList ) )
    self.log.info( "The messages have been sent for review",
                   "There are %s new descriptions" % len( messageList ) )
    return S_OK( "%s Messages have been sent for review" % len( messageList ) )
def __infoFromCE(self):
    """Synchronise site/CE/queue options in the DIRAC CS with the BDII
    (variant using ldapService to derive the CEType and a single-string VO).

    Compares Coordinates/Mail per site and wnTmpDir/architecture/OS/SI00/
    Pilot/CEType/queue options per CE against the GLUE attributes from LDAP,
    staging changes via self.csAPI; OS changes are mailed and all changes
    committed with commitChanges(sortUsers=False).

    NOTE(review): bare excepts and the 'continue' right after
    __checkAlternativeBDIISite (discarding the alternative result) are kept
    as in the original — presumably deliberate best-effort behaviour.
    """
    sitesSection = cfgPath('Resources', 'Sites')
    result = gConfig.getSections(sitesSection)
    if not result['OK']:
        return
    grids = result['Value']
    changed = False
    body = ""
    for grid in grids:
        gridSection = cfgPath(sitesSection, grid)
        result = gConfig.getSections(gridSection)
        if not result['OK']:
            return
        sites = result['Value']
        for site in sites:
            # if site[-2:]!='ru':
            #   continue
            siteSection = cfgPath(gridSection, site)
            opt = gConfig.getOptionsDict(siteSection)['Value']
            name = opt.get('Name', '')
            if name:
                coor = opt.get('Coordinates', 'Unknown')
                mail = opt.get('Mail', 'Unknown')
                result = ldapSite(name)
                if not result['OK']:
                    self.log.warn("BDII site %s: %s" % (name, result['Message']))
                    result = self.__checkAlternativeBDIISite(ldapSite, name)
                if result['OK']:
                    bdiisites = result['Value']
                    if len(bdiisites) == 0:
                        self.log.warn(name, "Error in bdii: leng = 0")
                    else:
                        if not len(bdiisites) == 1:
                            self.log.warn(name, "Warning in bdii: leng = %d" % len(bdiisites))
                        bdiisite = bdiisites[0]
                        try:
                            longitude = bdiisite['GlueSiteLongitude']
                            latitude = bdiisite['GlueSiteLatitude']
                            newcoor = "%s:%s" % (longitude, latitude)
                        except:
                            self.log.warn("Error in bdii coor")
                            newcoor = "Unknown"
                        try:
                            newmail = bdiisite['GlueSiteSysAdminContact'].split(":")[-1].strip()
                        except:
                            self.log.warn("Error in bdii mail")
                            newmail = "Unknown"
                        self.log.debug("%s %s %s" % (name, newcoor, newmail))
                        if newcoor != coor:
                            self.log.info("%s" % (name), "%s -> %s" % (coor, newcoor))
                            if coor == 'Unknown':
                                self.csAPI.setOption(cfgPath(siteSection, 'Coordinates'), newcoor)
                            else:
                                self.csAPI.modifyValue(cfgPath(siteSection, 'Coordinates'), newcoor)
                            changed = True
                        if newmail != mail:
                            self.log.info("%s" % (name), "%s -> %s" % (mail, newmail))
                            if mail == 'Unknown':
                                self.csAPI.setOption(cfgPath(siteSection, 'Mail'), newmail)
                            else:
                                self.csAPI.modifyValue(cfgPath(siteSection, 'Mail'), newmail)
                            changed = True
            celist = List.fromChar(opt.get('CE', ''))
            if not celist:
                self.log.warn(site, 'Empty site list')
                continue
            # result = gConfig.getSections( cfgPath( siteSection,'CEs' )
            # if not result['OK']:
            #   self.log.debug( "Section CEs:", result['Message'] )
            for ce in celist:
                ceSection = cfgPath(siteSection, 'CEs', ce)
                result = gConfig.getOptionsDict(ceSection)
                if not result['OK']:
                    self.log.debug("Section CE", result['Message'])
                    wnTmpDir = 'Unknown'
                    arch = 'Unknown'
                    os = 'Unknown'
                    si00 = 'Unknown'
                    pilot = 'Unknown'
                    cetype = 'Unknown'
                else:
                    ceopt = result['Value']
                    wnTmpDir = ceopt.get('wnTmpDir', 'Unknown')
                    arch = ceopt.get('architecture', 'Unknown')
                    os = ceopt.get('OS', 'Unknown')
                    si00 = ceopt.get('SI00', 'Unknown')
                    pilot = ceopt.get('Pilot', 'Unknown')
                    cetype = ceopt.get('CEType', 'Unknown')
                result = ldapCE(ce)
                if not result['OK']:
                    self.log.warn('Error in bdii for %s' % ce, result['Message'])
                    result = self.__checkAlternativeBDIISite(ldapCE, ce)
                    continue
                try:
                    bdiice = result['Value'][0]
                except:
                    self.log.warn('Error in bdii for %s' % ce, result)
                    bdiice = None
                if bdiice:
                    try:
                        newwnTmpDir = bdiice['GlueSubClusterWNTmpDir']
                    except:
                        newwnTmpDir = 'Unknown'
                    if wnTmpDir != newwnTmpDir and newwnTmpDir != 'Unknown':
                        section = cfgPath(ceSection, 'wnTmpDir')
                        self.log.info(section, " -> ".join((wnTmpDir, newwnTmpDir)))
                        if wnTmpDir == 'Unknown':
                            self.csAPI.setOption(section, newwnTmpDir)
                        else:
                            self.csAPI.modifyValue(section, newwnTmpDir)
                        changed = True
                    try:
                        newarch = bdiice['GlueHostArchitecturePlatformType']
                    except:
                        newarch = 'Unknown'
                    if arch != newarch and newarch != 'Unknown':
                        section = cfgPath(ceSection, 'architecture')
                        self.log.info(section, " -> ".join((arch, newarch)))
                        if arch == 'Unknown':
                            self.csAPI.setOption(section, newarch)
                        else:
                            self.csAPI.modifyValue(section, newarch)
                        changed = True
                    try:
                        newos = '_'.join((bdiice['GlueHostOperatingSystemName'],
                                          bdiice['GlueHostOperatingSystemVersion'],
                                          bdiice['GlueHostOperatingSystemRelease']))
                    except:
                        newos = 'Unknown'
                    if os != newos and newos != 'Unknown':
                        section = cfgPath(ceSection, 'OS')
                        self.log.info(section, " -> ".join((os, newos)))
                        if os == 'Unknown':
                            self.csAPI.setOption(section, newos)
                        else:
                            self.csAPI.modifyValue(section, newos)
                        changed = True
                        # OS changes are reported by mail, hence collected in body.
                        body = body + "OS was changed %s -> %s for %s at %s\n" % (os, newos, ce, site)
                    try:
                        newsi00 = bdiice['GlueHostBenchmarkSI00']
                    except:
                        newsi00 = 'Unknown'
                    if si00 != newsi00 and newsi00 != 'Unknown':
                        section = cfgPath(ceSection, 'SI00')
                        self.log.info(section, " -> ".join((si00, newsi00)))
                        if si00 == 'Unknown':
                            self.csAPI.setOption(section, newsi00)
                        else:
                            self.csAPI.modifyValue(section, newsi00)
                        changed = True
                    try:
                        rte = bdiice['GlueHostApplicationSoftwareRunTimeEnvironment']
                        if self.voName == 'lhcb':
                            if 'VO-lhcb-pilot' in rte:
                                newpilot = 'True'
                            else:
                                newpilot = 'False'
                        else:
                            newpilot = 'Unknown'
                    except:
                        newpilot = 'Unknown'
                    if pilot != newpilot and newpilot != 'Unknown':
                        section = cfgPath(ceSection, 'Pilot')
                        self.log.info(section, " -> ".join((pilot, newpilot)))
                        if pilot == 'Unknown':
                            self.csAPI.setOption(section, newpilot)
                        else:
                            self.csAPI.modifyValue(section, newpilot)
                        changed = True
                # CEType is derived from the published GlueServiceType.
                result = ldapService(ce)
                if not result['OK']:
                    result = self.__checkAlternativeBDIISite(ldapService, ce)
                if result['OK'] and result['Value']:
                    services = result['Value']
                    newcetype = 'LCG'
                    for service in services:
                        if service['GlueServiceType'].count('CREAM'):
                            newcetype = "CREAM"
                else:
                    newcetype = 'Unknown'
                if cetype != newcetype and newcetype != 'Unknown':
                    section = cfgPath(ceSection, 'CEType')
                    self.log.info(section, " -> ".join((cetype, newcetype)))
                    if cetype == 'Unknown':
                        self.csAPI.setOption(section, newcetype)
                    else:
                        self.csAPI.modifyValue(section, newcetype)
                    changed = True
                result = ldapCEState(ce, vo=self.voName)  #getBDIICEVOView
                if not result['OK']:
                    self.log.warn('Error in bdii for queue %s' % ce, result['Message'])
                    result = self.__checkAlternativeBDIISite(ldapCEState, ce, self.voName)
                    continue
                try:
                    queues = result['Value']
                except:
                    # BUGFIX: was result['Massage'] — a typo that raised
                    # KeyError inside this except handler.
                    self.log.warn('Error in bdii for queue %s' % ce, result['Message'])
                    continue
                for queue in queues:
                    try:
                        queueName = queue['GlueCEUniqueID'].split('/')[-1]
                    except:
                        self.log.warn('error in queuename ', queue)
                        continue
                    try:
                        newmaxCPUTime = queue['GlueCEPolicyMaxCPUTime']
                    except:
                        newmaxCPUTime = None
                    newsi00 = None
                    try:
                        caps = queue['GlueCECapability']
                        if type(caps) == type(''):
                            caps = [caps]
                        for cap in caps:
                            if cap.count('CPUScalingReferenceSI00'):
                                newsi00 = cap.split('=')[-1]
                    except:
                        newsi00 = None
                    queueSection = cfgPath(ceSection, 'Queues', queueName)
                    result = gConfig.getOptionsDict(queueSection)
                    if not result['OK']:
                        self.log.warn("Section Queues", result['Message'])
                        maxCPUTime = 'Unknown'
                        si00 = 'Unknown'
                    else:
                        queueopt = result['Value']
                        maxCPUTime = queueopt.get('maxCPUTime', 'Unknown')
                        si00 = queueopt.get('SI00', 'Unknown')
                    if newmaxCPUTime and (maxCPUTime != newmaxCPUTime):
                        section = cfgPath(queueSection, 'maxCPUTime')
                        self.log.info(section, " -> ".join((maxCPUTime, newmaxCPUTime)))
                        if maxCPUTime == 'Unknown':
                            self.csAPI.setOption(section, newmaxCPUTime)
                        else:
                            self.csAPI.modifyValue(section, newmaxCPUTime)
                        changed = True
                    if newsi00 and (si00 != newsi00):
                        section = cfgPath(queueSection, 'SI00')
                        self.log.info(section, " -> ".join((si00, newsi00)))
                        if si00 == 'Unknown':
                            self.csAPI.setOption(section, newsi00)
                        else:
                            self.csAPI.modifyValue(section, newsi00)
                        changed = True
    if changed:
        self.log.info(body)
        if body and self.addressTo and self.addressFrom:
            notification = NotificationClient()
            result = notification.sendMail(self.addressTo, self.subject, body,
                                           self.addressFrom, localAttempt=False)
        return self.csAPI.commitChanges(sortUsers=False)
    else:
        self.log.info("No changes found")
        return S_OK()
# Retrieve user installing the component result = getProxyInfo() if result[ 'OK' ]: user = result[ 'Value' ][ 'username' ] else: DIRACexit( -1 ) if not user: user = '******' notificationClient = NotificationClient() for host in resultAll[ 'Value' ]: if not resultAll[ 'Value' ][ host ][ 'OK' ]: # If the host cannot be contacted, exclude it and send message excludedHosts.append( host ) result = notificationClient.sendMail( Operations().getValue( 'EMail/Production', [] ), 'Unreachable host', '\ndirac-populate-component-db: Could not fill the database with the components from unreachable host %s\n' % host ) if not result[ 'OK' ]: gLogger.error( 'Can not send unreachable host notification mail: %s' % result[ 'Message' ] ) if not resultAll[ 'OK' ]: gLogger.error( resultAll[ 'Message' ] ) DIRACexit( -1 ) resultHosts = client.getHostInfo() if not resultHosts[ 'OK' ]: gLogger.error( resultHosts[ 'Message' ] ) DIRACexit( -1 ) resultInfo = client.getInfo() if not resultInfo[ 'OK' ]: gLogger.error( resultInfo[ 'Message' ] ) DIRACexit( -1 ) resultMySQL = client.getMySQLStatus()
def registerUser(self,paramcopy):
  """Handle a user registration request from the web portal.

  Builds a plain-text summary of the submitted (non-placeholder) form
  values, determines the responsible administrators' e-mail addresses
  (VO admins, with fallback to /Website/UserRegistrationEmail, with a
  further fallback to users in groups carrying the UserAdministrator
  property) and mails them the request.

  :param paramcopy: dict of form-field name -> submitted value
  :return: JSON-style dict {"success": "true"/"false", "result"/"error": text}
  """
  # Unfortunately there is no way to get rid of empty text values in JS, so i have to hardcode it on server side. Hate it!
  default_values = ["John Smith","jsmith","*****@*****.**","+33 9 10 00 10 00","Select prefered virtual organization(s)"]
  default_values.append("Select your country")
  default_values.append("Any additional information you want to provide to administrators")
  dn = getUserDN()
  username = getUsername()
  if not username == "anonymous":
    # Already-known users must not re-register.
    return {"success":"false","error":"You are already registered in DIRAC with username: %s" % username}
  else:
    if not dn:
      # A certificate DN is required to identify the requester.
      return {"success":"false","error":"You have to load certificate to your browser before trying to register"}
  body = ""
  userMail = False
  vo = []
  # Keep only fields whose value differs from the JS placeholder defaults.
  for i in paramcopy:
    if not paramcopy[i] in default_values:
      if i == "email":
        userMail = paramcopy[i]
      if i == "vo":
        vo = paramcopy[i].split(",")
      body = body + str(i) + ' - "' + str(paramcopy[i]) + '"\n'
  if not userMail:
    return {"success":"false","error":"Can not get your email from the request"}
  gLogger.info("!!! VO: ",vo)
  # TODO Check for previous requests
  if not len(vo) > 0:
    # No VO selected: fall back to the site-wide registration address list.
    mails = gConfig.getValue("/Website/UserRegistrationEmail",[])
  else:
    mails = []
    for i in vo:
      i = i.strip()
      voadm = gConfig.getValue("/Registry/VO/%s/VOAdmin" % i,"")
      failsafe = False
      if voadm:
        tmpmail = gConfig.getValue("/Registry/Users/%s/Email" % voadm,"")
        if tmpmail:
          mails.append(tmpmail)
        else:
          gLogger.error("Can not find value for option /Registry/Users/%s/Email Trying failsafe option" % voadm)
          failsafe = True
      else:
        gLogger.error("Can not find value for option /Registry/VO/%s/VOAdmin Trying failsafe option" % i)
        failsafe = True
      if failsafe:
        # `failsafe` is reused here as the fallback address list.
        failsafe = gConfig.getValue("/Website/UserRegistrationEmail",[])
        if len(failsafe) > 0:
          for j in failsafe:
            mails.append(j)
        else:
          gLogger.error("Can not find value for failsafe option /Website/UserRegistrationEmail User registration for VO %s is failed" % i)
  mails = uniqueElements(mails)
  if not len(mails) > 0:
    # Last resort: find users in groups with the UserAdministrator property.
    groupList = list()
    allGroups = gConfig.getSections("/Registry/Groups")
    if not allGroups["OK"]:
      return {"success":"false","error":"No groups found at this DIRAC installation"}
    allGroups = allGroups["Value"]
    for j in allGroups:
      props = getProperties(j)
      if "UserAdministrator" in props:  # property which usd for user administration
        groupList.append(j)
    groupList = uniqueElements(groupList)
    if not len(groupList) > 0:
      return {"success":"false","error":"No groups, resposible for user administration, found"}
    userList = list()
    for i in groupList:
      users = gConfig.getValue("/Registry/Groups/%s/Users" % i,[])
      for j in users:
        userList.append(j)
    userList = uniqueElements(userList)
    if not len(userList) > 0:
      return {"success":"false","error":"Can not find a person resposible for user administration, your request can not be approuved"}
    mails = list()
    mail2name = dict()
    for i in userList:
      tmpmail = gConfig.getValue("/Registry/Users/%s/Email" % i,"")
      if tmpmail:
        mails.append(tmpmail)
      else:
        gLogger.error("Can not find value for option /Registry/Users/%s/Email" % i)
    mails = uniqueElements(mails)
    if not len(mails) > 0:
      return {"success":"false","error":"Can not find an email of the person resposible for the users administration, your request can not be approuved"}
  gLogger.info("Admins emails: ",mails)
  # NOTE(review): this length check duplicates the fallback check above.
  if not len(mails) > 0:
    return {"success":"false","error":"Can not find any emails of DIRAC Administrators"}
  allUsers = gConfig.getSections("/Registry/Users")
  if not allUsers["OK"]:
    return {"success":"false","error":"No users found at this DIRAC installation"}
  allUsers = allUsers["Value"]
  # Map each admin address to a readable full name for the status report.
  mail2name = dict()
  for i in allUsers:
    tmpmail = gConfig.getValue("/Registry/Users/%s/Email" % i,"")
    if tmpmail in mails:
      mail2name[tmpmail] = gConfig.getValue("/Registry/Users/%s/FullName" % i,i)
  sentFailed = list()
  sentSuccess = list()
  errorMessage = list()
  ntc = NotificationClient( getRPCClient )
  for i in mails:
    i = i.strip()
    result = ntc.sendMail(i,"New user has registered",body,userMail,False)
    if not result["OK"]:
      sentFailed.append(mail2name[i])
      errorMessage.append(result["Message"])
    else:
      sentSuccess.append(mail2name[i])
  gLogger.info("Sent success: ",sentSuccess)
  gLogger.info("Sent failure: ",sentFailed)
  errorMessage = uniqueElements(errorMessage)
  if len(sentSuccess) == 0:
    if not len(errorMessage) > 0:
      return {"success":"false","error":"No messages were sent to administrators due techincal reasons"}
    errorMessage = ", ".join(errorMessage)
    return {"success":"false","error":errorMessage}
  sName = ", ".join(sentSuccess)
  fName = ", ".join(sentFailed)
  if len(sentFailed) > 0:
    return {"success":"true","result":"Your registration request were sent successfuly to %s. Failed to sent request to %s." % (sName, fName)}
  return {"success":"true","result":"Your registration request were sent successfuly to %s." % sName}
# Script fragment: extend ResourceStatus tokens for the elements given on the
# command line and mail the calling user a confirmation.
# NOTE(review): `args`, `hours` and the `DIRAC` module come from the
# script's option parsing, outside this fragment.
from DIRAC.Core.Security.Misc import getProxyInfo
from DIRAC import gConfig, gLogger
from DIRAC.Core.DISET.RPCClient import RPCClient
from DIRAC.ResourceStatusSystem.Utilities.CS import getMailForUser

nc = NotificationClient()
s = RPCClient( "ResourceStatus/ResourceStatus" )

res = getProxyInfo()
if not res['OK']:
  gLogger.error( "Failed to get proxy information", res['Message'] )
  DIRAC.exit( 2 )
userName = res['Value']['username']
group = res['Value']['group']
# Only production managers / admins may extend tokens.
if group not in ( 'diracAdmin', 'lhcb_prod' ):
  gLogger.error( "You must be lhcb_prod or diracAdmin to execute this script" )
  gLogger.info( "Please issue 'lhcb-proxy-init -g lhcb_prod' or 'lhcb-proxy-init -g diracAdmin'" )
  DIRAC.exit( 2 )
for arg in args:
  # `whatIs` resolves the element type (site, resource, ...) of the name.
  g = s.whatIs( arg )
  res = s.extendToken( g, arg, hours )
  if not res['OK']:
    gLogger.error( "Problem with extending: %s" % res['Message'] )
    DIRAC.exit( 2 )
  mailMessage = "The token for %s %s has been successfully renewed for others %i hours" % ( g, arg, hours )
  nc.sendMail( getMailForUser( userName )['Value'][0],
               'Token for %s renewed' % arg, mailMessage )
DIRAC.exit( 0 )
class MonitorAgents(AgentModule):
  """MonitorAgents class.

  Watchdog agent: checks the health of agents, executors and services on
  this host (log-file age, service ping), optionally restarts stale ones,
  reconciles component run/stop state with the CS Registry, keeps service
  URLs in the CS up to date, and mails a per-cycle report.
  """

  def __init__(self, *args, **kwargs):
    """Initialize the agent, clients, default values."""
    AgentModule.__init__(self, *args, **kwargs)
    self.name = 'MonitorAgents'
    self.setup = "Production"
    # Master switch: when False nothing is restarted, only reported.
    self.enabled = False
    self.restartAgents = False
    self.restartExecutors = False
    self.restartServices = False
    self.controlComponents = False
    self.commitURLs = False
    self.diracLocation = "/opt/dirac/pro"
    self.sysAdminClient = SystemAdministratorClient(socket.gethostname())
    self.jobMonClient = JobMonitoringClient()
    self.nClient = NotificationClient()
    self.csAPI = None
    # Instance dicts filled in beginExecution(); name -> options dict.
    self.agents = dict()
    self.executors = dict()
    self.services = dict()
    # Errors and per-instance accounting, flushed by sendNotification().
    self.errors = list()
    self.accounting = defaultdict(dict)
    self.addressTo = ["*****@*****.**"]
    self.addressFrom = "*****@*****.**"
    self.emailSubject = "MonitorAgents on %s" % socket.gethostname()

  def logError(self, errStr, varMsg=''):
    """Append errors to a list, which is sent in email notification."""
    self.log.error(errStr, varMsg)
    self.errors.append(errStr + " " + varMsg)

  def beginExecution(self):
    """Reload the configurations before every cycle."""
    self.setup = self.am_getOption("Setup", self.setup)
    self.enabled = self.am_getOption("EnableFlag", self.enabled)
    self.restartAgents = self.am_getOption("RestartAgents", self.restartAgents)
    self.restartExecutors = self.am_getOption("RestartExecutors", self.restartExecutors)
    self.restartServices = self.am_getOption("RestartServices", self.restartServices)
    self.diracLocation = os.environ.get("DIRAC", self.diracLocation)
    self.addressTo = self.am_getOption('MailTo', self.addressTo)
    self.addressFrom = self.am_getOption('MailFrom', self.addressFrom)
    self.controlComponents = self.am_getOption('ControlComponents', self.controlComponents)
    self.commitURLs = self.am_getOption('CommitURLs', self.commitURLs)
    self.csAPI = CSAPI()
    # Refresh the instance inventories for this cycle.
    res = self.getRunningInstances(instanceType='Agents')
    if not res["OK"]:
      return S_ERROR("Failure to get running agents")
    self.agents = res["Value"]
    res = self.getRunningInstances(instanceType='Executors')
    if not res["OK"]:
      return S_ERROR("Failure to get running executors")
    self.executors = res["Value"]
    res = self.getRunningInstances(instanceType='Services')
    if not res["OK"]:
      return S_ERROR("Failure to get running services")
    self.services = res["Value"]
    self.accounting.clear()
    return S_OK()

  def sendNotification(self):
    """Send email notification about changes done in the last cycle."""
    # Nothing to report -> no mail.
    if not(self.errors or self.accounting):
      return S_OK()
    emailBody = ""
    rows = []
    for instanceName, val in self.accounting.iteritems():
      rows.append([[instanceName],
                   [val.get('Treatment', 'No Treatment')],
                   [str(val.get('LogAge', 'Not Relevant'))]])
    if rows:
      columns = ["Instance", "Treatment", "Log File Age (Minutes)"]
      emailBody += printTable(columns, rows, printOut=False, numbering=False, columnSeparator=' | ')
    if self.errors:
      emailBody += "\n\nErrors:"
      emailBody += "\n".join(self.errors)
    self.log.notice("Sending Email:\n" + emailBody)
    for address in self.addressTo:
      res = self.nClient.sendMail(address, self.emailSubject, emailBody,
                                  self.addressFrom, localAttempt=False)
      if not res['OK']:
        self.log.error("Failure to send Email notification to ", address)
        continue
    # Reset the per-cycle state once the report has been attempted.
    self.errors = []
    self.accounting.clear()
    return S_OK()

  def getRunningInstances(self, instanceType='Agents', runitStatus='Run'):
    """Return a dict of running agents, executors or services.

    Key is agent's name, value contains dict with PollingTime, PID, Port,
    Module, RunitStatus, LogFileLocation

    :param str instanceType: 'Agents', 'Executors', 'Services'
    :param str runitStatus: Return only those instances with given RunitStatus or 'All'
    :returns: Dictionary of running instances
    """
    res = self.sysAdminClient.getOverallStatus()
    if not res["OK"]:
      self.logError("Failure to get %s from system administrator client" % instanceType, res["Message"])
      return res
    val = res['Value'][instanceType]
    runningAgents = defaultdict(dict)
    for system, agents in val.iteritems():
      for agentName, agentInfo in agents.iteritems():
        # Only consider instances installed for this setup.
        if agentInfo['Setup'] and agentInfo['Installed']:
          if runitStatus != 'All' and agentInfo['RunitStatus'] != runitStatus:
            continue
          confPath = cfgPath('/Systems/' + system + '/' + self.setup + '/%s/' % instanceType + agentName)
          for option, default in (('PollingTime', HOUR), ('Port', None)):
            optPath = os.path.join(confPath, option)
            runningAgents[agentName][option] = gConfig.getValue(optPath, default)
          # runit writes the live log to .../log/current.
          runningAgents[agentName]["LogFileLocation"] = \
              os.path.join(self.diracLocation, 'runit', system, agentName, 'log', 'current')
          runningAgents[agentName]["PID"] = agentInfo["PID"]
          runningAgents[agentName]['Module'] = agentInfo['Module']
          runningAgents[agentName]['RunitStatus'] = agentInfo['RunitStatus']
          runningAgents[agentName]['System'] = system
    return S_OK(runningAgents)

  def on_terminate(self, agentName, process):
    """Execute callback when a process terminates gracefully."""
    self.log.info("%s's process with ID: %s has been terminated successfully" % (agentName, process.pid))

  def execute(self):
    """Execute checks for agents, executors, services."""
    for instanceType in ('executor', 'agent', 'service'):
      for name, options in getattr(self, instanceType + 's').iteritems():
        # call checkAgent, checkExecutor, checkService
        res = getattr(self, 'check' + instanceType.capitalize())(name, options)
        if not res['OK']:
          self.logError("Failure when checking %s" % instanceType, "%s, %s" % (name, res['Message']))
    res = self.componentControl()
    if not res['OK']:
      # These two messages mean the CS sections simply are not defined;
      # that is not treated as an error.
      if "Stopped does not exist" not in res['Message'] and \
         "Running does not exist" not in res['Message']:
        self.logError("Failure to control components", res['Message'])
    if not self.errors:
      res = self.checkURLs()
      if not res['OK']:
        self.logError("Failure to check URLs", res['Message'])
    else:
      self.logError('Something was wrong before, not checking URLs this time')
    self.sendNotification()
    if self.errors:
      return S_ERROR("Error during this cycle, check log")
    return S_OK()

  @staticmethod
  def getLastAccessTime(logFileLocation):
    """Return the age of log file.

    :returns: S_OK(timedelta) with the age, or S_ERROR if the file
        cannot be stat'ed
    """
    lastAccessTime = 0
    try:
      lastAccessTime = os.path.getmtime(logFileLocation)
      lastAccessTime = datetime.fromtimestamp(lastAccessTime)
    except OSError as e:
      return S_ERROR('Failed to access logfile %s: %r' % (logFileLocation, e))
    now = datetime.now()
    age = now - lastAccessTime
    return S_OK(age)

  def restartInstance(self, pid, instanceName, enabled):
    """Kill a process which is then restarted automatically.

    runit restarts the supervised process after it dies; this method only
    terminates (then force-kills) the process tree.
    """
    if not (self.enabled and enabled):
      self.log.info("Restarting is disabled, please restart %s manually" % instanceName)
      self.accounting[instanceName]["Treatment"] = "Please restart it manually"
      return S_OK(NO_RESTART)
    try:
      agentProc = psutil.Process(int(pid))
      processesToTerminate = agentProc.children(recursive=True)
      processesToTerminate.append(agentProc)
      for proc in processesToTerminate:
        proc.terminate()
      # Give processes 5 seconds to exit gracefully before killing.
      _gone, alive = psutil.wait_procs(processesToTerminate, timeout=5,
                                       callback=partial(self.on_terminate, instanceName))
      for proc in alive:
        self.log.info("Forcefully killing process %s" % proc.pid)
        proc.kill()
      return S_OK()
    except psutil.Error as err:
      self.logError("Exception occurred in terminating processes", "%s" % err)
      return S_ERROR()

  def checkService(self, serviceName, options):
    """Ping the service, restart if the ping does not respond."""
    url = self._getURL(serviceName, options)
    self.log.info("Pinging service", url)
    pingRes = Client().ping(url=url)
    if not pingRes['OK']:
      self.log.info('Failure pinging service: %s: %s' % (url, pingRes['Message']))
      res = self.restartInstance(int(options['PID']), serviceName, self.restartServices)
      if not res["OK"]:
        return res
      elif res['OK'] and res['Value'] != NO_RESTART:
        self.accounting[serviceName]["Treatment"] = "Successfully Restarted"
        # NOTE(review): log says "Agent" for a service, and control falls
        # through to "Service responded OK" below even after a failed ping.
        self.log.info("Agent %s has been successfully restarted" % serviceName)
    self.log.info("Service responded OK")
    return S_OK()

  def checkAgent(self, agentName, options):
    """Check the age of agent's log file, if it is too old then restart the agent."""
    pollingTime, currentLogLocation, pid = options['PollingTime'], options['LogFileLocation'], options['PID']
    self.log.info("Checking Agent: %s" % agentName)
    self.log.info("Polling Time: %s" % pollingTime)
    self.log.info("Current Log File location: %s" % currentLogLocation)
    res = self.getLastAccessTime(currentLogLocation)
    if not res["OK"]:
      return res
    age = res["Value"]
    # NOTE(review): timedelta.seconds wraps at 24h; an agent silent for
    # more than a day looks young again -- total_seconds() would be safer.
    self.log.info("Current log file for %s is %d minutes old" % (agentName, (age.seconds / MINUTES)))
    maxLogAge = max(pollingTime + HOUR, 2 * HOUR)
    if age.seconds < maxLogAge:
      return S_OK()
    self.log.info("Current log file is too old for Agent %s" % agentName)
    self.accounting[agentName]["LogAge"] = age.seconds / MINUTES
    res = self.restartInstance(int(pid), agentName, self.restartAgents)
    if not res["OK"]:
      return res
    elif res['OK'] and res['Value'] != NO_RESTART:
      self.accounting[agentName]["Treatment"] = "Successfully Restarted"
      self.log.info("Agent %s has been successfully restarted" % agentName)
    return S_OK()

  def checkExecutor(self, executor, options):
    """Check the age of executor log file, if too old check for jobs in checking status, then restart the executors."""
    currentLogLocation = options['LogFileLocation']
    pid = options['PID']
    self.log.info("Checking executor: %s" % executor)
    self.log.info("Current Log File location: %s" % currentLogLocation)
    res = self.getLastAccessTime(currentLogLocation)
    if not res["OK"]:
      return res
    age = res["Value"]
    # NOTE(review): same timedelta.seconds wrap-around caveat as checkAgent.
    self.log.info("Current log file for %s is %d minutes old" % (executor, (age.seconds / MINUTES)))
    if age.seconds < 2 * HOUR:
      return S_OK()
    self.log.info("Current log file is too old for Executor %s" % executor)
    self.accounting[executor]["LogAge"] = age.seconds / MINUTES
    # Executors without pending 'Checking' jobs are legitimately idle.
    res = self.checkForCheckingJobs(executor)
    if not res['OK']:
      return res
    if res['OK'] and res['Value'] == NO_CHECKING_JOBS:
      self.accounting.pop(executor, None)
      return S_OK(NO_RESTART)
    res = self.restartInstance(int(pid), executor, self.restartExecutors)
    if not res["OK"]:
      return res
    elif res['OK'] and res['Value'] != NO_RESTART:
      self.accounting[executor]["Treatment"] = "Successfully Restarted"
      self.log.info("Executor %s has been successfully restarted" % executor)
    return S_OK()

  def checkForCheckingJobs(self, executorName):
    """Check if there are checking jobs with the **executorName** as current MinorStatus."""
    attrDict = {'Status': 'Checking', 'MinorStatus': executorName}
    # returns list of jobs IDs
    resJobs = self.jobMonClient.getJobs(attrDict)
    if not resJobs['OK']:
      self.logError("Could not get jobs for this executor", "%s: %s" % (executorName, resJobs['Message']))
      return resJobs
    if resJobs['Value']:
      self.log.info("Found %d jobs in 'Checking' status for %s" % (len(resJobs['Value']), executorName))
      return S_OK(CHECKING_JOBS)
    self.log.info("Found no jobs in 'Checking' status for %s" % executorName)
    return S_OK(NO_CHECKING_JOBS)

  def componentControl(self):
    """Monitor and control component status as defined in the CS.

    Check for running and stopped components and ensure they have the
    proper status as defined in the CS Registry/Hosts/_HOST_/[Running|Stopped]
    sections

    :returns: :func:`~DIRAC:DIRAC.Core.Utilities.ReturnValues.S_OK`,
        :func:`~DIRAC:DIRAC.Core.Utilities.ReturnValues.S_ERROR`
    """
    # get the current status of the components
    resCurrent = self._getCurrentComponentStatus()
    if not resCurrent['OK']:
      return resCurrent
    currentStatus = resCurrent['Value']
    resDefault = self._getDefaultComponentStatus()
    if not resDefault['OK']:
      return resDefault
    defaultStatus = resDefault['Value']
    # ensure instances are in the right state
    shouldBe = {}
    shouldBe['Run'] = defaultStatus['Run'].intersection(currentStatus['Down'])
    shouldBe['Down'] = defaultStatus['Down'].intersection(currentStatus['Run'])
    # Present on one side only: installed-but-unconfigured or vice versa.
    shouldBe['Unknown'] = defaultStatus['All'].symmetric_difference(currentStatus['All'])
    self._ensureComponentRunning(shouldBe['Run'])
    self._ensureComponentDown(shouldBe['Down'])
    for instance in shouldBe['Unknown']:
      self.logError("Unknown instance", "%r, either uninstall or add to config" % instance)
    return S_OK()

  def _getCurrentComponentStatus(self):
    """Get current status for components."""
    resOverall = self.sysAdminClient.getOverallStatus()
    if not resOverall['OK']:
      return resOverall
    currentStatus = {'Down': set(), 'Run': set(), 'All': set()}
    informationDict = resOverall['Value']
    for systemsDict in informationDict.values():
      for system, instancesDict in systemsDict.items():
        for instanceName, instanceInfoDict in instancesDict.items():
          # Instances are identified as "<system>__<name>" throughout.
          identifier = '%s__%s' % (system, instanceName)
          runitStatus = instanceInfoDict.get('RunitStatus')
          if runitStatus in ('Run', 'Down'):
            currentStatus[runitStatus].add(identifier)
    currentStatus['All'] = currentStatus['Run'] | currentStatus['Down']
    return S_OK(currentStatus)

  def _getDefaultComponentStatus(self):
    """Get the configured status of the components."""
    host = socket.gethostname()
    defaultStatus = {'Down': set(), 'Run': set(), 'All': set()}
    resRunning = gConfig.getOptionsDict(os.path.join('/Registry/Hosts/', host, 'Running'))
    resStopped = gConfig.getOptionsDict(os.path.join('/Registry/Hosts/', host, 'Stopped'))
    if not resRunning['OK']:
      return resRunning
    if not resStopped['OK']:
      return resStopped
    defaultStatus['Run'] = set(resRunning['Value'].keys())
    defaultStatus['Down'] = set(resStopped['Value'].keys())
    defaultStatus['All'] = defaultStatus['Run'] | defaultStatus['Down']
    # A component listed in both Running and Stopped is a config error.
    if defaultStatus['Run'].intersection(defaultStatus['Down']):
      self.logError("Overlap in configuration", str(defaultStatus['Run'].intersection(defaultStatus['Down'])))
      return S_ERROR("Bad host configuration")
    return S_OK(defaultStatus)

  def _ensureComponentRunning(self, shouldBeRunning):
    """Ensure the correct components are running."""
    for instance in shouldBeRunning:
      self.log.info("Starting instance %s" % instance)
      system, name = instance.split('__')
      if self.controlComponents:
        res = self.sysAdminClient.startComponent(system, name)
        if not res['OK']:
          self.logError("Failed to start component:", "%s: %s" % (instance, res['Message']))
        else:
          self.accounting[instance]["Treatment"] = "Instance was down, started instance"
      else:
        # Dry-run mode: only report what should happen.
        self.accounting[instance]["Treatment"] = "Instance is down, should be started"

  def _ensureComponentDown(self, shouldBeDown):
    """Ensure the correct components are not running."""
    for instance in shouldBeDown:
      self.log.info("Stopping instance %s" % instance)
      system, name = instance.split('__')
      if self.controlComponents:
        res = self.sysAdminClient.stopComponent(system, name)
        if not res['OK']:
          self.logError("Failed to stop component:", "%s: %s" % (instance, res['Message']))
        else:
          self.accounting[instance]["Treatment"] = "Instance was running, stopped instance"
      else:
        # Dry-run mode: only report what should happen.
        self.accounting[instance]["Treatment"] = "Instance is running, should be stopped"

  def checkURLs(self):
    """Ensure that the running services have their URL in the Config."""
    self.log.info("Checking URLs")
    # get services again, in case they were started/stop in controlComponents
    gConfig.forceRefresh(fromMaster=True)
    res = self.getRunningInstances(instanceType='Services', runitStatus='All')
    if not res["OK"]:
      return S_ERROR("Failure to get running services")
    self.services = res["Value"]
    for service, options in self.services.iteritems():
      self.log.debug("Checking URL for %s with options %s" % (service, options))
      # ignore SystemAdministrator, does not have URLs
      if 'SystemAdministrator' in service:
        continue
      self._checkServiceURL(service, options)
    if self.csAPI.csModified and self.commitURLs:
      self.log.info("Commiting changes to the CS")
      result = self.csAPI.commit()
      if not result['OK']:
        self.logError('Commit to CS failed', result['Message'])
        return S_ERROR("Failed to commit to CS")
    return S_OK()

  def _checkServiceURL(self, serviceName, options):
    """Ensure service URL is properly configured in the CS."""
    url = self._getURL(serviceName, options)
    system = options['System']
    module = options['Module']
    self.log.info("Checking URLs for %s/%s" % (system, module))
    urlsConfigPath = os.path.join('/Systems', system, self.setup, 'URLs', module)
    urls = gConfig.getValue(urlsConfigPath, [])
    self.log.debug("Found configured URLs for %s: %s" % (module, urls))
    self.log.debug("This URL is %s" % url)
    runitStatus = options['RunitStatus']
    wouldHave = 'Would have ' if not self.commitURLs else ''
    # Running service missing from the CS URL list -> add it.
    if runitStatus == 'Run' and url not in urls:
      urls.append(url)
      message = "%sAdded URL %s to URLs for %s/%s" % (wouldHave, url, system, module)
      self.log.info(message)
      self.accounting[serviceName + "/URL"]["Treatment"] = message
      self.csAPI.modifyValue(urlsConfigPath, ",".join(urls))
    # Stopped service still advertised in the CS -> remove it.
    if runitStatus == 'Down' and url in urls:
      urls.remove(url)
      message = "%sRemoved URL %s from URLs for %s/%s" % (wouldHave, url, system, module)
      self.log.info(message)
      self.accounting[serviceName + "/URL"]["Treatment"] = message
      self.csAPI.modifyValue(urlsConfigPath, ",".join(urls))

  @staticmethod
  def _getURL(serviceName, options):
    """Return URL for the service."""
    system = options['System']
    port = options['Port']
    host = socket.gethostname()
    url = 'dips://%s:%s/%s/%s' % (host, port, system, serviceName)
    return url
def sendMail(self, sendDict=None, title=None, body=None, fromAddress=None):
  """Send an e-mail to every address in the address book.

  :param dict sendDict: address book of the form { e-mail : name }
  :param str title: the e-mail's Subject
  :param str body: the e-mail's Body
  :param str fromAddress: e-mail address on behalf of whom the message is sent
  :return: JSON-style dict {"success": "true"/"false", "result"/"error": text}
  """
  # Validate all mandatory arguments up front.
  # Fix: the original returned an EMPTY error string when sendDict was
  # missing; now every missing argument is reported by name.
  for argValue, argName in ((sendDict, "sendDict"), (title, "title"),
                            (body, "body"), (fromAddress, "fromAddress")):
    if not argValue:
      result = "%s argument is missing" % argName
      gLogger.debug(result)
      return {"success": "false", "error": result}

  sentSuccess = list()
  sentFailed = list()
  gLogger.debug("Initializing Notification client")
  # Static RPC client so each sendMail call reuses the same endpoint.
  ntc = NotificationClient(lambda x, timeout: RPCClient(x, timeout=timeout, static=True))
  for email, name in sendDict.iteritems():
    result = ntc.sendMail(email, title, body, fromAddress, False)
    if not result["OK"]:
      error = name + ": " + result["Message"]
      sentFailed.append(error)
      gLogger.error("Sent failure: ", error)
    else:
      gLogger.info("Successfully sent to %s" % name)
      sentSuccess.append(name)

  # Build the status report from the success/failure lists.
  success = ", ".join(sentSuccess)
  failure = "\n".join(sentFailed)
  if len(success) > 0 and len(failure) > 0:
    result = "Successfully sent e-mail to: "
    result = result + success + "\n\nFailed to send e-mail to:\n" + failure
    gLogger.debug(result)
    return {"success": "true", "result": result}
  elif len(success) > 0 and len(failure) < 1:
    result = "Successfully sent e-mail to: %s" % success
    gLogger.debug(result)
    return {"success": "true", "result": result}
  elif len(success) < 1 and len(failure) > 0:
    result = "Failed to sent email to:\n%s" % failure
    gLogger.debug(result)
    return {"success": "false", "error": result}
  else:
    result = "No messages were sent due technical failure"
    gLogger.debug(result)
    return {"success": "false", "error": result}
# Script fragment: parse "Header: value" pairs from `head` (lines separated
# by a literal "\n" sequence), then send `body` via the Notification service.
# NOTE(review): `head` and `body` are built earlier in the full script.
body = "".join( body )
try:
  # Split on the literal two-character sequence "\n", then on the first ':'.
  headers = dict( ( i.strip() , j.strip()) for i , j in
                  ( item.split( ':' ) for item in head.split( '\\n' ) ) )
except:
  gLogger.error( "Failed to convert string: %s to email headers" % head )
  DIRAC.exit( 3 )
if not "To" in headers:
  # A recipient is mandatory.
  gLogger.error( "Failed to get 'To:' field from headers %s" % head )
  DIRAC.exit( 4 )
to = headers[ "To" ]
# Default sender is this host; 'From' header overrides it.
origin = socket.gethostname()
if "From" in headers:
  origin = headers[ "From" ]
subject = ""
if "Subject" in headers:
  subject = headers[ "Subject" ]
ntc = NotificationClient()
result = ntc.sendMail( to , subject , body , origin , localAttempt = False )
if not result[ "OK" ]:
  gLogger.error( result[ "Message" ] )
  DIRAC.exit( 5 )
DIRAC.exit( 0 )
def __lookForCE(self):
  """Query the BDII for CEs not yet known to the CS and mail a report.

  CEs already configured in /Resources/Sites (or listed in the BannedCEs
  option) are skipped.  For each new CE the OS info and queue states are
  collected; CEs with at least one 'Production' queue are included in the
  notification mail, together with ready-to-run dirac-admin-add-site lines.
  """
  knownces = self.am_getOption('BannedCEs', [])
  # Collect every CE already present in the CS.
  result = gConfig.getSections('/Resources/Sites')
  if not result['OK']:
    return
  grids = result['Value']
  for grid in grids:
    result = gConfig.getSections('/Resources/Sites/%s' % grid)
    if not result['OK']:
      return
    sites = result['Value']
    for site in sites:
      opt = gConfig.getOptionsDict('/Resources/Sites/%s/%s' % (grid, site))['Value']
      ces = List.fromChar(opt.get('CE', ''))
      knownces += ces
  response = ldapCEState('', vo=self.voName)
  if not response['OK']:
    self.log.error("Error during BDII request", response['Message'])
    response = self.__checkAlternativeBDIISite(ldapCEState, '', self.voName)
    return response
  # Deduplicate candidate CE names via dict keys.
  newces = {}
  for queue in response['Value']:
    try:
      queuename = queue['GlueCEUniqueID']
    except:
      continue
    cename = queuename.split(":")[0]
    if not cename in knownces:
      newces[cename] = None
      self.log.debug("newce", cename)
  body = ""
  possibleNewSites = []
  for ce in newces.iterkeys():
    # Resolve the GOCDB site name for the CE from its cluster entry.
    response = ldapCluster(ce)
    if not response['OK']:
      self.log.warn("Error during BDII request", response['Message'])
      # NOTE(review): the alternative-BDII result is discarded by the
      # following continue -- presumably intentional best-effort; confirm.
      response = self.__checkAlternativeBDIISite(ldapCluster, ce)
      continue
    clusters = response['Value']
    if len(clusters) != 1:
      self.log.warn("Error in cluster length", " CE %s Length %d" % (ce, len(clusters)))
    if len(clusters) == 0:
      continue
    cluster = clusters[0]
    fkey = cluster.get('GlueForeignKey', [])
    if type(fkey) == type(''):
      fkey = [fkey]
    nameBDII = None
    for entry in fkey:
      if entry.count('GlueSiteUniqueID'):
        nameBDII = entry.split('=')[1]
        break
    if not nameBDII:
      continue
    cestring = "CE: %s, GOCDB Name: %s" % (ce, nameBDII)
    self.log.info(cestring)
    # Collect OS information for the report.
    response = ldapCE(ce)
    if not response['OK']:
      self.log.warn("Error during BDII request", response['Message'])
      response = self.__checkAlternativeBDIISite(ldapCE, ce)
      continue
    ceinfos = response['Value']
    if len(ceinfos):
      ceinfo = ceinfos[0]
      systemName = ceinfo.get('GlueHostOperatingSystemName', 'Unknown')
      systemVersion = ceinfo.get('GlueHostOperatingSystemVersion', 'Unknown')
      systemRelease = ceinfo.get('GlueHostOperatingSystemRelease', 'Unknown')
    else:
      systemName = "Unknown"
      systemVersion = "Unknown"
      systemRelease = "Unknown"
    osstring = "SystemName: %s, SystemVersion: %s, SystemRelease: %s" % (
        systemName, systemVersion, systemRelease)
    self.log.info(osstring)
    # Collect the queue states; only CEs with a Production queue are useful.
    response = ldapCEState(ce, vo=self.voName)
    if not response['OK']:
      self.log.warn("Error during BDII request", response['Message'])
      response = self.__checkAlternativeBDIISite(
          ldapCEState, ce, self.voName)
      continue
    newcestring = "\n\n%s\n%s" % (cestring, osstring)
    usefull = False
    cestates = response['Value']
    for cestate in cestates:
      queuename = cestate.get('GlueCEUniqueID', 'UnknownName')
      queuestatus = cestate.get('GlueCEStateStatus', 'UnknownStatus')
      queuestring = "%s %s" % (queuename, queuestatus)
      self.log.info(queuestring)
      newcestring += "\n%s" % queuestring
      if queuestatus.count('Production'):
        usefull = True
    if usefull:
      body += newcestring
      possibleNewSites.append(
          'dirac-admin-add-site DIRACSiteName %s %s' % (nameBDII, ce))
  if body:
    body = "We are glade to inform You about new CE(s) possibly suitable for %s:\n" % self.voName + body
    body += "\n\nTo suppress information about CE add its name to BannedCEs list."
    for possibleNewSite in possibleNewSites:
      body = "%s\n%s" % (body, possibleNewSite)
    self.log.info(body)
    if self.addressTo and self.addressFrom:
      notification = NotificationClient()
      result = notification.sendMail(self.addressTo, self.subject, body,
                                     self.addressFrom, localAttempt=False)
  return S_OK()
def __infoFromCE( self ):
  """Synchronize site and CE information in the DIRAC CS with the BDII.

  For every site in the CS: refresh Coordinates and Mail from the BDII site
  record, then for every CE refresh wnTmpDir, architecture, OS, SI00, Pilot,
  CEType, and per-queue maxCPUTime, SI00 and VO list. Changes are staged in
  self.csAPI and committed at the end; an OS-change summary may be mailed.

  Fix: the except handler around ``queues = result['Value']`` logged
  ``result['Massage']`` (a KeyError itself) — corrected to ``Message``.

  :return: result of ``self.csAPI.commit()`` when something changed,
           S_OK() when nothing changed, None if the CS site tree is unreadable
           (original behavior kept).
  """
  sitesSection = cfgPath( 'Resources', 'Sites' )
  result = gConfig.getSections( sitesSection )
  if not result['OK']:
    return
  grids = result['Value']
  changed = False
  body = ""
  for grid in grids:
    gridSection = cfgPath( sitesSection, grid )
    result = gConfig.getSections( gridSection )
    if not result['OK']:
      return
    sites = result['Value']
    for site in sites:
      siteSection = cfgPath( gridSection, site )
      opt = gConfig.getOptionsDict( siteSection )['Value']
      name = opt.get( 'Name', '' )
      if name:
        # Site-level data: coordinates and admin contact from the BDII
        coor = opt.get( 'Coordinates', 'Unknown' )
        mail = opt.get( 'Mail', 'Unknown' )
        result = ldapSite( name )
        if not result['OK']:
          self.log.warn( "BDII site %s: %s" % ( name, result['Message'] ) )
          result = self.__checkAlternativeBDIISite( ldapSite, name )
        if result['OK']:
          bdiiSites = result['Value']
          if len( bdiiSites ) == 0:
            self.log.warn( name, "Error in BDII: leng = 0" )
          else:
            if not len( bdiiSites ) == 1:
              self.log.warn( name, "Warning in BDII: leng = %d" % len( bdiiSites ) )
            bdiiSite = bdiiSites[0]
            try:
              longitude = bdiiSite['GlueSiteLongitude']
              latitude = bdiiSite['GlueSiteLatitude']
              newcoor = "%s:%s" % ( longitude, latitude )
            except:
              self.log.warn( "Error in BDII coordinates" )
              newcoor = "Unknown"
            try:
              newmail = bdiiSite['GlueSiteSysAdminContact'].split( ":" )[-1].strip()
            except:
              self.log.warn( "Error in BDII mail" )
              newmail = "Unknown"
            self.log.debug( "%s %s %s" % ( name, newcoor, newmail ) )
            if newcoor != coor:
              self.log.info( "%s" % ( name ), "%s -> %s" % ( coor, newcoor ) )
              if coor == 'Unknown':
                self.csAPI.setOption( cfgPath( siteSection, 'Coordinates' ), newcoor )
              else:
                self.csAPI.modifyValue( cfgPath( siteSection, 'Coordinates' ), newcoor )
              changed = True
            if newmail != mail:
              self.log.info( "%s" % ( name ), "%s -> %s" % ( mail, newmail ) )
              if mail == 'Unknown':
                self.csAPI.setOption( cfgPath( siteSection, 'Mail' ), newmail )
              else:
                self.csAPI.modifyValue( cfgPath( siteSection, 'Mail' ), newmail )
              changed = True
      ceList = List.fromChar( opt.get( 'CE', '' ) )
      if not ceList:
        self.log.warn( site, 'Empty site list' )
        continue
      # result = gConfig.getSections( cfgPath( siteSection,'CEs' )
      # if not result['OK']:
      #   self.log.debug( "Section CEs:", result['Message'] )
      for ce in ceList:
        # Current CS values for this CE (default 'Unknown' if section missing)
        ceSection = cfgPath( siteSection, 'CEs', ce )
        result = gConfig.getOptionsDict( ceSection )
        if not result['OK']:
          self.log.debug( "Section CE", result['Message'] )
          wnTmpDir = 'Unknown'
          arch = 'Unknown'
          os = 'Unknown'
          si00 = 'Unknown'
          pilot = 'Unknown'
          ceType = 'Unknown'
        else:
          ceopt = result['Value']
          wnTmpDir = ceopt.get( 'wnTmpDir', 'Unknown' )
          arch = ceopt.get( 'architecture', 'Unknown' )
          os = ceopt.get( 'OS', 'Unknown' )
          si00 = ceopt.get( 'SI00', 'Unknown' )
          pilot = ceopt.get( 'Pilot', 'Unknown' )
          ceType = ceopt.get( 'CEType', 'Unknown' )
        result = ldapCE( ce )
        if not result['OK']:
          self.log.warn( 'Error in BDII for %s' % ce, result['Message'] )
          result = self.__checkAlternativeBDIISite( ldapCE, ce )
          continue
        try:
          bdiiCE = result['Value'][0]
        except:
          self.log.warn( 'Error in BDII for %s' % ce, result )
          bdiiCE = None
        if bdiiCE:
          # Each attribute: read from BDII, update CS only on a real change
          try:
            newWNTmpDir = bdiiCE['GlueSubClusterWNTmpDir']
          except:
            newWNTmpDir = 'Unknown'
          if wnTmpDir != newWNTmpDir and newWNTmpDir != 'Unknown':
            section = cfgPath( ceSection, 'wnTmpDir' )
            self.log.info( section, " -> ".join( ( wnTmpDir, newWNTmpDir ) ) )
            if wnTmpDir == 'Unknown':
              self.csAPI.setOption( section, newWNTmpDir )
            else:
              self.csAPI.modifyValue( section, newWNTmpDir )
            changed = True
          try:
            newArch = bdiiCE['GlueHostArchitecturePlatformType']
          except:
            newArch = 'Unknown'
          if arch != newArch and newArch != 'Unknown':
            section = cfgPath( ceSection, 'architecture' )
            self.log.info( section, " -> ".join( ( arch, newArch ) ) )
            if arch == 'Unknown':
              self.csAPI.setOption( section, newArch )
            else:
              self.csAPI.modifyValue( section, newArch )
            changed = True
          try:
            newOS = '_'.join( ( bdiiCE['GlueHostOperatingSystemName'],
                                bdiiCE['GlueHostOperatingSystemVersion'],
                                bdiiCE['GlueHostOperatingSystemRelease'] ) )
          except:
            newOS = 'Unknown'
          if os != newOS and newOS != 'Unknown':
            section = cfgPath( ceSection, 'OS' )
            self.log.info( section, " -> ".join( ( os, newOS ) ) )
            if os == 'Unknown':
              self.csAPI.setOption( section, newOS )
            else:
              self.csAPI.modifyValue( section, newOS )
            changed = True
            # OS changes are the only ones reported by mail
            body = body + "OS was changed %s -> %s for %s at %s\n" % ( os, newOS, ce, site )
          try:
            newSI00 = bdiiCE['GlueHostBenchmarkSI00']
          except:
            newSI00 = 'Unknown'
          if si00 != newSI00 and newSI00 != 'Unknown':
            section = cfgPath( ceSection, 'SI00' )
            self.log.info( section, " -> ".join( ( si00, newSI00 ) ) )
            if si00 == 'Unknown':
              self.csAPI.setOption( section, newSI00 )
            else:
              self.csAPI.modifyValue( section, newSI00 )
            changed = True
          try:
            rte = bdiiCE['GlueHostApplicationSoftwareRunTimeEnvironment']
            for vo in self.voName:
              if vo.lower() == 'lhcb':
                if 'VO-lhcb-pilot' in rte:
                  newPilot = 'True'
                else:
                  newPilot = 'False'
              else:
                newPilot = 'Unknown'
          except:
            newPilot = 'Unknown'
          if pilot != newPilot and newPilot != 'Unknown':
            section = cfgPath( ceSection, 'Pilot' )
            self.log.info( section, " -> ".join( ( pilot, newPilot ) ) )
            if pilot == 'Unknown':
              self.csAPI.setOption( section, newPilot )
            else:
              self.csAPI.modifyValue( section, newPilot )
            changed = True
        newVO = ''
        for vo in self.voName:
          result = ldapCEState( ce, vo )  #getBDIICEVOView
          if not result['OK']:
            self.log.warn( 'Error in BDII for queue %s' % ce, result['Message'] )
            result = self.__checkAlternativeBDIISite( ldapCEState, ce, vo )
            continue
          try:
            queues = result['Value']
          except:
            # BUGFIX: was result['Massage'] (KeyError inside the handler)
            self.log.warn( 'Error in BDII for queue %s' % ce, result['Message'] )
            continue
          # Derive a single CEType from all queues; warn on disagreement
          newCEType = 'Unknown'
          for queue in queues:
            try:
              queueType = queue['GlueCEImplementationName']
            except:
              queueType = 'Unknown'
            if newCEType == 'Unknown':
              newCEType = queueType
            else:
              if queueType != newCEType:
                self.log.warn( 'Error in BDII for CE %s ' % ce,
                               'different CE types %s %s' % ( newCEType, queueType ) )
          if newCEType == 'ARC-CE':
            newCEType = 'ARC'
          if ceType != newCEType and newCEType != 'Unknown':
            section = cfgPath( ceSection, 'CEType' )
            self.log.info( section, " -> ".join( ( ceType, newCEType ) ) )
            if ceType == 'Unknown':
              self.csAPI.setOption( section, newCEType )
            else:
              self.csAPI.modifyValue( section, newCEType )
            changed = True
          for queue in queues:
            try:
              queueName = queue['GlueCEUniqueID'].split( '/' )[-1]
            except:
              self.log.warn( 'Error in queueName ', queue )
              continue
            try:
              newMaxCPUTime = queue['GlueCEPolicyMaxCPUTime']
            except:
              newMaxCPUTime = None
            newSI00 = None
            try:
              caps = queue['GlueCECapability']
              if type( caps ) == type( '' ):
                caps = [caps]
              for cap in caps:
                if cap.count( 'CPUScalingReferenceSI00' ):
                  newSI00 = cap.split( '=' )[-1]
            except:
              newSI00 = None
            queueSection = cfgPath( ceSection, 'Queues', queueName )
            result = gConfig.getOptionsDict( queueSection )
            if not result['OK']:
              self.log.warn( "Section Queues", result['Message'] )
              maxCPUTime = 'Unknown'
              si00 = 'Unknown'
              allowedVOs = ['']
            else:
              queueOpt = result['Value']
              maxCPUTime = queueOpt.get( 'maxCPUTime', 'Unknown' )
              si00 = queueOpt.get( 'SI00', 'Unknown' )
              if newVO == '':  # Remember previous iteration, if none - read from conf
                allowedVOs = queueOpt.get( 'VO', '' ).split( "," )
              else:  # Else use newVO, as it can contain changes, which aren't in conf yet
                allowedVOs = newVO.split( "," )
            if newMaxCPUTime and ( maxCPUTime != newMaxCPUTime ):
              section = cfgPath( queueSection, 'maxCPUTime' )
              self.log.info( section, " -> ".join( ( maxCPUTime, newMaxCPUTime ) ) )
              if maxCPUTime == 'Unknown':
                self.csAPI.setOption( section, newMaxCPUTime )
              else:
                self.csAPI.modifyValue( section, newMaxCPUTime )
              changed = True
            if newSI00 and ( si00 != newSI00 ):
              section = cfgPath( queueSection, 'SI00' )
              self.log.info( section, " -> ".join( ( si00, newSI00 ) ) )
              if si00 == 'Unknown':
                self.csAPI.setOption( section, newSI00 )
              else:
                self.csAPI.modifyValue( section, newSI00 )
              changed = True
            modifyVO = True  # Flag saying if we need VO option to change
            newVO = ''
            if allowedVOs != ['']:
              for allowedVO in allowedVOs:
                allowedVO = allowedVO.strip()  # Get rid of spaces
                newVO += allowedVO
                if allowedVO == vo:  # Current VO has been already in list
                  newVO = ''
                  modifyVO = False  # Don't change anything
                  break  # Skip next 'if', proceed to next VO
                newVO += ', '
              if modifyVO:
                section = cfgPath( queueSection, 'VO' )
                newVO += vo
                self.log.info( section, " -> ".join( ( '%s' % allowedVOs, newVO ) ) )
                if allowedVOs == ['']:
                  self.csAPI.setOption( section, newVO )
                else:
                  self.csAPI.modifyValue( section, newVO )
                changed = True
  if changed:
    self.log.info( body )
    if body and self.addressTo and self.addressFrom:
      notification = NotificationClient()
      result = notification.sendMail( self.addressTo, self.subject, body,
                                      self.addressFrom, localAttempt = False )
    return self.csAPI.commit()
  else:
    self.log.info( "No changes found" )
    return S_OK()
def __lookForNewCEs(self):
    """ Look up BDII for CEs not yet present in the DIRAC CS.

    For each VO in self.voName: collects the CEs already known to the CS
    (plus the BannedCEs option) as a blacklist, queries the BDII, and mails
    a per-VO report of new CEs with 'Production' queues and of CEs that no
    longer appear in the BDII.
    """
    bannedCEs = self.am_getOption('BannedCEs', [])
    for vo in self.voName:
        # get the known CEs for a given VO, so we can know the unknowns, or no longer supported,
        # for a VO
        res = getQueues(community=vo)
        if not res['OK']:
            return res
        knownCEs = set()
        for _site, ces in res['Value'].items():
            knownCEs.update(ces)
        # banned CEs are treated as known so they are never reported as new
        knownCEs.update(bannedCEs)
        result = self.__getBdiiCEInfo(vo)
        if not result['OK']:
            # BDII unreachable for this VO: skip it, try the next one
            continue
        bdiiInfo = result['Value']
        result = getGridCEs(vo, bdiiInfo=bdiiInfo, ceBlackList=knownCEs)
        if not result['OK']:
            self.log.error('Failed to get unused CEs', result['Message'])
            continue  # next VO
        siteDict = result['Value']
        # CEs known to the CS but absent from the BDII (minus the banned ones)
        unknownCEs = set(result['UnknownCEs']) - set(bannedCEs)
        body = ''
        for site in siteDict:
            newCEs = set(siteDict[site])  # pylint: disable=no-member
            if not newCEs:
                continue
            ceString = ''
            for ce in newCEs:
                queueString = ''
                ceInfo = bdiiInfo[site]['CEs'][ce]
                newCEString = "CE: %s, GOCDB Site Name: %s" % (ce, site)
                # 'System' is expected to be a (name, version, release) 3-tuple
                # formatted by the %s_%s_%s pattern — TODO confirm against getGridCEs
                systemTuple = siteDict[site][ce]['System']
                osString = "%s_%s_%s" % (systemTuple)
                newCEString = "\n%s\n%s\n" % (newCEString, osString)
                for queue in ceInfo['Queues']:
                    queueStatus = ceInfo['Queues'][queue].get('GlueCEStateStatus', 'UnknownStatus')
                    # only queues in 'Production' state make a CE worth reporting
                    if 'production' in queueStatus.lower():
                        ceType = ceInfo['Queues'][queue].get('GlueCEImplementationName', '')
                        queueString += " %s %s %s\n" % (queue, queueStatus, ceType)
                if queueString:
                    ceString += newCEString
                    ceString += "Queues:\n"
                    ceString += queueString
            if ceString:
                body += ceString
        if siteDict:
            body = "\nWe are glad to inform You about new CE(s) possibly suitable for %s:\n" % vo + body
            body += "\n\nTo suppress information about CE add its name to BannedCEs list.\n"
            body += "Add new Sites/CEs for vo %s with the command:\n" % vo
            body += "dirac-admin-add-resources --vo %s --ce\n" % vo
        if unknownCEs:
            body += '\n\n'
            body += 'There is no (longer) information about the following CEs for the %s VO.\n' % vo
            body += '\n'.join(sorted(unknownCEs))
            body += '\n\n'
        if body:
            self.log.info(body)
            if self.addressTo and self.addressFrom:
                notification = NotificationClient()
                result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom,
                                               localAttempt=False, avoidSpam=True)
                if not result['OK']:
                    self.log.error('Can not send new site notification mail', result['Message'])
    return S_OK()
def informPeople(rec, oldstate, state, author, inform):
    """Send notification mails about a Production Request state change.

    Depending on the new ``state``, notifies the request author, the explicit
    subscription list ``inform``, and/or whole DIRAC groups (production
    managers, tech experts), or aggregates sign-off requests via _aggregate.

    Fixes: missing comma in the subscription-list mail body (the two strings
    were implicitly concatenated into "you arein the ..."), and the
    "your are asked" typo in the returned-request mail.

    :param dict rec: production request record (RequestID, RequestType, ...)
    :param str oldstate: previous state of the request
    :param str state: new state; '' or 'New' means nothing to notify
    :param str author: DIRAC username of the request author
    :param str inform: comma/space separated list of extra usernames or emails
    """
    if not state or state == 'New':
        return  # was no state change or resurrect
    reqId = rec['RequestID']
    csS = PathFinder.getServiceSection('ProductionManagement/ProductionRequest')
    if not csS:
        gLogger.error('No ProductionRequest section in configuration')
        return
    fromAddress = gConfig.getValue('%s/fromAddress' % csS, '')
    if not fromAddress:
        gLogger.error('No fromAddress is defined in CS path %s/fromAddress' % csS)
        return
    sendNotifications = gConfig.getValue('%s/sendNotifications' % csS, 'Yes')
    if sendNotifications != 'Yes':
        gLogger.info('No notifications will be send')
        return
    # Common footer and request-details trailer appended to every mail
    footer = "\n\nNOTE: it is an automated notification."
    footer += " Don't reply please.\n"
    footer += "DIRAC Web portal: https://lhcb-portal-dirac.cern.ch/DIRAC/s:%s/g:" % \
        PathFinder.getDIRACSetup()
    ppath = "/?view=tabs&theme=Grey&url_state=1|*LHCbDIRAC.ProductionRequestManager.classes.ProductionRequestManager:,\n\n"
    ppath += 'The request details:\n'
    ppath += ' Type: %s' % str(rec['RequestType'])
    ppath += ' Name: %s\n' % str(rec['RequestName'])
    ppath += ' Conditions: %s\n' % str(rec['SimCondition'])
    ppath += ' Processing pass: %s\n' % str(rec['ProPath'])
    gLogger.info(".... %s ...." % ppath)
    authorMail = getUserOption(author, 'Email')
    if authorMail:
        # 'BK Check' and 'Submitted' are handled by the group aggregation below
        if state not in ['BK Check', 'Submitted']:
            if state == 'BK OK':
                subj = 'DIRAC: please resign your Production Request %s' % reqId
                body = '\n'.join([
                    'Customized Simulation Conditions in your request was registered.',
                    'Since Bookkeeping expert could make changes in your request,',
                    'you are asked to confirm it.'
                ])
            else:
                subj = "DIRAC: the state of Production Request %s is changed to '%s'; %s;%s" % (
                    reqId, state, rec.get('RequestWG', ''), rec.get('RequestName', ''))
                body = '\n'.join([
                    'The state of your request is changed.',
                    'This mail is for information only.'
                ])
            notification = NotificationClient()
            res = notification.sendMail(authorMail, subj,
                                        body + footer + 'lhcb_user' + ppath, fromAddress, True)
            if not res['OK']:
                gLogger.error("_inform_people: can't send email: %s" % res['Message'])
    if inform:
        subj = "DIRAC: the state of %s Production Request %s is changed to '%s'; %s;%s" % (
            rec['RequestType'], reqId, state, rec.get('RequestWG', ''), rec.get('RequestName', ''))
        # BUGFIX: comma added — the two strings used to be fused into "you arein the"
        body = '\n'.join([
            'You have received this mail because you are',
            'in the subscription list for this request'
        ])
        for x in inform.replace(" ", ",").split(","):
            if x:
                # entries containing '@' are literal addresses, otherwise usernames
                if x.find("@") > 0:
                    eMail = x
                else:
                    eMail = getUserOption(x, 'Email')
                if eMail:
                    notification = NotificationClient()
                    res = notification.sendMail(eMail, subj,
                                                body + footer + 'lhcb_user' + ppath, fromAddress, True)
                    if not res['OK']:
                        gLogger.error("_inform_people: can't send email: %s" % res['Message'])
    if state == 'Accepted':
        subj = "DIRAC: the Production Request %s is accepted; %s;%s" % (
            reqId, rec.get('RequestWG', ''), rec.get('RequestName', ''))
        body = '\n'.join([
            "The Production Request is signed and ready to process",
            "You are informed as member of %s group"
        ])
        groups = ['lhcb_prmgr']
        for group in groups:
            for man in _getMemberMails(group):
                notification = NotificationClient()
                res = notification.sendMail(man, subj,
                                            body % group + footer + group + ppath, fromAddress, True)
                if not res['OK']:
                    gLogger.error("_inform_people: can't send email: %s" % res['Message'])
    elif state == 'PPG OK' and oldstate == 'Accepted':
        subj = "DIRAC: returned Production Request %s; %s;%s" % (
            reqId, rec.get('RequestWG', ''), rec.get('RequestName', ''))
        # "you are asked" fixed (was "your are asked")
        body = '\n'.join([
            "Production Request is returned by Production Manager.",
            "As member of %s group, you are asked to correct and sign",
            "or to reject it.",
            "",
            "In case some other member of the group has already",
            "done that, please ignore this mail."
        ])
        groups = ['lhcb_tech']
        for group in groups:
            for man in _getMemberMails(group):
                notification = NotificationClient()
                res = notification.sendMail(man, subj,
                                            body % group + footer + group + ppath, fromAddress, True)
                if not res['OK']:
                    gLogger.error("_inform_people: can't send email: %s" % res['Message'])
    elif state == 'BK Check':
        groups = ['lhcb_bk']
        _aggregate(reqId, rec.get('RequestType', ''), rec.get('RequestWG', ''),
                   rec.get('RequestName', ''), rec['SimCondition'], rec['ProPath'],
                   groups, rec.get('reqInform', inform))
    elif state == 'Submitted':
        groups = ['lhcb_ppg', 'lhcb_tech']
        _aggregate(reqId, rec.get('RequestType', ''), rec.get('RequestWG', ''),
                   rec.get('RequestName', ''), rec['SimCondition'], rec['ProPath'],
                   groups, rec.get('reqInform', inform))
    else:
        return
class MCSimulationTestingAgent (AgentModule):
    """An agent to check for MCSimulation productions that have undergone the testing phase.

       Productions that have the status Idle and are also in the table StoredJobDescription
       have undergone testing. A report is created by the agent from the results of the test
       phase and emailed to the Production Manager.
    """

    def __init__(self, *args, **kwargs):
        """ c'tor """
        AgentModule.__init__(self, *args, **kwargs)
        # clients are created in initialize(), not here
        self.transClient = None
        self.bkClient = None
        self.notifyClient = None
        self.operations = None
        # transformation IDs whose test fully failed; skipped on later cycles
        self.failedTransIDs = []

    def initialize(self):
        """Instantiate the clients and read the MailTo option."""
        self.transClient = TransformationClient()
        self.bkClient = BookkeepingClient()
        self.notifyClient = NotificationClient()
        self.operations = Operations()
        self.email = self.am_getOption("MailTo", '')
        return S_OK()

    def execute(self):
        """One agent cycle: find Idle transformations that are under test and
        either activate them (all/some tasks Done) or report failure."""
        # get all the idle transformations
        extendableTTypes = Operations().getValue('Transformations/ExtendableTransfTypes',
                                                 ['MCSimulation'])
        res = self.transClient.getTransformations(condDict={"Status": "Idle", "Type": extendableTTypes})
        if res['OK']:
            idleTransformations = res['Value']
            idleTransformations = [d.get("TransformationID") for d in idleTransformations]
            self.log.verbose("Found %d Idle MC transformations" % len(idleTransformations))
            self.log.debug("Idle transformations found: %s" % ','.join([str(it) for it in idleTransformations]))
        else:
            self.log.error("Call to Transformation Client service failed", res['Message'])
            return res
        # get all the IDs of transformations undergoing a testing phase
        res = self.transClient.getStoredJobDescriptionIDs()
        if res['OK']:
            testingSimulations = res['Value']
            # each entry is a pair whose first element is the transformation ID
            testingSimulations = [pair[0] for pair in testingSimulations]
            self.log.verbose("Found %d MC transformations undergoing a testing phase" % len(testingSimulations))
            self.log.debug("MC transformations found undergoing a testing phase: %s" %
                           ','.join([str(ts) for ts in testingSimulations]))
        else:
            self.log.error("Call to Transformation Client service failed", res['Message'])
            return res
        # get the IDs that occur in both idle transformations and testing phase
        idleSimulations = list(set(testingSimulations).intersection(idleTransformations))
        # remove those that we know failed
        idleSimulations = list(set(idleSimulations).difference(self.failedTransIDs))
        self.log.info("MC transformations under considerations: %s (will loop on them)" %
                      ','.join([str(idS) for idS in idleSimulations]))
        for transID in idleSimulations:
            self.log.info("Looking into %d" % transID)
            tasks = self.transClient.getTransformationTasks(condDict={"TransformationID": transID})
            if not tasks['OK']:
                self.log.error("Call to Transformation Client service failed", tasks['Message'])
                continue
            else:
                tasks = tasks['Value']
                numberOfTasks = len(tasks)
                numberOfDoneTasks = sum(1 for d in tasks if d.get("ExternalStatus") == "Done")
                self.log.verbose("TransID = %d, numberOfTasks = %d, numberOfDoneTasks = %d" %
                                 (transID, numberOfTasks, numberOfDoneTasks))
                if numberOfTasks == numberOfDoneTasks:
                    self.log.info("All tasks have passed so the request can be accepted and the transformation updated")
                    res = self._activateTransformation(transID, tasks)
                    if not res['OK']:
                        self.log.error("Error Activating Production", res['Message'])
                else:
                    self.log.warn("There are failed tasks")
                    report = self.__createReport(tasks)
                    numberOfFailedTasks = sum(1 for d in tasks if d.get('ExternalStatus') == 'Failed')
                    if numberOfFailedTasks == numberOfTasks:
                        # all tasks have failed so the request can be rejected and an email report sent
                        self._sendReport(report)
                        self.log.warn("Transformation " + str(transID) + " failed the testing phase")
                        self.failedTransIDs.append(transID)
                    else:
                        # only some tasks have failed so continue but send a warn email
                        self.log.warn("Transformation " + str(transID) +
                                      " failed partially the testing phase, continuing anyway")
                        doneTasks = list()
                        for d in tasks:
                            if d.get("ExternalStatus") == "Done":
                                doneTasks.append(d)
                        if not doneTasks:
                            self.log.info("No tasks done for Transformation %d" % transID)
                            continue
                        # activate using only the successful tasks for the CPU estimate
                        res = self._activateTransformation(transID, doneTasks)
                        if not res['OK']:
                            self.log.error("Error Activating Production", res['Message'])
                            continue
                        subject = "MCSimulation Test Failure Report. TransformationID: " + str(transID) + \
                            " - some tasks failed"
                        report['subject'] = subject
                        self._sendReport(report)
        return S_OK()

    def _activateTransformation(self, transID, tasks):
        """ Calculate parameters, update the workflow, then move the production to Active

        :param int transID: transformation ID
        :param list tasks: task dictionaries used to derive CPUe/MCCpu
        :return: S_OK() on success, the failing result otherwise
        """
        parameters = self._calculateParameters(tasks)
        if not parameters['OK']:
            self.log.error("Error calculating parameters", parameters['Message'])
            return parameters
        else:
            parameters = parameters['Value']
            self.log.verbose("TransID = %d, Calculated Parameters: %s" % (transID, str(parameters)))
            workflow = self._updateWorkflow(transID, int(round(float(parameters['CPUe']))), parameters['MCCpu'])
            if workflow['OK']:
                workflow = workflow['Value']
                res = self._updateTransformationsTable(transID, workflow)
                if not res['OK']:
                    self.log.error("Error updating transformations table", res['Message'])
                    return res
                else:
                    self.log.info("Transformation " + str(transID) + " passed the testing phase and is now set to active")
        return S_OK()

    def __createReport(self, tasks):
        """creates a report from a failed task to email to the production manager

        :param list tasks: task dictionaries of a single transformation
        :return: dict with 'subject' and 'body' (list of lines) keys
        """
        dateformat = '%d/%m/%Y %H:%M'
        transformationID = tasks[0]["TransformationID"]
        transformation = self.transClient.getTransformations(condDict={"TransformationID": transformationID})
        # NOTE(review): the result's 'OK' is not checked here — a failed call
        # would raise KeyError on 'Value'; confirm callers accept that
        transformation = transformation['Value'][0]
        subject = "MCSimulation Test Failure Report. TransformationID: " + str(transformationID)
        body = [subject]
        body.append("")
        body.append("Transformation:")
        body.append("----------------------------------------------------------------------")
        body.append("TransformationID: " + str(transformation["TransformationID"]))
        body.append("TransformationName: " + transformation["TransformationName"])
        body.append("LastUpdate: " + transformation["LastUpdate"].strftime(dateformat))
        body.append("Status: " + transformation["Status"])
        body.append("Description: " + transformation["Description"])
        body.append("TransformationFamily: " + str(transformation["TransformationFamily"]))
        body.append("Plugin: " + transformation["Plugin"])
        body.append("Type: " + transformation["Type"])
        body.append("AgentType: " + transformation["AgentType"])
        body.append("GroupSize: " + str(transformation["GroupSize"]))
        body.append("MaxNumberOfTasks: " + str(transformation["MaxNumberOfTasks"]))
        body.append("AuthorDN: " + transformation["AuthorDN"])
        body.append("TransformationGroup: " + transformation["TransformationGroup"])
        body.append("InheritedFrom: " + str(transformation["InheritedFrom"]))
        body.append("CreationDate: " + transformation["CreationDate"].strftime(dateformat))
        body.append("FileMask: " + transformation["FileMask"])
        body.append("EventsPerTask: " + str(transformation["EventsPerTask"]))
        body.append("AuthorGroup: " + transformation["AuthorGroup"])
        body.append("")
        body.append("Number of Tasks: " + str(len(tasks)))
        body.append("Tasks:")
        body.append("----------------------------------------------------------------------")
        for task in tasks:
            body.append("TaskID: " + str(task['TaskID']))
            body.append("TargetSE: " + task['TargetSE'])
            body.append("LastUpdateTime: " + task['LastUpdateTime'].strftime(dateformat))
            body.append("RunNumber: " + str(task['RunNumber']))
            body.append("CreationTime: " + task['CreationTime'].strftime(dateformat))
            body.append("ExternalID: " + str(task['ExternalID']))
            body.append("ExternalStatus: " + task['ExternalStatus'])
            body.append("")
        return {'subject': subject, 'body': body}

    def _sendReport(self, report):
        """sends a given report to the production manager

        Falls back to the Shifter/ProductionManager email if MailTo is unset;
        the same address is used as sender and recipient.
        """
        if not self.email:
            self.email = getUserOption(self.operations.getValue("Shifter/ProductionManager/User"), 'Email')
        body = '\n'.join(report['body'])
        res = self.notifyClient.sendMail(self.email, report['subject'], body, self.email,
                                         localAttempt=False, avoidSpam=True)
        if not res['OK']:
            self.log.error("sendMail failed", res['Message'])
        else:
            self.log.info('Mail summary sent to production manager')

    def _calculateParameters(self, tasks):
        """ Calculates the CPU time per event for the production

        Queries the Bookkeeping for the jobs behind the tasks and averages
        (CPU time * CPU power) per event over the successful jobs.

        :return: S_OK({'CPUe': float, 'MCCpu': str}) or S_ERROR
        """
        jobIds = [int(x['ExternalID']) for x in tasks]
        res = self.bkClient.bulkJobInfo({'jobId': jobIds})
        if not res['OK']:
            self.log.error("Error calling bkClient", res['Message'])
            return S_ERROR(res['Message'])
        successful = res['Value']['Successful']
        self.log.debug("Successful tasks: %s" % str(successful))
        if not successful:
            self.log.error("There are no successful tasks")
            return S_ERROR("There are no successful tasks")
        events = 0
        CPUeJobTotal = 0.0
        for job in successful.itervalues():
            cpuJob = 0
            for bkJob in job:
                # only LHCb application steps contribute to the estimate
                if bkJob['ApplicationName'] in ['Gauss', 'Boole', 'Moore', 'Brunel', 'DaVinci']:
                    # events is taken once, from the first matching step
                    if not events:
                        events = bkJob['NumberOfEvents']
                    timeInSeconds = bkJob['CPUTIME']
                    cpuJob += timeInSeconds * bkJob['WNCPUHS06']
            # NOTE(review): raises ZeroDivisionError if no step set 'events'
            # (i.e. no matching ApplicationName found) — confirm this cannot happen
            CPUeJob = cpuJob / events
            self.log.debug("CPUeJob = %d" % CPUeJob)
            CPUeJobTotal += CPUeJob
        CPUe = CPUeJobTotal / len(successful)
        # We want to produce at least 25 events per job...
        MCCpu = str(25 * int(round(float(CPUe))))
        self.log.verbose("CPUe = %d, MCCpu = %s" % (CPUe, MCCpu))
        return S_OK({'CPUe': CPUe, 'MCCpu': MCCpu})

    def _updateWorkflow(self, transID, CPUe, MCCpu):
        """ Updates the workflow of a savedProductionDescription to reflect the calculated CPUe

        :return: S_OK(workflow XML string) or the failing result
        """
        res = self.transClient.getStoredJobDescription(transID)
        if res['OK']:
            workflow = fromXMLString(res['Value'][0][1])
            prod = Production()
            prod.LHCbJob.workflow = workflow
            prod.setParameter('CPUe', 'string', str(CPUe), 'CPU time per event')
            prod.LHCbJob.setCPUTime(MCCpu)
            self.log.info("Transformation ", str(transID))
            self.log.info("Calculated CPUTime: ", str(CPUe))
            self.log.info("CpuTime: ", str(MCCpu))
            # maximum number of events to produce
            # try to get the CPU parameters from the configuration if possible
            cpuTimeAvg = Operations().getValue('Transformations/CPUTimeAvg')
            if cpuTimeAvg is None:
                self.log.info('Could not get CPUTimeAvg from config, defaulting to %d' % 200000)
                cpuTimeAvg = 200000
            try:
                CPUNormalizationFactorAvg = getCPUNormalizationFactorAvg()
            except RuntimeError:
                self.log.info('Could not get CPUNormalizationFactorAvg, defaulting to %f' % 1.0)
                CPUNormalizationFactorAvg = 1.0
            max_e = getEventsToProduce(CPUe, cpuTimeAvg, CPUNormalizationFactorAvg)
            prod.setParameter('maxNumberOfEvents', 'string', str(max_e),
                              'Maximum number of events to produce (Gauss)')
            return S_OK(prod.LHCbJob.workflow.toXML())
        else:
            self.log.error("Call to Transformation Client service failed", res['Message'])
            return res

    def _updateTransformationsTable(self, transID, workflow):
        """ Puts the modified workflow from the savedProductionDescription table into the
        transformations table and removes it from the savedProductionDescription table.

        Both the Body and Status updates must succeed; otherwise both are
        rolled back to their previous values to keep the change atomic.
        """
        transformation = self.transClient.getTransformations(condDict={"TransformationID": transID})
        if transformation['OK']:
            body = self.transClient.setTransformationParameter(transID, "Body", workflow)
            status = self.transClient.setTransformationParameter(transID, "Status", "Active")
            if body['OK'] and status['OK']:
                res = self.transClient.removeStoredJobDescription(transID)
                if not res['OK']:
                    self.log.error("Call to removeStoredJobDescription failed", res['Message'])
                    return res
                self.log.info("Transformation %s has an updated body and Status set to active" % transID)
                return S_OK()
            else:
                # manual rollback: restore the previous Body and Status
                self.log.error("One of the updates has failed so set them both back to the previous value to ensure atomicity")
                self.log.debug(str(transformation['Value'][0]['Body']))
                res = self.transClient.setTransformationParameter(transID, "Body",
                                                                  transformation['Value'][0]['Body'])
                if not res['OK']:
                    self.log.error("Failure calling setTransformationParameter", res['Message'])
                    return res
                res = self.transClient.setTransformationParameter(transID, "Status",
                                                                  transformation['Value'][0]['Status'])
                if not res['OK']:
                    self.log.error("Failure calling setTransformationParameter", res['Message'])
                    return res
        else:
            self.log.error("Call to getTransformations failed", transformation['Message'])
            return transformation
def __infoFromCE(self):
    """Synchronise site and CE information from BDII into the Configuration Service.

    Walks every site under /Resources/Sites, compares the locally configured
    coordinates, contact mail, CE attributes (wnTmpDir, architecture, OS,
    SI00, Pilot flag, CEType) and queue attributes (maxCPUTime, SI00, VO
    list) against what BDII reports, and stages any differences via
    ``self.csAPI``.  If anything changed, a notification mail is optionally
    sent and the CS changes are committed.

    :returns: result of ``self.csAPI.commit()`` when changes were staged,
              S_OK() when nothing changed, or None if the initial CS
              sections could not be read (see NOTE below).
    """
    sitesSection = cfgPath("Resources", "Sites")
    result = gConfig.getSections(sitesSection)
    if not result["OK"]:
        # NOTE(review): bare return yields None, not S_ERROR — callers that
        # test result['OK'] would break; kept as-is to preserve behaviour.
        return
    grids = result["Value"]

    changed = False
    body = ""

    for grid in grids:
        gridSection = cfgPath(sitesSection, grid)
        result = gConfig.getSections(gridSection)
        if not result["OK"]:
            # NOTE(review): same bare-return caveat as above.
            return
        sites = result["Value"]

        for site in sites:
            siteSection = cfgPath(gridSection, site)
            opt = gConfig.getOptionsDict(siteSection)["Value"]
            name = opt.get("Name", "")
            if name:
                # Compare CS-stored site coordinates/mail with the BDII values.
                coor = opt.get("Coordinates", "Unknown")
                mail = opt.get("Mail", "Unknown")
                result = ldapSite(name)
                if not result["OK"]:
                    self.log.warn("BDII site %s: %s" % (name, result["Message"]))
                    result = self.__checkAlternativeBDIISite(ldapSite, name)
                if result["OK"]:
                    bdiiSites = result["Value"]
                    if len(bdiiSites) == 0:
                        self.log.warn(name, "Error in BDII: leng = 0")
                    else:
                        if not len(bdiiSites) == 1:
                            self.log.warn(name, "Warning in BDII: leng = %d" % len(bdiiSites))
                        bdiiSite = bdiiSites[0]
                        try:
                            longitude = bdiiSite["GlueSiteLongitude"]
                            latitude = bdiiSite["GlueSiteLatitude"]
                            newcoor = "%s:%s" % (longitude, latitude)
                        except Exception:
                            self.log.warn("Error in BDII coordinates")
                            newcoor = "Unknown"
                        try:
                            newmail = bdiiSite["GlueSiteSysAdminContact"].split(":")[-1].strip()
                        except Exception:
                            self.log.warn("Error in BDII mail")
                            newmail = "Unknown"
                        self.log.debug("%s %s %s" % (name, newcoor, newmail))
                        if newcoor != coor:
                            self.log.info("%s" % (name), "%s -> %s" % (coor, newcoor))
                            if coor == "Unknown":
                                self.csAPI.setOption(cfgPath(siteSection, "Coordinates"), newcoor)
                            else:
                                self.csAPI.modifyValue(cfgPath(siteSection, "Coordinates"), newcoor)
                            changed = True
                        if newmail != mail:
                            self.log.info("%s" % (name), "%s -> %s" % (mail, newmail))
                            if mail == "Unknown":
                                self.csAPI.setOption(cfgPath(siteSection, "Mail"), newmail)
                            else:
                                self.csAPI.modifyValue(cfgPath(siteSection, "Mail"), newmail)
                            changed = True

            ceList = List.fromChar(opt.get("CE", ""))
            if not ceList:
                self.log.warn(site, "Empty site list")
                continue

            for ce in ceList:
                # Current CS values for this CE, defaulting to "Unknown".
                ceSection = cfgPath(siteSection, "CEs", ce)
                result = gConfig.getOptionsDict(ceSection)
                if not result["OK"]:
                    self.log.debug("Section CE", result["Message"])
                    wnTmpDir = "Unknown"
                    arch = "Unknown"
                    os = "Unknown"
                    si00 = "Unknown"
                    pilot = "Unknown"
                    ceType = "Unknown"
                else:
                    ceopt = result["Value"]
                    wnTmpDir = ceopt.get("wnTmpDir", "Unknown")
                    arch = ceopt.get("architecture", "Unknown")
                    os = ceopt.get("OS", "Unknown")
                    si00 = ceopt.get("SI00", "Unknown")
                    pilot = ceopt.get("Pilot", "Unknown")
                    ceType = ceopt.get("CEType", "Unknown")

                result = ldapCE(ce)
                if not result["OK"]:
                    self.log.warn("Error in BDII for %s" % ce, result["Message"])
                    # NOTE(review): the alternative-BDII result is assigned but
                    # then discarded by the continue (unlike the site handling
                    # above) — looks suspicious, kept as-is; confirm intent.
                    result = self.__checkAlternativeBDIISite(ldapCE, ce)
                    continue
                try:
                    bdiiCE = result["Value"][0]
                except Exception:
                    self.log.warn("Error in BDII for %s" % ce, result)
                    bdiiCE = None

                if bdiiCE:
                    # Per-attribute sync: only touch the CS when BDII has a
                    # known value that differs from the CS one.
                    try:
                        newWNTmpDir = bdiiCE["GlueSubClusterWNTmpDir"]
                    except Exception:
                        newWNTmpDir = "Unknown"
                    if wnTmpDir != newWNTmpDir and newWNTmpDir != "Unknown":
                        section = cfgPath(ceSection, "wnTmpDir")
                        self.log.info(section, " -> ".join((wnTmpDir, newWNTmpDir)))
                        if wnTmpDir == "Unknown":
                            self.csAPI.setOption(section, newWNTmpDir)
                        else:
                            self.csAPI.modifyValue(section, newWNTmpDir)
                        changed = True

                    try:
                        newArch = bdiiCE["GlueHostArchitecturePlatformType"]
                    except Exception:
                        newArch = "Unknown"
                    if arch != newArch and newArch != "Unknown":
                        section = cfgPath(ceSection, "architecture")
                        self.log.info(section, " -> ".join((arch, newArch)))
                        if arch == "Unknown":
                            self.csAPI.setOption(section, newArch)
                        else:
                            self.csAPI.modifyValue(section, newArch)
                        changed = True

                    try:
                        newOS = "_".join(
                            (
                                bdiiCE["GlueHostOperatingSystemName"],
                                bdiiCE["GlueHostOperatingSystemVersion"],
                                bdiiCE["GlueHostOperatingSystemRelease"],
                            )
                        )
                    except Exception:
                        newOS = "Unknown"
                    if os != newOS and newOS != "Unknown":
                        section = cfgPath(ceSection, "OS")
                        self.log.info(section, " -> ".join((os, newOS)))
                        if os == "Unknown":
                            self.csAPI.setOption(section, newOS)
                        else:
                            self.csAPI.modifyValue(section, newOS)
                        changed = True
                        # OS changes are worth reporting in the notification mail.
                        body = body + "OS was changed %s -> %s for %s at %s\n" % (os, newOS, ce, site)

                    try:
                        newSI00 = bdiiCE["GlueHostBenchmarkSI00"]
                    except Exception:
                        newSI00 = "Unknown"
                    if si00 != newSI00 and newSI00 != "Unknown":
                        section = cfgPath(ceSection, "SI00")
                        self.log.info(section, " -> ".join((si00, newSI00)))
                        if si00 == "Unknown":
                            self.csAPI.setOption(section, newSI00)
                        else:
                            self.csAPI.modifyValue(section, newSI00)
                        changed = True

                    try:
                        rte = bdiiCE["GlueHostApplicationSoftwareRunTimeEnvironment"]
                        for vo in self.voName:
                            if vo.lower() == "lhcb":
                                if "VO-lhcb-pilot" in rte:
                                    newPilot = "True"
                                else:
                                    newPilot = "False"
                            else:
                                newPilot = "Unknown"
                    except Exception:
                        newPilot = "Unknown"
                    if pilot != newPilot and newPilot != "Unknown":
                        section = cfgPath(ceSection, "Pilot")
                        self.log.info(section, " -> ".join((pilot, newPilot)))
                        if pilot == "Unknown":
                            self.csAPI.setOption(section, newPilot)
                        else:
                            self.csAPI.modifyValue(section, newPilot)
                        changed = True

                newVO = ""
                for vo in self.voName:
                    result = ldapCEState(ce, vo)  # getBDIICEVOView
                    if not result["OK"]:
                        self.log.warn("Error in BDII for queue %s" % ce, result["Message"])
                        result = self.__checkAlternativeBDIISite(ldapCEState, ce, vo)
                        continue
                    try:
                        queues = result["Value"]
                    except Exception:
                        # BUGFIX: was result["Massage"], which raised KeyError
                        # instead of logging the BDII error message.
                        self.log.warn("Error in BDII for queue %s" % ce, result["Message"])
                        continue

                    # Derive a single CE type from all queues; warn on conflicts.
                    newCEType = "Unknown"
                    for queue in queues:
                        try:
                            queueType = queue["GlueCEImplementationName"]
                        except Exception:
                            queueType = "Unknown"
                        if newCEType == "Unknown":
                            newCEType = queueType
                        else:
                            if queueType != newCEType:
                                self.log.warn(
                                    "Error in BDII for CE %s " % ce,
                                    "different CE types %s %s" % (newCEType, queueType),
                                )
                    if newCEType == "ARC-CE":
                        newCEType = "ARC"

                    if ceType != newCEType and newCEType != "Unknown":
                        section = cfgPath(ceSection, "CEType")
                        self.log.info(section, " -> ".join((ceType, newCEType)))
                        if ceType == "Unknown":
                            self.csAPI.setOption(section, newCEType)
                        else:
                            self.csAPI.modifyValue(section, newCEType)
                        changed = True

                    for queue in queues:
                        try:
                            queueName = queue["GlueCEUniqueID"].split("/")[-1]
                        except Exception:
                            self.log.warn("Error in queueName ", queue)
                            continue

                        try:
                            newMaxCPUTime = queue["GlueCEPolicyMaxCPUTime"]
                        except Exception:
                            newMaxCPUTime = None

                        newSI00 = None
                        try:
                            caps = queue["GlueCECapability"]
                            if isinstance(caps, str):
                                caps = [caps]
                            for cap in caps:
                                if cap.count("CPUScalingReferenceSI00"):
                                    newSI00 = cap.split("=")[-1]
                        except Exception:
                            newSI00 = None

                        queueSection = cfgPath(ceSection, "Queues", queueName)
                        result = gConfig.getOptionsDict(queueSection)
                        if not result["OK"]:
                            self.log.warn("Section Queues", result["Message"])
                            maxCPUTime = "Unknown"
                            si00 = "Unknown"
                            allowedVOs = [""]
                        else:
                            queueOpt = result["Value"]
                            maxCPUTime = queueOpt.get("maxCPUTime", "Unknown")
                            si00 = queueOpt.get("SI00", "Unknown")
                            if newVO == "":  # Remember previous iteration, if none - read from conf
                                allowedVOs = queueOpt.get("VO", "").split(",")
                            else:  # Else use newVO, as it can contain changes, which aren't in conf yet
                                allowedVOs = newVO.split(",")

                        if newMaxCPUTime and (maxCPUTime != newMaxCPUTime):
                            section = cfgPath(queueSection, "maxCPUTime")
                            self.log.info(section, " -> ".join((maxCPUTime, newMaxCPUTime)))
                            if maxCPUTime == "Unknown":
                                self.csAPI.setOption(section, newMaxCPUTime)
                            else:
                                self.csAPI.modifyValue(section, newMaxCPUTime)
                            changed = True

                        if newSI00 and (si00 != newSI00):
                            section = cfgPath(queueSection, "SI00")
                            self.log.info(section, " -> ".join((si00, newSI00)))
                            if si00 == "Unknown":
                                self.csAPI.setOption(section, newSI00)
                            else:
                                self.csAPI.modifyValue(section, newSI00)
                            changed = True

                        modifyVO = True  # Flag saying if we need VO option to change
                        newVO = ""
                        if allowedVOs != [""]:
                            for allowedVO in allowedVOs:
                                allowedVO = allowedVO.strip()  # Get rid of spaces
                                newVO += allowedVO
                                if allowedVO == vo:  # Current VO has been already in list
                                    newVO = ""
                                    modifyVO = False  # Don't change anything
                                    break  # Skip next 'if', proceed to next VO
                                newVO += ", "
                        if modifyVO:
                            section = cfgPath(queueSection, "VO")
                            newVO += vo
                            self.log.info(section, " -> ".join(("%s" % allowedVOs, newVO)))
                            if allowedVOs == [""]:
                                self.csAPI.setOption(section, newVO)
                            else:
                                self.csAPI.modifyValue(section, newVO)
                            changed = True

    if changed:
        self.log.info(body)
        if body and self.addressTo and self.addressFrom:
            notification = NotificationClient()
            result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False)
        return self.csAPI.commit()
    else:
        self.log.info("No changes found")
        return S_OK()