def __init__(self, **kwargs):
    """Collect the target hosts, the client options and the thread pool.

    The ``hosts`` keyword, when present, supplies the host list and is
    removed from ``kwargs``; otherwise the list comes from
    ``Registry.getHosts()`` (empty when that call fails). Hosts listed in the
    ``exclude`` keyword are dropped (the keyword itself stays in ``kwargs``),
    and only hosts answering a SystemAdministrator ping are kept.
    """
    if 'hosts' in kwargs:
        self.__hosts = kwargs.pop('hosts')
    else:
        registered = Registry.getHosts()
        self.__hosts = registered['Value'] if registered['OK'] else []

    # Excluded hosts
    if 'exclude' in kwargs:
        self.__hosts = list(set(self.__hosts) - set(kwargs['exclude']))

    # Keep only the hosts that answer a SystemAdministrator ping
    self.__hosts = [candidate for candidate in self.__hosts
                    if SystemAdministratorClient(candidate).ping()['OK']]

    self.__kwargs = dict(kwargs)
    self.__pool = ThreadPool(len(self.__hosts))
    self.__resultDict = {}
def do_stop(self, args):
    """ Stop services or agents or database server

        usage:

          stop <system|*> <service|agent|*>
          stop mysql
    """
    argss = args.split()
    # The system is mandatory and, unless it is the '*' wildcard or 'mysql',
    # so is the component: show the usage instead of raising IndexError.
    if not argss or (argss[0] not in ('mysql', '*') and len(argss) < 2):
        print(self.do_stop.__doc__)
        return
    if argss[0] == 'mysql':
        print("Not yet implemented")
        return

    system = argss[0]
    component = argss[1] if system != '*' else '*'
    client = SystemAdministratorClient(self.host, self.port)
    result = client.stopComponent(system, component)
    if not result['OK']:
        self.__errMsg(result['Message'])
        return

    if system != '*' and component != '*':
        print("\n%s_%s stopped successfully, runit status:\n" % (system, component))
    else:
        print("\nComponents stopped successfully, runit status:\n")
    # Single-argument print() emits the same text under Python 2 and 3
    for comp in result['Value']:
        print("%s : %s" % (comp.rjust(32), result['Value'][comp]['RunitStatus']))
def restartHost(hostName):
    """
    Restart all systems and components of a host

    :param str hostName: name of the host you want to restart
    :return: the failed ping result when the host cannot be reached; S_OK
             carrying the message when the restart cut the connection as
             expected; otherwise the result of the restart call itself
    """
    host, port = parseHostname(hostName)

    gLogger.notice("Pinging %s ..." % host)
    client = SystemAdministratorClient(host, port)
    result = client.ping()
    if not result['OK']:
        gLogger.error("Could not connect to %s: %s" % (host, result['Message']))
        return result
    gLogger.notice("Host %s is active" % host)

    gLogger.notice("Initiating restart of all systems and components")
    # The restart is expected to take the SystemAdministrator down with it,
    # so the call normally comes back as an error: the connection is lost.
    result = client.restartComponent('*', '*')
    if result['OK']:
        # A successful result has no 'Message' key; return it untouched
        # rather than failing with a KeyError below.
        gLogger.notice("Restarted all systems on %s" % host)
        return result
    if result['Message'] == "Peer closed connection":
        gLogger.notice(
            "Restarted all systems on %s : connection to SystemAdministrator lost" % host)
        return S_OK(result['Message'])
    gLogger.error("Received unexpected message: %s" % result['Message'])
    return result
def __init__(self, **kwargs):
    """Prepare the host list, the client keyword arguments and the thread pool.

    Hosts come from the ``hosts`` keyword (which is then removed from
    ``kwargs``) or, failing that, from ``Registry.getHosts()``; an
    unsuccessful registry call yields an empty list. Hosts named in the
    ``exclude`` keyword are removed, then every remaining host is pinged and
    only the ones that reply successfully are retained.
    """
    if 'hosts' in kwargs:
        self.__hosts = kwargs.pop('hosts')
    else:
        lookup = Registry.getHosts()
        self.__hosts = lookup['Value'] if lookup['OK'] else []

    # Excluded hosts
    if 'exclude' in kwargs:
        self.__hosts = list(set(self.__hosts) - set(kwargs['exclude']))

    # Ping the hosts to remove those that don't have a SystemAdministrator service
    reachable = []
    for name in self.__hosts:
        if SystemAdministratorClient(name).ping()['OK']:
            reachable.append(name)
    self.__hosts = reachable

    self.__kwargs = dict(kwargs)
    self.__pool = ThreadPool(len(self.__hosts))
    self.__resultDict = {}
def showHostErrors(self):
    """Return the log error summary of every component on the requested host.

    The host is read from the ``host`` request parameter. Each entry returned
    by ``checkComponentLog("*")`` is tagged with its system, component name
    and host. The reply is a dict whose ``success`` is "true" (with ``result``
    and ``total``) or "false" (with ``error``).
    """
    userDN = getUserDN()
    userGroup = getSelectedGroup()

    if "host" not in request.params:
        return {"success": "false", "error": "Name of the host is missing or not defined"}
    host = str(request.params["host"])

    client = SystemAdministratorClient(host, None, delegatedDN=userDN, delegatedGroup=userGroup)
    result = client.checkComponentLog("*")
    gLogger.debug(result)
    if not result["OK"]:
        return {"success": "false", "error": result["Message"]}

    records = []
    # Keys are expected in "<system>/<component>" form
    for key, entry in result["Value"].items():
        system, component = key.split("/")
        entry["System"] = system
        entry["Name"] = component
        entry["Host"] = host
        records.append(entry)
    return {"success": "true", "result": records, "total": len(records)}
def __init__(self, *args, **kwargs):
    """Create the agent and give every attribute its initial value."""
    AgentModule.__init__(self, *args, **kwargs)
    self.name = "ComponentSupervisionAgent"
    self.setup = "DIRAC-Production"

    # Every switch starts off
    self.enabled = False
    self.restartAgents = False
    self.restartExecutors = False
    self.restartServices = False
    self.controlComponents = False
    self.commitURLs = False
    self.doNotRestartInstancePattern = ["RequestExecutingAgent"]

    self.diracLocation = rootPath

    # Clients; the SystemAdministrator one targets this machine's FQDN
    self.sysAdminClient = SystemAdministratorClient(socket.getfqdn())
    self.jobMonClient = JobMonitoringClient()
    self.nClient = NotificationClient()
    self.csAPI = None

    # Component maps, empty at start
    self.agents = {}
    self.executors = {}
    self.services = {}

    self._tornadoPort = "8443"
    self.errors = []
    self.accounting = defaultdict(dict)

    # Notification e-mail settings
    self.addressTo = []
    self.addressFrom = ""
    self.emailSubject = "ComponentSupervisionAgent on %s" % socket.getfqdn()
def do_add(self, args):
    """ Add new entity to the Configuration Service

        usage:

          add system <system> <instance>
    """
    argss = args.split()
    if not argss:
        print(self.do_add.__doc__)
        return
    option = argss.pop(0)
    # "instance" is accepted as a synonym of "system"
    if option not in ("instance", "system"):
        print("Unknown option: %s" % option)
        return
    if len(argss) < 2:
        # Both the system and the instance name are required
        print(self.do_add.__doc__)
        return
    system, instance = argss[0], argss[1]

    client = SystemAdministratorClient(self.host, self.port)
    result = client.getInfo()
    if not result['OK']:
        self.__errMsg(result['Message'])
        # A failed call carries no 'Value': stop here
        return
    hostSetup = result['Value']['Setup']

    instanceName = gConfig.getValue('/DIRAC/Setups/%s/%s' % (hostSetup, system), '')
    if instanceName:
        if instanceName == instance:
            print("System %s already has instance %s defined in %s Setup" %
                  (system, instance, hostSetup))
        else:
            # Report the instance that is actually configured, not the
            # one the user asked for.
            self.__errMsg("System %s already has instance %s defined in %s Setup" %
                          (system, instanceName, hostSetup))
        return

    result = InstallTools.addSystemInstance(system, instance, hostSetup)
    if not result['OK']:
        self.__errMsg(result['Message'])
    else:
        print("%s system instance %s added successfully" % (system, instance))
def showLog(self):
    """Return the log tail of one component as HTML text.

    Reads ``host``, ``system`` and ``component`` from the request parameters,
    asks the host for the log tail and returns it with newlines turned into
    ``<br>``. Any problem is reported by returning the message string instead.
    """
    userDN = getUserDN()
    userGroup = getSelectedGroup()

    # (parameter, complaint when absent), checked in this order
    required = (
        ("host", "Name of the host is missing or not defined"),
        ("system", "Name of the system is missing or not defined"),
        ("component", "Name of component is missing or not defined"),
    )
    values = []
    for field, complaint in required:
        if field not in request.params:
            return complaint
        values.append(str(request.params[field]))
    host, system, name = values

    client = SystemAdministratorClient(host, None, delegatedDN=userDN, delegatedGroup=userGroup)
    result = client.getLogTail(system, name)
    gLogger.debug(result)
    if not result["OK"]:
        return result["Message"]

    logs = result["Value"]
    key = system + "_" + name
    if key not in logs:
        return "%s key is absent in service response" % key
    return logs[key].replace("\n", "<br>")
def submit(self):
    """Return the flattened component list of the host named in the request.

    The host comes from the ``Hostname`` entry of ``self.request()``. Every
    record produced by ``self.flatten`` from the overall status is tagged
    with that host. The reply dict has ``success`` "true" plus ``result``, or
    ``success`` "false" plus ``error``.
    """
    checkUserCredentials()
    userDN = getUserDN()
    userGroup = getSelectedGroup()

    request = self.request()
    if 'Hostname' not in request:
        return {"success": "false", "error": "Name of the host is absent"}
    host = request['Hostname']

    client = SystemAdministratorClient(host, None, delegatedDN=userDN, delegatedGroup=userGroup)
    result = client.getOverallStatus()
    gLogger.debug("Result of getOverallStatus(): %s" % result)
    if not result["OK"]:
        return {"success": "false", "error": result["Message"]}

    records = []
    for record in self.flatten(result["Value"]):
        record["Host"] = host
        records.append(record)
    return {"success": "true", "result": records}
def do_stop(self, args):
    """ Stop services or agents or database server

        usage:

          stop <system|*> <service|agent|*>
          stop mysql
    """
    argss = args.split()
    # Without a system, or without a component for a named system, there is
    # nothing sensible to do: show the usage instead of raising IndexError.
    if not argss or (argss[0] not in ('mysql', '*') and len(argss) < 2):
        print(self.do_stop.__doc__)
        return
    if argss[0] == 'mysql':
        print("Not yet implemented")
        return

    system = argss[0]
    component = argss[1] if system != '*' else '*'
    client = SystemAdministratorClient(self.host, self.port)
    result = client.stopComponent(system, component)
    if not result['OK']:
        self.__errMsg(result['Message'])
        return

    if system != '*' and component != '*':
        print("\n%s_%s stopped successfully, runit status:\n" % (system, component))
    else:
        print("\nComponents stopped successfully, runit status:\n")
    # Single-argument print() emits the same text under Python 2 and 3
    for comp in result['Value']:
        print("%s : %s" % (comp.rjust(32), result['Value'][comp]['RunitStatus']))
def do_revert( self, args ): """ Revert the last installed version of software to the previous one usage: revert """ client = SystemAdministratorClient( self.host, self.port ) result = client.revertSoftware() if not result['OK']: print "Error:", result['Message'] else: print "Software reverted to", result['Value']
def initialize(self):
    """Set the status labels, read the setup name and create the admin client.

    :return: S_OK()
    """
    # Labels used for component criticality and check outcome
    self.OK = "OK"
    self.FAILURE = "FAILURE"
    self.CRITICAL = "Critical"
    self.NON_CRITICAL = "NonCritical"

    self.outputNonCritical = True
    self.setup = gConfig.getValue('/DIRAC/Setup', 'LHCb-Development')

    # all components not present here will be treated as non critical
    self.admClient = SystemAdministratorClient('localhost')
    return S_OK()
def do_revert(self, args): """ Revert the last installed version of software to the previous one usage: revert """ client = SystemAdministratorClient(self.host, self.port) result = client.revertSoftware() if not result['OK']: print "Error:", result['Message'] else: print "Software reverted to", result['Value']
def __init__(self, *args, **kwargs):
    """Create the agent and give every attribute its initial value."""
    AgentModule.__init__(self, *args, **kwargs)
    self.name = 'MonitorAgents'
    self.setup = "Production"

    # Every switch starts off
    self.enabled = False
    self.restartAgents = False
    self.restartExecutors = False
    self.restartServices = False
    self.controlComponents = False
    self.commitURLs = False

    self.diracLocation = "/opt/dirac/pro"

    # Clients; the SystemAdministrator one targets this machine's hostname
    self.sysAdminClient = SystemAdministratorClient(socket.gethostname())
    self.jobMonClient = JobMonitoringClient()
    self.nClient = NotificationClient()
    self.csAPI = None

    # Component maps, empty at start
    self.agents = {}
    self.executors = {}
    self.services = {}

    self.errors = []
    self.accounting = defaultdict(dict)

    # Notification e-mail settings
    self.addressTo = ["*****@*****.**"]
    self.addressFrom = "*****@*****.**"
    self.emailSubject = "MonitorAgents on %s" % socket.gethostname()
def __executeClient(self, host, method, *parms, **kwargs):
    """Call the RPC ``method`` on ``host`` and tag the result with the host.

    The client is built with the keyword arguments stored at construction
    time; ``parms`` and ``kwargs`` are forwarded to the remote method.
    """
    rpcCall = getattr(SystemAdministratorClient(host, **self.__kwargs), method)
    result = rpcCall(*parms, **kwargs)
    result['Host'] = host
    return result
def __executeClient(self, host, method, *parms, **kwargs):
    """Call the RPC ``method`` on ``host`` and tag the result with the host.

    The name used to reach the machine is the ``Host`` option of the registry
    entry, falling back to ``host`` itself; the result is tagged with the
    original ``host`` value.
    """
    target = Registry.getHostOption(host, "Host", host)
    rpcCall = getattr(SystemAdministratorClient(target, **self.__kwargs), method)
    result = rpcCall(*parms, **kwargs)
    result["Host"] = host
    return result
def manageService(self, service, action):
    """ Manage services running on this machine

        usage:

          service <action> <serviceName>

        :param str service: name of the external service
        :param str action: one of start, stop, restart, status
    """
    # Supported actions and the SystemAdministrator RPC implementing each.
    # An unknown action used to fall through and print the installation
    # list as if it were the action's result; reject it up front instead.
    rpcNames = {
        'start': 'startService',
        'stop': 'stopService',
        'restart': 'restartService',
        'status': 'statusService',
    }
    if action not in rpcNames:
        self._errMsg('Unknown action: %s' % action)
        return

    client = ComponentMonitoringClient()
    result = client.getInstallations({'UninstallationTime': None},
                                     {'System': 'External',
                                      'Module': service,
                                      'Type': 'External'},
                                     {'HostName': self.host},
                                     False)
    if not result['OK']:
        self._errMsg(result['Message'])
        return
    if len(result['Value']) < 1:
        self._errMsg('%s is not installed' % (service))
        return

    client = SystemAdministratorClient(self.host, self.port)
    result = getattr(client, rpcNames[action])(service)
    if not result['OK']:
        self._errMsg(result['Message'])
        return

    gLogger.notice(result['Value'])
def __actionHost(self):
    """
    Apply ``self.action`` ("restart" or "revert") to every requested host

    The hosts come from the comma-separated ``hostname`` request parameter.
    Per-host outcomes are collected in ``self.actionSuccess`` and
    ``self.actionFailed``; the summary is produced by ``self.__aftermath()``.
    """
    if "hostname" not in request.params:
        return {"success": "false", "error": "No hostname given"}
    hosts = request.params["hostname"].split(",")

    DN = getUserDN()
    group = getSelectedGroup()

    self.actionSuccess = list()
    self.actionFailed = list()

    for i in hosts:
        client = SystemAdministratorClient(str(i), None, delegatedDN=DN,
                                           delegatedGroup=group)
        # Compare by value: ``is`` only matched through string interning
        if self.action == "restart":
            result = client.restartComponent(str("*"), str("*"))
        elif self.action == "revert":
            result = client.revertSoftware()
        else:
            error = i + ": Action %s is not defined" % self.action
            self.actionFailed.append(error)
            continue

        gLogger.always(result)

        if not result["OK"]:
            # A restart cuts the connection, which surfaces as this error.
            # ``in`` also matches at position 0, which ``find() > 0`` missed.
            if "Unexpected EOF" in result["Message"]:
                msg = "Signal 'Unexpected EOF' received. Most likely DIRAC components"
                msg = i + ": " + msg + " were successfully restarted."
                self.actionSuccess.append(msg)
                continue
            error = i + ": " + result["Message"]
            self.actionFailed.append(error)
            gLogger.error(error)
        else:
            gLogger.info(result["Value"])
            self.actionSuccess.append(i)

    self.prefix = "Host"
    return self.__aftermath()
def web_getHostLog(self):
    """Finish the request with the log tail of one component as HTML text.

    Reads the first value of the ``host``, ``system`` and ``component``
    request arguments, fetches the log tail through ``self.threadTask`` and
    replies with ``success`` "true" and the log (newlines turned into
    ``<br>``), or ``success`` "false" and an ``error`` message.
    """
    userData = self.getSessionData()
    userDN = str(userData["user"]["DN"])
    userGroup = str(userData["user"]["group"])

    # (argument, complaint when absent), checked in this order
    required = (
        ("host", "Name of the host is missing or not defined"),
        ("system", "Name of the system is missing or not defined"),
        ("component", "Name of component is missing or not defined"),
    )
    values = []
    for field, complaint in required:
        if field not in self.request.arguments:
            self.finish({"success": "false", "error": complaint})
            return
        values.append(str(self.request.arguments[field][0]))
    host, system, name = values

    client = SystemAdministratorClient(host, None, delegatedDN=userDN, delegatedGroup=userGroup)
    result = yield self.threadTask(client.getLogTail, system, name)
    gLogger.debug(result)
    if not result["OK"]:
        self.finish({"success": "false", "error": result["Message"]})
        return

    logs = result["Value"]
    key = system + "_" + name
    if key not in logs:
        self.finish({"success": "false",
                     "error": "%s key is absent in service response" % key})
        return
    self.finish({"success": "true", "result": logs[key].replace("\n", "<br>")})
def do_exec(self, args):
    """ Execute a shell command on the remote host and get back the output

        usage:

          exec <cmd> [<arguments>]
    """
    client = SystemAdministratorClient(self.host, self.port)
    result = client.executeCommand(args)
    if not result['OK']:
        self.__errMsg(result['Message'])
        # A failed call has no 'Value' to unpack: stop here
        return

    # 'Value' holds (exit status, standard output, standard error)
    status, output, error = result['Value']
    print("")
    for line in output.split('\n'):
        print(line)
    if error:
        self.__errMsg(status)
        for line in error.split('\n'):
            print(line)
def do_update(self, args):
    """ Update the software on the target host to a given version

        usage:

          update <version>
    """
    argss = args.split()
    if not argss:
        # The version is mandatory: show the usage instead of raising IndexError
        print(self.do_update.__doc__)
        return
    version = argss[0]

    client = SystemAdministratorClient(self.host, self.port)
    print("Software update can take a while, please wait ...")
    result = client.updateSoftware(version)
    if not result['OK']:
        self.__errMsg("Failed to update the software")
        print(result['Message'])
    else:
        print("Software successfully updated.")
        print("You should restart the services to use the new software version.")
        print("Think of updating /Operation/<vo>/<setup>/Versions section in the CS")
def do_exec(self, args):
    """ Execute a shell command on the remote host and get back the output

        usage:

          exec <cmd> [<arguments>]
    """
    client = SystemAdministratorClient(self.host, self.port)
    result = client.executeCommand(args)
    if not result['OK']:
        self.__errMsg(result['Message'])
        # Nothing to unpack from a failed call: stop here
        return

    # 'Value' holds (exit status, standard output, standard error)
    status, output, error = result['Value']
    print("")
    for line in output.split('\n'):
        print(line)
    if error:
        self.__errMsg(status)
        for line in error.split('\n'):
            print(line)
def do_update(self, args):
    """ Update the software on the target host to a given version

        usage:

          update <version>
    """
    argss = args.split()
    if not argss:
        # No version given: show the usage instead of raising IndexError
        print(self.do_update.__doc__)
        return
    version = argss[0]

    client = SystemAdministratorClient(self.host, self.port)
    print("Software update can take a while, please wait ...")
    result = client.updateSoftware(version)
    if not result['OK']:
        self.__errMsg("Failed to update the software")
        print(result['Message'])
    else:
        print("Software successfully updated.")
        print("You should restart the services to use the new software version.")
        print("Think of updating /Operation/<vo>/<setup>/Versions section in the CS")
def do_cd(self, args):
    """ Change the current working directory on the target host

        Usage:
          cd <dirpath>
    """
    argv = args.split()
    if argv:
        target = argv[0]
        if target == '-':
            # Swap the current and previous directories, if there is a previous one
            if self.previous_cwd:
                self.cwd, self.previous_cwd = self.previous_cwd, self.cwd
        else:
            # Relative paths are resolved against the current directory
            if not target.startswith('/'):
                target = os.path.normpath(self.cwd + '/' + target)
            self.previous_cwd = self.cwd
            self.cwd = target
    elif self.homeDir:
        # No argument: go to the already known home directory
        self.previous_cwd = self.cwd
        self.cwd = self.homeDir
    else:
        # No argument and home unknown: ask the remote host for $HOME
        reply = SystemAdministratorClient(self.host, self.port).executeCommand('echo $HOME')
        if not reply['OK']:
            self.__errMsg(reply['Message'])
            return
        status, output, _error = reply['Value']
        if not status and output:
            self.homeDir = output.strip()
            self.previous_cwd = self.cwd
            self.cwd = self.homeDir
    self.prompt = '[%s:%s]> ' % (self.host, self.cwd)
def do_cd(self, args):
    """ Change the current working directory on the target host

        Usage:
          cd <dirpath>
    """
    words = args.split()
    if words:
        destination = words[0]
        if destination == '-':
            # Go back: exchange current and previous directories when one is recorded
            if self.previous_cwd:
                self.cwd, self.previous_cwd = self.previous_cwd, self.cwd
        else:
            # A path not starting with '/' is taken relative to the current directory
            if not destination.startswith('/'):
                destination = os.path.normpath(self.cwd + '/' + destination)
            self.previous_cwd = self.cwd
            self.cwd = destination
    elif self.homeDir:
        # Bare "cd" with a known home directory
        self.previous_cwd = self.cwd
        self.cwd = self.homeDir
    else:
        # Bare "cd" with an unknown home: query $HOME on the remote host
        answer = SystemAdministratorClient(self.host, self.port).executeCommand('echo $HOME')
        if not answer['OK']:
            self.__errMsg(answer['Message'])
            return
        status, output, _error = answer['Value']
        if not status and output:
            self.homeDir = output.strip()
            self.previous_cwd = self.cwd
            self.cwd = self.homeDir
    self.prompt = '[%s:%s]> ' % (self.host, self.cwd)
def getErrors(self, argss):
    """ Get and print out errors from the logs of specified components

        :param list argss: nothing or '*' for all the components, otherwise
                           the system and the component name
    """
    if len(argss) < 1 or argss[0] == "*":
        component = '*'
    elif len(argss) < 2:
        # A system without a component: show the usage of the show command
        print("")
        print(self.do_show.__doc__)
        return
    else:
        component = '/'.join([argss[0], argss[1]])

    client = SystemAdministratorClient(self.host, self.port)
    result = client.checkComponentLog(component)
    if not result['OK']:
        self.__errMsg(result['Message'])
        return

    fields = ['System', 'Component', 'Last hour', 'Last day', 'Last error']
    records = []
    for cname in result['Value']:
        system, component = cname.split('/')
        errors_1 = result['Value'][cname]['ErrorsHour']
        errors_24 = result['Value'][cname]['ErrorsDay']
        # strip() returns a new string; its result used to be thrown away
        lastError = result['Value'][cname]['LastError'].strip()
        if len(lastError) > 80:
            lastError = lastError[:80] + '...'
        records.append([system, component, str(errors_1), str(errors_24), lastError])
    records.sort()
    printTable(fields, records)
def do_uninstall( self, args ): """ Uninstall DIRAC component usage: uninstall <system> <component> """ argss = args.split() if not argss or len(argss) != 2: print self.do_uninstall.__doc__ return system,component = argss client = SystemAdministratorClient( self.host, self.port ) result = client.uninstallComponent( system, component ) if not result['OK']: print "Error:", result['Message'] else: print "Successfully uninstalled %s/%s" % (system,component)
def do_uninstall(self, args): """ Uninstall DIRAC component usage: uninstall <system> <component> """ argss = args.split() if not argss or len(argss) != 2: print self.do_uninstall.__doc__ return system, component = argss client = SystemAdministratorClient(self.host, self.port) result = client.uninstallComponent(system, component) if not result['OK']: print "Error:", result['Message'] else: print "Successfully uninstalled %s/%s" % (system, component)
def do_update(self, args):
    """ Update the software on the target host to a given version

        usage:

          update <version> [ -r <rootPath> ] [ -g <lcgVersion> ]

              where rootPath - path to the DIRAC installation
                    lcgVersion - version of the LCG bindings to install
    """
    try:
        argss = args.split()
        version = argss[0]
        rootPath = ''
        lcgVersion = ''
        del argss[0]
        while argss:
            if argss[0] == '-r':
                rootPath = argss[1]
            elif argss[0] == '-g':
                lcgVersion = argss[1]
            else:
                # An unrecognised token was never consumed, so this loop
                # used to spin forever; report it as wrong input instead.
                raise ValueError("unknown option %s" % argss[0])
            # Drop the option and its value
            del argss[0:2]
    except (IndexError, ValueError) as x:
        gLogger.notice("ERROR: wrong input:", str(x))
        gLogger.notice(self.do_update.__doc__)
        return

    client = SystemAdministratorClient(self.host, self.port)
    gLogger.notice("Software update can take a while, please wait ...")
    result = client.updateSoftware(version, rootPath, lcgVersion, timeout=300)
    if not result['OK']:
        self._errMsg("Failed to update the software")
        gLogger.notice(result['Message'])
    else:
        gLogger.notice("Software successfully updated.")
        gLogger.notice("You should restart the services to use the new software version.")
        gLogger.notice("Think of updating /Operations/<vo>/<setup>/Pilot/Versions section in the CS")
def do_restart(self, args):
    """ Restart services or agents or database server

        usage:

          restart <system|*> <service|agent|*>
          restart mysql
    """
    # Split first so that whitespace-only input is also treated as empty
    argss = args.split()
    if not argss:
        gLogger.notice(self.do_restart.__doc__)
        return

    option = argss.pop(0)
    if option == 'mysql':
        gLogger.notice("Not yet implemented")
        return
    if option != "*" and len(argss) < 1:
        # A named system needs a component
        gLogger.notice(self.do_restart.__doc__)
        return

    system = option
    component = argss[0] if system != '*' else '*'
    client = SystemAdministratorClient(self.host, self.port)
    result = client.restartComponent(system, component)
    if not result['OK']:
        if system == '*':
            # Any failure of a restart-all is reported as the expected
            # loss of the SystemAdministrator connection
            gLogger.notice("All systems are restarted, connection to SystemAdministrator is lost")
        else:
            self.__errMsg(result['Message'])
        return

    if system != '*' and component != '*':
        gLogger.notice("\n%s_%s started successfully, runit status:\n" % (system, component))
    else:
        gLogger.notice("\nComponents started successfully, runit status:\n")
    for comp in result['Value']:
        # Log one formatted line; a tuple was passed here before, so the
        # tuple itself was logged instead of the text
        gLogger.notice("%s : %s" % (comp.rjust(32), result['Value'][comp]['RunitStatus']))
def do_exec(self, args):
    """ Execute a shell command on the remote host and get back the output

        usage:

          exec <cmd> [<arguments>]
    """
    client = SystemAdministratorClient(self.host, self.port)
    # Run the command from the current remote working directory
    reply = client.executeCommand('cd %s;' % self.cwd + args)
    if not reply['OK']:
        self.__errMsg(reply['Message'])
        return

    # 'Value' holds (exit status, standard output, standard error)
    status, stdOut, stdErr = reply['Value']
    gLogger.notice('')
    for outLine in stdOut.split('\n'):
        gLogger.notice(outLine)
    if not stdErr:
        return
    self.__errMsg(status)
    for errLine in stdErr.split('\n'):
        gLogger.notice(errLine)
def initialize(self):
    """Define the status labels, read the setup and build the admin client.

    :return: S_OK()
    """
    # Criticality and outcome labels
    self.OK = "OK"
    self.FAILURE = "FAILURE"
    self.CRITICAL = "Critical"
    self.NON_CRITICAL = "NonCritical"

    self.outputNonCritical = True
    self.setup = gConfig.getValue('/DIRAC/Setup', 'LHCb-Development')

    # all components not present here will be treated as non critical
    self.admClient = SystemAdministratorClient('localhost')
    return S_OK()
def do_start(self, args):
    """ Start services or agents or database server

        usage:

          start <system|*> <service|agent|*>
          start mysql
    """
    argss = args.split()
    # 'mysql' and the '*' wildcard are complete on their own; a named system
    # needs a component. Requiring two words for everything rejected the
    # documented "start mysql" form.
    if not argss or (argss[0] not in ('mysql', '*') and len(argss) < 2):
        print(self.do_start.__doc__)
        return
    if argss[0] == 'mysql':
        print("Not yet implemented")
        return

    system = argss[0]
    component = argss[1] if system != '*' else '*'
    client = SystemAdministratorClient(self.host, self.port)
    result = client.startComponent(system, component)
    if not result['OK']:
        self.__errMsg(result['Message'])
        return

    if system != '*' and component != '*':
        print("\n%s_%s started successfully, runit status:\n" % (system, component))
    else:
        print("\nComponents started successfully, runit status:\n")
    # Single-argument print() emits the same text under Python 2 and 3
    for comp in result['Value']:
        print("%s : %s" % (comp.rjust(32), result['Value'][comp]['RunitStatus']))
def updateHost(hostName, version):
    """
    Apply update to specific host

    :param str hostName: name of the host you want to update
    :param str version: version vArBpC you want to update to
    :return: the failing result of the ping or of the update, S_OK() otherwise
    """
    host, port = parseHostname(hostName)
    client = SystemAdministratorClient(host, port)

    pingResult = client.ping()
    if not pingResult['OK']:
        gLogger.error("Cannot connect to %s" % host)
        return pingResult

    gLogger.notice(
        "Initiating software update of %s, this can take a while, please be patient ..." % host)
    updateResult = client.updateSoftware(version, '', '', timeout=600)
    if updateResult['OK']:
        return S_OK()
    return updateResult
def getLog(self, argss):
    """ Get the tail of the log file of the given component

        :param list argss: system, component and optionally the number of
                           lines to show (40 when not given)
    """
    if len(argss) < 2:
        print("")
        print(self.do_show.__doc__)
        return

    system, component = argss[0], argss[1]
    nLines = 40
    if len(argss) > 2:
        try:
            nLines = int(argss[2])
        except ValueError:
            # A non-numeric line count is a usage error, not a crash
            print("")
            print(self.do_show.__doc__)
            return

    client = SystemAdministratorClient(self.host, self.port)
    result = client.getLogTail(system, component, nLines)
    if not result['OK']:
        self.__errMsg(result['Message'])
        return

    key = '_'.join([system, component])
    # Also covers a reply that lacks this component's entry, which used to
    # raise KeyError
    if not result['Value'] or key not in result['Value']:
        print("No logs found")
        return
    for line in result['Value'][key].split('\n'):
        print("%s %s" % (' ', line))
def do_start(self, args):
    """ Start services or agents or database server

        usage:

          start <system|*> <service|agent|*>
          start mysql
    """
    argss = args.split()
    # A named system needs a component, while 'mysql' and '*' stand alone;
    # the former two-word minimum rejected the documented "start mysql".
    if not argss or (argss[0] not in ('mysql', '*') and len(argss) < 2):
        print(self.do_start.__doc__)
        return
    if argss[0] == 'mysql':
        print("Not yet implemented")
        return

    system = argss[0]
    component = argss[1] if system != '*' else '*'
    client = SystemAdministratorClient(self.host, self.port)
    result = client.startComponent(system, component)
    if not result['OK']:
        self.__errMsg(result['Message'])
        return

    if system != '*' and component != '*':
        print("\n%s_%s started successfully, runit status:\n" % (system, component))
    else:
        print("\nComponents started successfully, runit status:\n")
    # Single-argument print() emits the same text under Python 2 and 3
    for comp in result['Value']:
        print("%s : %s" % (comp.rjust(32), result['Value'][comp]['RunitStatus']))
def getErrors(self, argss):
    """ Get and print out errors from the logs of specified components

        :param list argss: nothing or '*' for all the components, otherwise
                           the system and the component name
    """
    if len(argss) < 1 or argss[0] == "*":
        component = '*'
    elif len(argss) < 2:
        # System given without a component: show the usage of "show"
        print("")
        print(self.do_show.__doc__)
        return
    else:
        component = '/'.join([argss[0], argss[1]])

    client = SystemAdministratorClient(self.host, self.port)
    result = client.checkComponentLog(component)
    if not result['OK']:
        self.__errMsg(result['Message'])
        return

    fields = ['System', 'Component', 'Last hour', 'Last day', 'Last error']
    records = []
    for cname in result['Value']:
        system, component = cname.split('/')
        errors_1 = result['Value'][cname]['ErrorsHour']
        errors_24 = result['Value'][cname]['ErrorsDay']
        # Keep the stripped text: strip() does not modify the string in place
        lastError = result['Value'][cname]['LastError'].strip()
        if len(lastError) > 80:
            lastError = lastError[:80] + '...'
        records.append([system, component, str(errors_1), str(errors_24), lastError])
    records.sort()
    printTable(fields, records)
def web_getHostData(self):
    """
    Finish the request with the flattened list of components (services,
    agents) of the host named in the ``hostname`` request argument.

    The reply has ``success`` "true" and the records under ``result`` (each
    tagged with the host), or ``success`` "false" and an ``error`` message.
    """
    userData = self.getSessionData()
    DN = str(userData["user"]["DN"])
    group = str(userData["user"]["group"])

    # dict.get instead of has_key (gone in Python 3); an empty value list is
    # treated like a missing argument instead of raising IndexError.
    hostArgs = self.request.arguments.get("hostname")
    if not hostArgs or not hostArgs[0]:
        self.finish({"success": "false", "error": "Name of the host is absent"})
        return
    host = hostArgs[0]

    client = SystemAdministratorClient(host, None, delegatedDN=DN, delegatedGroup=group)
    result = yield self.threadTask(client.getOverallStatus)
    gLogger.debug("Result of getOverallStatus(): %s" % result)
    if not result["OK"]:
        self.finish({"success": "false", "error": result["Message"]})
        return

    callback = list()
    for record in self.flatten(result["Value"]):
        record["Host"] = host
        callback.append(record)
    self.finish({"success": "true", "result": callback})
def web_getHostErrors(self):
    """Finish the request with the log error summary of the requested host.

    The host is the first value of the ``host`` request argument. Each entry
    returned by ``checkComponentLog("*")`` is tagged with its system,
    component name and host; the reply also carries the number of entries
    under ``total``.
    """
    session = self.getSessionData()
    userDN = str(session["user"]["DN"])
    userGroup = str(session["user"]["group"])

    if "host" not in self.request.arguments:
        self.finish({"success": "false",
                     "error": "Name of the host is missing or not defined"})
        return
    host = str(self.request.arguments["host"][0])

    client = SystemAdministratorClient(host, None, delegatedDN=userDN, delegatedGroup=userGroup)
    result = yield self.threadTask(client.checkComponentLog, "*")
    gLogger.debug(result)
    if not result["OK"]:
        self.finish({"success": "false", "error": result["Message"]})
        return

    records = []
    # Keys are expected in "<system>/<component>" form
    for key, entry in result["Value"].items():
        system, component = key.split("/")
        entry["System"] = system
        entry["Name"] = component
        entry["Host"] = host
        records.append(entry)
    self.finish({"success": "true", "result": records, "total": len(records)})
def __getClient(self):
    """Return a new SystemAdministratorClient for this object's host and port."""
    client = SystemAdministratorClient(self.host, self.port)
    return client
def do_uninstall(self, args):
    """
        Uninstall DIRAC component

        usage:

          uninstall db <database>
          uninstall <-f ForceLogUninstall> <system> <component>
    """
    argss = args.split()
    if not argss:
        gLogger.notice(self.do_uninstall.__doc__)
        return

    # The proxy information must be readable; stop when it is not (the
    # missing return used to end in a KeyError on the absent 'Value').
    result = getProxyInfo()
    if not result['OK']:
        self.__errMsg(result['Message'])
        return

    option = argss[0]
    if option == 'db':
        if len(argss) < 2:
            # The database name is mandatory
            gLogger.notice(self.do_uninstall.__doc__)
            return
        component = argss[1]
        client = SystemAdministratorClient(self.host, self.port)

        result = client.getHostInfo()
        if not result['OK']:
            self.__errMsg(result['Message'])
            return
        cpu = result['Value']['CPUModel']
        hostname = self.host

        result = client.getAvailableDatabases()
        if not result['OK']:
            self.__errMsg(result['Message'])
            return
        if component not in result['Value']:
            # Unknown database name: report it instead of raising KeyError
            self.__errMsg("Database %s is not available" % component)
            return
        system = result['Value'][component]['System']

        # The uninstallation is registered before the database is removed
        result = MonitoringUtilities.monitorUninstallation(system, component,
                                                           hostname=hostname, cpu=cpu)
        if not result['OK']:
            self.__errMsg(result['Message'])
            return

        result = client.uninstallDatabase(component)
        if not result['OK']:
            self.__errMsg(result['Message'])
        else:
            gLogger.notice("Successfully uninstalled %s" % (component))
        return

    # Component uninstallation; -f removes the logs without asking
    force = option == '-f'
    if force:
        del argss[0]
    if len(argss) != 2:
        gLogger.notice(self.do_uninstall.__doc__)
        return

    system, component = argss
    client = SystemAdministratorClient(self.host, self.port)

    monitoringClient = ComponentMonitoringClient()
    result = monitoringClient.getInstallations({'Instance': component,
                                                'UnInstallationTime': None},
                                               {'System': system},
                                               {'HostName': self.host}, True)
    if not result['OK']:
        self.__errMsg(result['Message'])
        return
    if len(result['Value']) < 1:
        self.__errMsg("Given component does not exist")
        return
    if len(result['Value']) > 1:
        self.__errMsg("Too many components match")
        return

    removeLogs = force
    if not force and result['Value'][0]['Component']['Type'] in self.runitComponents:
        # Only components of the types in self.runitComponents get the prompt
        result = promptUser('Remove logs?', ['y', 'n'], 'n')
        if result['OK']:
            removeLogs = result['Value'] == 'y'

    result = client.uninstallComponent(system, component, removeLogs)
    if not result['OK']:
        self.__errMsg(result['Message'])
    else:
        gLogger.notice("Successfully uninstalled %s/%s" % (system, component))

    # NOTE(review): the uninstallation is registered below even when the
    # uninstall call above failed - confirm this is intended.
    result = client.getHostInfo()
    if not result['OK']:
        self.__errMsg(result['Message'])
        return
    cpu = result['Value']['CPUModel']
    hostname = self.host
    result = MonitoringUtilities.monitorUninstallation(system, component,
                                                       hostname=hostname, cpu=cpu)
    if not result['OK']:
        # Report the problem; returning the (truthy) result dict from a
        # do_* handler would make a cmd.Cmd loop stop.
        self.__errMsg(result['Message'])
def do_show( self, args ): """ Show list of components with various related information usage: show software - show components for which software is available show installed - show components installed in the host with runit system show setup - show components set up for automatic running in the host show status - show status of the installed components show database - show status of the databases show mysql - show status of the MySQL server show log <system> <service|agent> [nlines] - show last <nlines> lines in the component log file show info - show version of software and setup show errors [*|<system> <service|agent>] - show error count for the given component or all the components in the last hour and day """ argss = args.split() if not argss: print self.do_show.__doc__ return option = argss[0] del argss[0] if option == 'software': client = SystemAdministratorClient( self.host, self.port ) result = client.getSoftwareComponents() if not result['OK']: print " ERROR:", result['Message'] else: print pprint.pprint( result['Value'] ) elif option == 'installed': client = SystemAdministratorClient( self.host, self.port ) result = client.getInstalledComponents() if not result['OK']: print " ERROR:", result['Message'] else: print pprint.pprint( result['Value'] ) elif option == 'setup': client = SystemAdministratorClient( self.host, self.port ) result = client.getSetupComponents() if not result['OK']: print " ERROR:", result['Message'] else: print pprint.pprint( result['Value'] ) elif option == 'status': client = SystemAdministratorClient( self.host, self.port ) result = client.getOverallStatus() if not result['OK']: print "ERROR:", result['Message'] else: rDict = result['Value'] print print " System", ' '*20, 'Name', ' '*15, 'Type', ' '*13, 'Setup Installed Runit Uptime PID' print '-' * 116 for compType in rDict: for system in rDict[compType]: for component in rDict[compType][system]: if rDict[compType][system][component]['Installed']: print system.ljust( 28 ), 
component.ljust( 28 ), compType.lower()[:-1].ljust( 7 ), if rDict[compType][system][component]['Setup']: print 'SetUp'.rjust( 12 ), else: print 'NotSetup'.rjust( 12 ), if rDict[compType][system][component]['Installed']: print 'Installed'.rjust( 12 ), else: print 'NotInstalled'.rjust( 12 ), print str( rDict[compType][system][component]['RunitStatus'] ).ljust( 7 ), print str( rDict[compType][system][component]['Timeup'] ).rjust( 7 ), print str( rDict[compType][system][component]['PID'] ).rjust( 8 ), print elif option == 'database' or option == 'databases': client = SystemAdministratorClient( self.host, self.port ) if not InstallTools.mysqlPassword: InstallTools.mysqlPassword = "******" InstallTools.getMySQLPasswords() result = client.getDatabases( InstallTools.mysqlRootPwd ) if not result['OK']: print "ERROR:", result['Message'] return resultSW = client.getAvailableDatabases() if not resultSW['OK']: print "ERROR:", resultSW['Message'] return sw = resultSW['Value'] installed = result['Value'] print for db in sw: if db in installed: print db.rjust( 25 ), ': Installed' else: print db.rjust( 25 ), ': Not installed' if not sw: print "No database found" elif option == 'mysql': client = SystemAdministratorClient( self.host, self.port ) result = client.getMySQLStatus() if not result['OK']: print "ERROR:", result['Message'] elif result['Value']: print for par, value in result['Value'].items(): print par.rjust( 28 ), ':', value else: print "No MySQL database found" elif option == "log": self.getLog( argss ) elif option == "info": client = SystemAdministratorClient( self.host, self.port ) result = client.getInfo() if not result['OK']: print "ERROR:", result['Message'] else: print print "Setup:", result['Value']['Setup'] print "DIRAC version:", result['Value']['DIRAC'] if result['Value']['Extensions']: for e, v in result['Value']['Extensions'].items(): print "%s version" % e, v print elif option == "errors": self.getErrors( argss ) else: print "Unknown option:", option
class MonitorAgents(AgentModule):
  """MonitorAgents class.

  On every cycle the agent checks the executors, agents and services reported by the
  local SystemAdministrator, optionally restarts the ones that look stuck, brings
  components to the state configured for this host, checks the service URLs in the
  configuration and finally sends an email summarising the actions and the errors.
  """

  def __init__(self, *args, **kwargs):
    """Initialize the agent, clients, default values."""
    AgentModule.__init__(self, *args, **kwargs)
    self.name = 'MonitorAgents'
    self.setup = "Production"
    # Master switch: nothing is restarted unless this AND the per-type flag are set
    self.enabled = False
    self.restartAgents = False
    self.restartExecutors = False
    self.restartServices = False
    self.controlComponents = False
    self.commitURLs = False
    self.diracLocation = "/opt/dirac/pro"

    self.sysAdminClient = SystemAdministratorClient(socket.gethostname())
    self.jobMonClient = JobMonitoringClient()
    self.nClient = NotificationClient()
    self.csAPI = None
    self.agents = dict()
    self.executors = dict()
    self.services = dict()
    # Error strings collected during a cycle, reported by sendNotification
    self.errors = list()
    # instance name -> {'Treatment': ..., 'LogAge': ...}, reported by sendNotification
    self.accounting = defaultdict(dict)

    self.addressTo = ["*****@*****.**"]
    self.addressFrom = "*****@*****.**"
    self.emailSubject = "MonitorAgents on %s" % socket.gethostname()

  def logError(self, errStr, varMsg=''):
    """Append errors to a list, which is sent in email notification."""
    self.log.error(errStr, varMsg)
    self.errors.append(errStr + " " + varMsg)

  def beginExecution(self):
    """Reload the configurations before every cycle.

    :returns: S_OK, or S_ERROR when the running agents, executors or services cannot be obtained
    """
    self.setup = self.am_getOption("Setup", self.setup)
    self.enabled = self.am_getOption("EnableFlag", self.enabled)
    self.restartAgents = self.am_getOption("RestartAgents", self.restartAgents)
    self.restartExecutors = self.am_getOption("RestartExecutors", self.restartExecutors)
    self.restartServices = self.am_getOption("RestartServices", self.restartServices)
    self.diracLocation = os.environ.get("DIRAC", self.diracLocation)
    self.addressTo = self.am_getOption('MailTo', self.addressTo)
    self.addressFrom = self.am_getOption('MailFrom', self.addressFrom)
    self.controlComponents = self.am_getOption('ControlComponents', self.controlComponents)
    self.commitURLs = self.am_getOption('CommitURLs', self.commitURLs)

    self.csAPI = CSAPI()

    res = self.getRunningInstances(instanceType='Agents')
    if not res["OK"]:
      return S_ERROR("Failure to get running agents")
    self.agents = res["Value"]

    res = self.getRunningInstances(instanceType='Executors')
    if not res["OK"]:
      return S_ERROR("Failure to get running executors")
    self.executors = res["Value"]

    res = self.getRunningInstances(instanceType='Services')
    if not res["OK"]:
      return S_ERROR("Failure to get running services")
    self.services = res["Value"]

    self.accounting.clear()
    return S_OK()

  def sendNotification(self):
    """Send email notification about changes done in the last cycle.

    Nothing is sent when there are neither errors nor accounting entries. After sending,
    the collected errors and the accounting are reset.
    """
    if not(self.errors or self.accounting):
      return S_OK()

    emailBody = ""
    rows = []
    for instanceName, val in self.accounting.iteritems():
      rows.append([[instanceName],
                   [val.get('Treatment', 'No Treatment')],
                   [str(val.get('LogAge', 'Not Relevant'))]])

    if rows:
      columns = ["Instance", "Treatment", "Log File Age (Minutes)"]
      emailBody += printTable(columns, rows, printOut=False, numbering=False, columnSeparator=' | ')

    if self.errors:
      # newline after the heading so the first error is not glued to it
      emailBody += "\n\nErrors:\n"
      emailBody += "\n".join(self.errors)

    self.log.notice("Sending Email:\n" + emailBody)
    for address in self.addressTo:
      res = self.nClient.sendMail(address, self.emailSubject, emailBody, self.addressFrom, localAttempt=False)
      if not res['OK']:
        self.log.error("Failure to send Email notification to ", address)
        continue

    self.errors = []
    self.accounting.clear()

    return S_OK()

  def getRunningInstances(self, instanceType='Agents', runitStatus='Run'):
    """Return a dict of running agents, executors or services.

    Key is agent's name, value contains dict with PollingTime, PID, Port, Module, RunitStatus, LogFileLocation

    :param str instanceType: 'Agents', 'Executors', 'Services'
    :param str runitStatus: Return only those instances with given RunitStatus or 'All'
    :returns: Dictionary of running instances
    """
    res = self.sysAdminClient.getOverallStatus()
    if not res["OK"]:
      self.logError("Failure to get %s from system administrator client" % instanceType, res["Message"])
      return res

    val = res['Value'][instanceType]
    runningAgents = defaultdict(dict)
    for system, agents in val.iteritems():
      for agentName, agentInfo in agents.iteritems():
        # only instances which are both set up and installed are considered
        if agentInfo['Setup'] and agentInfo['Installed']:
          if runitStatus != 'All' and agentInfo['RunitStatus'] != runitStatus:
            continue
          confPath = cfgPath('/Systems/' + system + '/' + self.setup + '/%s/' % instanceType + agentName)
          for option, default in (('PollingTime', HOUR), ('Port', None)):
            optPath = os.path.join(confPath, option)
            runningAgents[agentName][option] = gConfig.getValue(optPath, default)
          runningAgents[agentName]["LogFileLocation"] = \
              os.path.join(self.diracLocation, 'runit', system, agentName, 'log', 'current')
          runningAgents[agentName]["PID"] = agentInfo["PID"]
          runningAgents[agentName]['Module'] = agentInfo['Module']
          runningAgents[agentName]['RunitStatus'] = agentInfo['RunitStatus']
          runningAgents[agentName]['System'] = system

    return S_OK(runningAgents)

  def on_terminate(self, agentName, process):
    """Execute callback when a process terminates gracefully."""
    self.log.info("%s's process with ID: %s has been terminated successfully" % (agentName, process.pid))

  def execute(self):
    """Execute checks for agents, executors, services.

    :returns: S_ERROR if any error was recorded during this cycle, S_OK otherwise
    """
    for instanceType in ('executor', 'agent', 'service'):
      for name, options in getattr(self, instanceType + 's').iteritems():
        # call checkAgent, checkExecutor, checkService
        res = getattr(self, 'check' + instanceType.capitalize())(name, options)
        if not res['OK']:
          self.logError("Failure when checking %s" % instanceType, "%s, %s" % (name, res['Message']))

    res = self.componentControl()
    if not res['OK']:
      # a missing Running/Stopped section is not reported as an error
      if "Stopped does not exist" not in res['Message'] and \
         "Running does not exist" not in res['Message']:
        self.logError("Failure to control components", res['Message'])

    if not self.errors:
      res = self.checkURLs()
      if not res['OK']:
        self.logError("Failure to check URLs", res['Message'])
    else:
      self.logError('Something was wrong before, not checking URLs this time')

    # sendNotification resets self.errors, so remember the outcome before calling it
    hadErrors = bool(self.errors)
    self.sendNotification()

    if hadErrors:
      return S_ERROR("Error during this cycle, check log")

    return S_OK()

  @staticmethod
  def getLastAccessTime(logFileLocation):
    """Return the age of log file.

    The age is computed from the modification time of the file.

    :param str logFileLocation: path of the log file
    :returns: S_OK(datetime.timedelta), or S_ERROR if the file cannot be accessed
    """
    lastAccessTime = 0
    try:
      lastAccessTime = os.path.getmtime(logFileLocation)
      lastAccessTime = datetime.fromtimestamp(lastAccessTime)
    except OSError as e:
      return S_ERROR('Failed to access logfile %s: %r' % (logFileLocation, e))

    now = datetime.now()
    age = now - lastAccessTime
    return S_OK(age)

  def restartInstance(self, pid, instanceName, enabled):
    """Kill a process which is then restarted automatically.

    :param int pid: process ID of the instance
    :param str instanceName: name used in logs and in the accounting
    :param bool enabled: per-type restart flag, combined with the global EnableFlag
    :returns: S_OK(NO_RESTART) when restarting is disabled, S_OK() when the processes
              were terminated, S_ERROR on a psutil error
    """
    if not (self.enabled and enabled):
      self.log.info("Restarting is disabled, please restart %s manually" % instanceName)
      self.accounting[instanceName]["Treatment"] = "Please restart it manually"
      return S_OK(NO_RESTART)

    try:
      agentProc = psutil.Process(int(pid))
      # terminate the whole process tree, children included
      processesToTerminate = agentProc.children(recursive=True)
      processesToTerminate.append(agentProc)

      for proc in processesToTerminate:
        proc.terminate()

      _gone, alive = psutil.wait_procs(processesToTerminate, timeout=5,
                                       callback=partial(self.on_terminate, instanceName))
      for proc in alive:
        self.log.info("Forcefully killing process %s" % proc.pid)
        proc.kill()

      return S_OK()

    except psutil.Error as err:
      self.logError("Exception occurred in terminating processes", "%s" % err)
      return S_ERROR()

  def checkService(self, serviceName, options):
    """Ping the service, restart if the ping does not respond."""
    url = self._getURL(serviceName, options)
    self.log.info("Pinging service", url)
    pingRes = Client().ping(url=url)
    if not pingRes['OK']:
      self.log.info('Failure pinging service: %s: %s' % (url, pingRes['Message']))
      res = self.restartInstance(int(options['PID']), serviceName, self.restartServices)
      if not res["OK"]:
        return res
      if res['Value'] != NO_RESTART:
        self.accounting[serviceName]["Treatment"] = "Successfully Restarted"
        self.log.info("Service %s has been successfully restarted" % serviceName)
      # the ping failed: do not claim that the service responded
      return S_OK()
    self.log.info("Service responded OK")
    return S_OK()

  def checkAgent(self, agentName, options):
    """Check the age of agent's log file, if it is too old then restart the agent."""
    pollingTime, currentLogLocation, pid = options['PollingTime'], options['LogFileLocation'], options['PID']
    self.log.info("Checking Agent: %s" % agentName)
    self.log.info("Polling Time: %s" % pollingTime)
    self.log.info("Current Log File location: %s" % currentLogLocation)

    res = self.getLastAccessTime(currentLogLocation)
    if not res["OK"]:
      return res

    # total_seconds() includes whole days, timedelta.seconds alone does not
    ageSeconds = res["Value"].total_seconds()
    ageMinutes = int(ageSeconds / MINUTES)
    self.log.info("Current log file for %s is %d minutes old" % (agentName, ageMinutes))

    maxLogAge = max(pollingTime + HOUR, 2 * HOUR)
    if ageSeconds < maxLogAge:
      return S_OK()

    self.log.info("Current log file is too old for Agent %s" % agentName)
    self.accounting[agentName]["LogAge"] = ageMinutes

    res = self.restartInstance(int(pid), agentName, self.restartAgents)
    if not res["OK"]:
      return res
    if res['Value'] != NO_RESTART:
      self.accounting[agentName]["Treatment"] = "Successfully Restarted"
      self.log.info("Agent %s has been successfully restarted" % agentName)

    return S_OK()

  def checkExecutor(self, executor, options):
    """Check the age of executor log file, if too old check for jobs in checking status, then restart the executors."""
    currentLogLocation = options['LogFileLocation']
    pid = options['PID']
    self.log.info("Checking executor: %s" % executor)
    self.log.info("Current Log File location: %s" % currentLogLocation)

    res = self.getLastAccessTime(currentLogLocation)
    if not res["OK"]:
      return res

    # total_seconds() includes whole days, timedelta.seconds alone does not
    ageSeconds = res["Value"].total_seconds()
    ageMinutes = int(ageSeconds / MINUTES)
    self.log.info("Current log file for %s is %d minutes old" % (executor, ageMinutes))

    if ageSeconds < 2 * HOUR:
      return S_OK()

    self.log.info("Current log file is too old for Executor %s" % executor)
    self.accounting[executor]["LogAge"] = ageMinutes

    res = self.checkForCheckingJobs(executor)
    if not res['OK']:
      return res
    if res['Value'] == NO_CHECKING_JOBS:
      # an idle executor with an old log is not a problem: nothing to report
      self.accounting.pop(executor, None)
      return S_OK(NO_RESTART)

    res = self.restartInstance(int(pid), executor, self.restartExecutors)
    if not res["OK"]:
      return res
    if res['Value'] != NO_RESTART:
      self.accounting[executor]["Treatment"] = "Successfully Restarted"
      self.log.info("Executor %s has been successfully restarted" % executor)

    return S_OK()

  def checkForCheckingJobs(self, executorName):
    """Check if there are checking jobs with the **executorName** as current MinorStatus.

    :returns: S_OK(CHECKING_JOBS), S_OK(NO_CHECKING_JOBS) or the error from the job monitoring client
    """
    attrDict = {'Status': 'Checking', 'MinorStatus': executorName}

    # returns list of jobs IDs
    resJobs = self.jobMonClient.getJobs(attrDict)
    if not resJobs['OK']:
      self.logError("Could not get jobs for this executor", "%s: %s" % (executorName, resJobs['Message']))
      return resJobs
    if resJobs['Value']:
      self.log.info("Found %d jobs in 'Checking' status for %s" % (len(resJobs['Value']), executorName))
      return S_OK(CHECKING_JOBS)
    self.log.info("Found no jobs in 'Checking' status for %s" % executorName)
    return S_OK(NO_CHECKING_JOBS)

  def componentControl(self):
    """Monitor and control component status as defined in the CS.

    Check for running and stopped components and ensure they have the proper status as defined in the CS
    Registry/Hosts/_HOST_/[Running|Stopped] sections

    :returns: :func:`~DIRAC:DIRAC.Core.Utilities.ReturnValues.S_OK`,
       :func:`~DIRAC:DIRAC.Core.Utilities.ReturnValues.S_ERROR`
    """
    # get the current status of the components
    resCurrent = self._getCurrentComponentStatus()
    if not resCurrent['OK']:
      return resCurrent
    currentStatus = resCurrent['Value']

    resDefault = self._getDefaultComponentStatus()
    if not resDefault['OK']:
      return resDefault
    defaultStatus = resDefault['Value']

    # ensure instances are in the right state
    shouldBe = {}
    shouldBe['Run'] = defaultStatus['Run'].intersection(currentStatus['Down'])
    shouldBe['Down'] = defaultStatus['Down'].intersection(currentStatus['Run'])
    # instances known only to the configuration or only to the host
    shouldBe['Unknown'] = defaultStatus['All'].symmetric_difference(currentStatus['All'])

    self._ensureComponentRunning(shouldBe['Run'])
    self._ensureComponentDown(shouldBe['Down'])

    for instance in shouldBe['Unknown']:
      self.logError("Unknown instance", "%r, either uninstall or add to config" % instance)

    return S_OK()

  def _getCurrentComponentStatus(self):
    """Get current status for components.

    :returns: S_OK with a dict of sets ('Run', 'Down', 'All') of '<system>__<instance>' identifiers
    """
    resOverall = self.sysAdminClient.getOverallStatus()
    if not resOverall['OK']:
      return resOverall
    currentStatus = {'Down': set(), 'Run': set(), 'All': set()}
    informationDict = resOverall['Value']
    for systemsDict in informationDict.values():
      for system, instancesDict in systemsDict.items():
        for instanceName, instanceInfoDict in instancesDict.items():
          identifier = '%s__%s' % (system, instanceName)
          runitStatus = instanceInfoDict.get('RunitStatus')
          if runitStatus in ('Run', 'Down'):
            currentStatus[runitStatus].add(identifier)

    currentStatus['All'] = currentStatus['Run'] | currentStatus['Down']
    return S_OK(currentStatus)

  def _getDefaultComponentStatus(self):
    """Get the configured status of the components.

    :returns: S_OK with a dict of sets ('Run', 'Down', 'All'), or S_ERROR if a section
              cannot be read or an instance is listed as both Running and Stopped
    """
    host = socket.gethostname()
    defaultStatus = {'Down': set(), 'Run': set(), 'All': set()}
    resRunning = gConfig.getOptionsDict(os.path.join('/Registry/Hosts/', host, 'Running'))
    resStopped = gConfig.getOptionsDict(os.path.join('/Registry/Hosts/', host, 'Stopped'))
    if not resRunning['OK']:
      return resRunning
    if not resStopped['OK']:
      return resStopped
    defaultStatus['Run'] = set(resRunning['Value'].keys())
    defaultStatus['Down'] = set(resStopped['Value'].keys())
    defaultStatus['All'] = defaultStatus['Run'] | defaultStatus['Down']

    if defaultStatus['Run'].intersection(defaultStatus['Down']):
      self.logError("Overlap in configuration", str(defaultStatus['Run'].intersection(defaultStatus['Down'])))
      return S_ERROR("Bad host configuration")

    return S_OK(defaultStatus)

  def _ensureComponentRunning(self, shouldBeRunning):
    """Ensure the correct components are running."""
    for instance in shouldBeRunning:
      self.log.info("Starting instance %s" % instance)
      system, name = instance.split('__')
      if self.controlComponents:
        res = self.sysAdminClient.startComponent(system, name)
        if not res['OK']:
          self.logError("Failed to start component:", "%s: %s" % (instance, res['Message']))
        else:
          self.accounting[instance]["Treatment"] = "Instance was down, started instance"
      else:
        # report only, nothing is started when ControlComponents is off
        self.accounting[instance]["Treatment"] = "Instance is down, should be started"

  def _ensureComponentDown(self, shouldBeDown):
    """Ensure the correct components are not running."""
    for instance in shouldBeDown:
      self.log.info("Stopping instance %s" % instance)
      system, name = instance.split('__')
      if self.controlComponents:
        res = self.sysAdminClient.stopComponent(system, name)
        if not res['OK']:
          self.logError("Failed to stop component:", "%s: %s" % (instance, res['Message']))
        else:
          self.accounting[instance]["Treatment"] = "Instance was running, stopped instance"
      else:
        # report only, nothing is stopped when ControlComponents is off
        self.accounting[instance]["Treatment"] = "Instance is running, should be stopped"

  def checkURLs(self):
    """Ensure that the running services have their URL in the Config."""
    self.log.info("Checking URLs")
    # get services again, in case they were started/stop in controlComponents
    gConfig.forceRefresh(fromMaster=True)
    res = self.getRunningInstances(instanceType='Services', runitStatus='All')
    if not res["OK"]:
      return S_ERROR("Failure to get running services")
    self.services = res["Value"]
    for service, options in self.services.iteritems():
      self.log.debug("Checking URL for %s with options %s" % (service, options))
      # ignore SystemAdministrator, does not have URLs
      if 'SystemAdministrator' in service:
        continue
      self._checkServiceURL(service, options)

    if self.csAPI.csModified and self.commitURLs:
      self.log.info("Commiting changes to the CS")
      result = self.csAPI.commit()
      if not result['OK']:
        self.logError('Commit to CS failed', result['Message'])
        return S_ERROR("Failed to commit to CS")
    return S_OK()

  def _checkServiceURL(self, serviceName, options):
    """Ensure service URL is properly configured in the CS.

    The URL is added for a service with RunitStatus 'Run' and removed for one with
    RunitStatus 'Down'; the change is staged in self.csAPI and recorded in the accounting.
    """
    url = self._getURL(serviceName, options)
    system = options['System']
    module = options['Module']
    self.log.info("Checking URLs for %s/%s" % (system, module))
    urlsConfigPath = os.path.join('/Systems', system, self.setup, 'URLs', module)
    urls = gConfig.getValue(urlsConfigPath, [])
    self.log.debug("Found configured URLs for %s: %s" % (module, urls))
    self.log.debug("This URL is %s" % url)
    runitStatus = options['RunitStatus']
    # the message says "Would have ..." when changes are not going to be committed
    wouldHave = 'Would have ' if not self.commitURLs else ''
    if runitStatus == 'Run' and url not in urls:
      urls.append(url)
      message = "%sAdded URL %s to URLs for %s/%s" % (wouldHave, url, system, module)
      self.log.info(message)
      self.accounting[serviceName + "/URL"]["Treatment"] = message
      self.csAPI.modifyValue(urlsConfigPath, ",".join(urls))
    if runitStatus == 'Down' and url in urls:
      urls.remove(url)
      message = "%sRemoved URL %s from URLs for %s/%s" % (wouldHave, url, system, module)
      self.log.info(message)
      self.accounting[serviceName + "/URL"]["Treatment"] = message
      self.csAPI.modifyValue(urlsConfigPath, ",".join(urls))

  @staticmethod
  def _getURL(serviceName, options):
    """Return URL for the service."""
    system = options['System']
    port = options['Port']
    host = socket.gethostname()
    url = 'dips://%s:%s/%s/%s' % (host, port, system, serviceName)
    return url
def do_install(self, args): """ Install various DIRAC components usage: install mysql install db <database> install service <system> <service> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ... install agent <system> <agent> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ... install executor <system> <executor> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ... """ argss = args.split() if not argss: print self.do_install.__doc__ return option = argss[0] del argss[0] if option == "mysql": print "Installing MySQL database, this can take a while ..." client = SystemAdministratorClient(self.host, self.port) if InstallTools.mysqlPassword == 'LocalConfig': InstallTools.mysqlPassword = '' InstallTools.getMySQLPasswords() result = client.installMySQL(InstallTools.mysqlRootPwd, InstallTools.mysqlPassword) if not result['OK']: self.__errMsg(result['Message']) else: print "MySQL:", result['Value'] print "You might need to restart SystemAdministrator service to take new settings into account" elif option == "db": if not argss: print self.do_install.__doc__ return database = argss[0] client = SystemAdministratorClient(self.host, self.port) result = client.getAvailableDatabases() if not result['OK']: self.__errMsg("Can not get database list: %s" % result['Message']) return if not result['Value'].has_key(database): self.__errMsg("Unknown database %s: " % database) return system = result['Value'][database]['System'] setup = gConfig.getValue('/DIRAC/Setup', '') if not setup: self.__errMsg("Unknown current setup") return instance = gConfig.getValue( '/DIRAC/Setups/%s/%s' % (setup, system), '') if not instance: self.__errMsg("No instance defined for system %s" % system) self.__errMsg( "\tAdd new instance with 'add instance %s <instance_name>'" % system) return if not InstallTools.mysqlPassword: InstallTools.mysqlPassword = '******' InstallTools.getMySQLPasswords() result = client.installDatabase(database, InstallTools.mysqlRootPwd) if not 
result['OK']: self.__errMsg(result['Message']) return extension, system = result['Value'] # result = client.addDatabaseOptionsToCS( system, database ) InstallTools.mysqlHost = self.host result = client.getInfo() if not result['OK']: self.__errMsg(result['Message']) hostSetup = result['Value']['Setup'] result = InstallTools.addDatabaseOptionsToCS( gConfig, system, database, hostSetup) if not result['OK']: self.__errMsg(result['Message']) return print "Database %s from %s/%s installed successfully" % ( database, extension, system) elif option in ["service", "agent", "executor"]: if len(argss) < 2: print self.do_install.__doc__ return system = argss[0] del argss[0] component = argss[0] del argss[0] specialOptions = {} module = '' for i in range(len(argss)): if argss[i] == "-m": specialOptions['Module'] = argss[i + 1] module = argss[i + 1] if argss[i] == "-p": opt, value = argss[i + 1].split('=') specialOptions[opt] = value if module == component: module = '' client = SystemAdministratorClient(self.host, self.port) # First need to update the CS # result = client.addDefaultOptionsToCS( option, system, component ) InstallTools.host = self.host result = client.getInfo() if not result['OK']: self.__errMsg(result['Message']) return hostSetup = result['Value']['Setup'] # Install Module section if not yet there if module: result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, module, getCSExtensions(), hostSetup) # Add component section with specific parameters only result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, component, getCSExtensions(), hostSetup, specialOptions, addDefaultOptions=False) else: # Install component section result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, component, getCSExtensions(), hostSetup, specialOptions) if not result['OK']: self.__errMsg(result['Message']) return # Then we can install and start the component result = client.setupComponent(option, system, component, module) if not 
result['OK']: self.__errMsg(result['Message']) return compType = result['Value']['ComponentType'] runit = result['Value']['RunitStatus'] print "%s %s_%s is installed, runit status: %s" % ( compType, system, component, runit) else: print "Unknown option:", option
def __componentAction( self , action = None ):
  """
  Actions which should be done on components. The only parameters is an action
  to perform.

  The components are read from the request parameters: every parameter named
  "<component> @ <host>" carries the system name as its value; the "action"
  parameter itself is skipped.

  Returns a JSON-style error structure ( "success" : "false" ) when the action
  or the components are invalid. Otherwise the outcome of every call is stored
  in self.actionSuccess and self.actionFailed.
  NOTE(review): no response is returned after the calls in the code visible
  here - confirm how callers obtain the final response.
  """
  DN = getUserDN()
  group = getSelectedGroup()

  if not action:
    error = "Action is not defined or has zero length"
    gLogger.debug( error )
    return { "success" : "false" , "error" : error }

  if action not in [ "restart" , "start" , "stop" , "uninstall" ]:
    error = "The request parameters action '%s' is unknown" % action
    gLogger.debug( error )
    return { "success" : "false" , "error" : error }
  self.action = action

  # host -> list of [ system , component ]
  targets = dict()
  for paramName in request.params:
    if paramName == "action":
      continue

    target = paramName.split( " @ " , 1 )
    if not len( target ) == 2:
      continue

    system = request.params[ paramName ]
    gLogger.always( "System: %s" % system )
    host = target[ 1 ]
    gLogger.always( "Host: %s" % host )
    component = target[ 0 ]
    gLogger.always( "Component: %s" % component )
    if not host in targets:
      targets[ host ] = list()
    targets[ host ].append( [ system , component ] )

  if not targets:
    error = "Failed to get component(s) for %s" % action
    gLogger.debug( error )
    return { "success" : "false" , "error" : error }

  gLogger.always( targets )
  self.actionSuccess = list()
  self.actionFailed = list()

  for hostname , components in targets.items():

    if not components:
      continue

    client = SystemAdministratorClient( hostname , None , delegatedDN=DN ,
                                        delegatedGroup=group )

    for system , component in components:

      # The reply is kept in its own variable so the host map is never clobbered,
      # and failures are always a dictionary with the OK / Message keys read below
      try:
        if action == "restart":
          result = client.restartComponent( system , component )
        elif action == "start":
          result = client.startComponent( system , component )
        elif action == "stop":
          result = client.stopComponent( system , component )
        elif action == "uninstall":
          result = client.uninstallComponent( system , component )
        else:
          result = { "OK" : False , "Message" : "Action %s is not valid" % action }
      except Exception as x:
        result = { "OK" : False , "Message" : "Exception: %s" % str( x ) }
      gLogger.debug( "Result: %s" % result )

      if not result[ "OK" ]:
        error = hostname + ": " + result[ "Message" ]
        self.actionFailed.append( error )
        gLogger.error( "Failure during component %s: %s" % ( action , error ) )
      else:
        gLogger.always( "Successfully %s component %s" % ( action , component ) )
        self.actionSuccess.append( component )
def do_install( self, args ):
  """
      Install various DIRAC components

      usage:

        install mysql
        install db <database>
        install service <system> <service> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ...
        install agent <system> <agent> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ...
        install executor <system> <executor> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ...
  """
  # NOTE: the docstring above is shown to the user as the help text.
  argss = args.split()
  if not argss:
    gLogger.notice( self.do_install.__doc__ )
    return

  option = argss[0]
  del argss[0]
  if option == "mysql":
    gLogger.notice( "Installing MySQL database, this can take a while ..." )
    client = SystemAdministratorClient( self.host, self.port )
    if gComponentInstaller.mysqlPassword == 'LocalConfig':
      gComponentInstaller.mysqlPassword = ''
    gComponentInstaller.getMySQLPasswords()
    result = client.installMySQL( gComponentInstaller.mysqlRootPwd, gComponentInstaller.mysqlPassword )
    if not result['OK']:
      self._errMsg( result['Message'] )
    else:
      gLogger.notice( "MySQL:", result['Value'] )
      gLogger.notice( "You might need to restart SystemAdministrator service to take new settings into account" )

  elif option == "db":
    if not argss:
      gLogger.notice( self.do_install.__doc__ )
      return
    database = argss[0]
    client = SystemAdministratorClient( self.host, self.port )
    result = client.getAvailableDatabases()
    if not result['OK']:
      self._errMsg( "Can not get database list: %s" % result['Message'] )
      return
    if database not in result['Value']:
      self._errMsg( "Unknown database %s: " % database )
      return
    system = result['Value'][database]['System']
    setup = gConfig.getValue( '/DIRAC/Setup', '' )
    if not setup:
      self._errMsg( "Unknown current setup" )
      return
    instance = gConfig.getValue( '/DIRAC/Setups/%s/%s' % ( setup, system ), '' )
    if not instance:
      self._errMsg( "No instance defined for system %s" % system )
      self._errMsg( "\tAdd new instance with 'add instance %s <instance_name>'" % system )
      return

    if not gComponentInstaller.mysqlPassword:
      gComponentInstaller.mysqlPassword = '******'
    gComponentInstaller.getMySQLPasswords()
    result = client.installDatabase( database, gComponentInstaller.mysqlRootPwd )
    if not result['OK']:
      self._errMsg( result['Message'] )
      return
    extension, system = result['Value']

    result = client.getHostInfo()
    if not result[ 'OK' ]:
      self._errMsg( result[ 'Message' ] )
      return
    cpu = result[ 'Value' ][ 'CPUModel' ]
    hostname = self.host

    # InstalledComponentsDB is registered later, together with the ComponentMonitoring service
    if database != 'InstalledComponentsDB':
      result = MonitoringUtilities.monitorInstallation( 'DB', system.replace( 'System', '' ), database,
                                                        cpu = cpu, hostname = hostname )
      if not result['OK']:
        self._errMsg( result['Message'] )
        return
    # result = client.addDatabaseOptionsToCS( system, database )
    gComponentInstaller.mysqlHost = self.host
    result = client.getInfo()
    if not result['OK']:
      # without the host setup the CS options cannot be written
      self._errMsg( result['Message'] )
      return
    hostSetup = result['Value']['Setup']
    result = gComponentInstaller.addDatabaseOptionsToCS( gConfig, system, database, hostSetup, overwrite = True )
    if not result['OK']:
      self._errMsg( result['Message'] )
      return
    gLogger.notice( "Database %s from %s/%s installed successfully" % ( database, extension, system ) )

  elif option in self.runitComponents:
    if len( argss ) < 2:
      gLogger.notice( self.do_install.__doc__ )
      return

    system = argss[0]
    del argss[0]
    component = argss[0]
    del argss[0]

    # Parse the optional "-m <ModuleName>" and "-p <Option>=<Value>" arguments
    specialOptions = {}
    module = ''
    for index, flag in enumerate( argss ):
      if flag not in ( "-m", "-p" ):
        continue
      if index + 1 >= len( argss ):
        self._errMsg( "Missing value for option %s" % flag )
        return
      flagValue = argss[index + 1]
      if flag == "-m":
        specialOptions['Module'] = flagValue
        module = flagValue
      else:
        if '=' not in flagValue:
          self._errMsg( "Invalid option '%s', expected <Option>=<Value>" % flagValue )
          return
        # split on the first '=' only so that values may themselves contain '='
        opt, value = flagValue.split( '=', 1 )
        specialOptions[opt] = value
    # a module named like the component is the default case
    if module == component:
      module = ''

    client = SystemAdministratorClient( self.host, self.port )
    # First need to update the CS
    # result = client.addDefaultOptionsToCS( option, system, component )
    gComponentInstaller.host = self.host
    result = client.getInfo()
    if not result['OK']:
      self._errMsg( result['Message'] )
      return
    hostSetup = result['Value']['Setup']

    # Install Module section if not yet there
    if module:
      result = gComponentInstaller.addDefaultOptionsToCS( gConfig, option, system, module,
                                                          getCSExtensions(), hostSetup )
      # in case of Error we must stop, this can happen when the module name is wrong...
      if not result['OK']:
        self._errMsg( result['Message'] )
        return
      # Add component section with specific parameters only
      result = gComponentInstaller.addDefaultOptionsToCS( gConfig, option, system, component,
                                                          getCSExtensions(), hostSetup, specialOptions,
                                                          addDefaultOptions = True )
    else:
      # Install component section
      result = gComponentInstaller.addDefaultOptionsToCS( gConfig, option, system, component,
                                                          getCSExtensions(), hostSetup, specialOptions )

    if not result['OK']:
      self._errMsg( result['Message'] )
      return
    # Then we can install and start the component
    result = client.setupComponent( option, system, component, module )
    if not result['OK']:
      self._errMsg( result['Message'] )
      return
    compType = result['Value']['ComponentType']
    runit = result['Value']['RunitStatus']
    gLogger.notice( "%s %s_%s is installed, runit status: %s" % ( compType, system, component, runit ) )

    # And register it in the database
    result = client.getHostInfo()
    if not result[ 'OK' ]:
      self._errMsg( result[ 'Message' ] )
      return
    cpu = result[ 'Value' ][ 'CPUModel' ]
    hostname = self.host

    if component == 'ComponentMonitoring':
      # Make sure that the service is running before trying to use it
      nTries = 0
      maxTries = 5

      mClient = ComponentMonitoringClient()
      result = mClient.ping()
      while not result[ 'OK' ] and nTries < maxTries:
        time.sleep( 3 )
        result = mClient.ping()
        nTries = nTries + 1

      if not result[ 'OK' ]:
        self._errMsg( 'ComponentMonitoring service taking too long to start. '
                      'Installation will not be logged into the database' )
        return

      # The monitoring database itself could not be registered when it was installed
      result = MonitoringUtilities.monitorInstallation( 'DB', system, 'InstalledComponentsDB',
                                                        cpu = cpu, hostname = hostname )
      if not result['OK']:
        self._errMsg( 'Error registering installation into database: %s' % result[ 'Message' ] )
        return

    result = MonitoringUtilities.monitorInstallation( option, system, component, module,
                                                      cpu = cpu, hostname = hostname )
    if not result['OK']:
      self._errMsg( 'Error registering installation into database: %s' % result[ 'Message' ] )
      return
  else:
    gLogger.notice( "Unknown option:", option )
class LemonAgent( AgentModule ):
  """ Agent reporting the runit status of the local DIRAC services and agents
      as "LEMON ..." log lines.

      For every component that is set up on the local host one line is logged
      with its criticality (read from the Operations configuration) and an
      OK / FAILURE status derived from the component RunitStatus.
  """

  def initialize( self ):
    """ Define the criticality / status labels, read the current setup and
        create the client to the local SystemAdministrator service.

        :return: S_OK()
    """
    self.NON_CRITICAL = "NonCritical"
    self.CRITICAL = "Critical"
    self.FAILURE = "FAILURE"
    self.OK = "OK"
    self.setup = gConfig.getValue( '/DIRAC/Setup', 'LHCb-Development' )
    # When True, components with NonCritical level are reported as well.
    # Components without an explicit entry in the configuration are treated
    # as non critical (see _getCriticality).
    self.outputNonCritical = True
    self.admClient = SystemAdministratorClient( 'localhost' )
    return S_OK()

  def execute( self ):
    """ Main execution method: report the status of all set up services and agents.

        Nothing is reported (besides one informative line) when the current
        setup is not in the MonitoredSetups list or when this host is listed
        in HostsInMaintenance.

        :return: S_OK() in all cases, failures are only logged
    """
    monitoredSetups = gConfig.getValue( '/Operations/lhcb/Lemon/MonitoredSetups',
                                        ['LHCb-Production'] )
    self.monitoringEnabled = self.setup in monitoredSetups
    if not self.monitoringEnabled:
      self._log( "Framework/LemonAgent", self.NON_CRITICAL, self.OK,
                 "Monitoring not enabled for this setup: " + self.setup + ". Exiting." )
      return S_OK()

    hostsInMaintenance = gConfig.getValue( '/Operations/lhcb/Lemon/HostsInMaintenance', [] )
    if gethostname() in hostsInMaintenance:
      self._log( "Framework/LemonAgent", self.NON_CRITICAL, self.OK,
                 "I am in maintenance mode, exiting." )
      return S_OK()

    result = self.admClient.getOverallStatus()
    if not result or not result['OK']:
      self._log( "Framework/LemonAgent", self.CRITICAL, self.FAILURE,
                 "Can not obtain result!!" )
      return S_OK()

    self._processResults( result['Value']['Services'] )
    self._processResults( result['Value']['Agents'] )
    return S_OK()

  def _processResults( self, results ):
    """ Log one status line per set up component.

        :param dict results: { system : { componentName : statusDict } } where
                             statusDict provides at least the 'Setup' and
                             'RunitStatus' keys
    """
    for system in results:
      for part in results[system]:
        component = results[system][part]
        # Only set up services and agents are monitored. The explicit
        # comparison is kept on purpose so that exactly the same components
        # as before are selected.
        if not component['Setup'] == True:
          continue

        componentName = system + "/" + part
        # Resolve the criticality once and reuse it for filtering and logging
        # instead of querying the configuration again for the log call.
        critLevel = self._getCriticality( componentName )
        if critLevel == self.NON_CRITICAL and not self.outputNonCritical:
          continue

        if component['RunitStatus'] == 'Run':
          self._log( componentName, critLevel, self.OK, "Service/Agent running fine" )
        else:
          self._log( componentName, critLevel, self.FAILURE, "Service/Agent failure!" )

  def _getCriticality( self, component ):
    """ Get the criticality configured for a component.

        :param str component: component name as "<System>/<Name>"
        :return: the value configured in the setup specific section if any,
                 else the common one, else self.NON_CRITICAL
    """
    # Common criticality first, NonCritical when nothing is defined
    criticality = gConfig.getValue( '/Operations/lhcb/Lemon/Criticalities/' + component,
                                    self.NON_CRITICAL )
    # It can be redefined in the <setup> subtree
    return gConfig.getValue( '/Operations/lhcb/' + self.setup + '/Lemon/Criticalities/' + component,
                             criticality )

  def _log( self, component, criticality, status, string ):
    """ Write a single "LEMON <criticality> <status> <component>: <text>" info line.

        :param str component: component name
        :param str criticality: criticality label
        :param str status: status label
        :param str string: free text appended to the line
    """
    gLogger.info( "LEMON " + criticality + " " + status + " " + component + ": " + string + "\n" )
def do_show( self, args ): """ Show list of components with various related information usage: show software - show components for which software is available show installed - show components installed in the host with runit system show setup - show components set up for automatic running in the host show project - show project to install or upgrade show status - show status of the installed components show database - show status of the databases show mysql - show status of the MySQL server show log <system> <service|agent> [nlines] - show last <nlines> lines in the component log file show info - show version of software and setup show host - show host related parameters show errors [*|<system> <service|agent>] - show error count for the given component or all the components in the last hour and day """ argss = args.split() if not argss: print self.do_show.__doc__ return option = argss[0] del argss[0] if option == 'software': client = SystemAdministratorClient( self.host, self.port ) result = client.getSoftwareComponents() if not result['OK']: self.__errMsg( result['Message'] ) else: print pprint.pprint( result['Value'] ) elif option == 'installed': client = SystemAdministratorClient( self.host, self.port ) result = client.getInstalledComponents() if not result['OK']: self.__errMsg( result['Message'] ) else: print pprint.pprint( result['Value'] ) elif option == 'setup': client = SystemAdministratorClient( self.host, self.port ) result = client.getSetupComponents() if not result['OK']: self.__errMsg( result['Message'] ) else: print pprint.pprint( result['Value'] ) elif option == 'project': result = SystemAdministratorClient( self.host, self.port ).getProject() if not result['OK']: self.__errMsg( result['Message'] ) else: print "Current project is %s" % result[ 'Value' ] elif option == 'status': client = SystemAdministratorClient( self.host, self.port ) result = client.getOverallStatus() if not result['OK']: self.__errMsg( result['Message'] ) else: fields = 
["System",'Name','Module','Type','Setup','Installed','Runit','Uptime','PID'] records = [] rDict = result['Value'] for compType in rDict: for system in rDict[compType]: components = rDict[compType][system].keys() components.sort() for component in components: record = [] if rDict[compType][system][component]['Installed']: module = str( rDict[compType][system][component]['Module'] ) record += [ system,component,module,compType.lower()[:-1]] if rDict[compType][system][component]['Setup']: record += ['Setup'] else: record += ['NotSetup'] if rDict[compType][system][component]['Installed']: record += ['Installed'] else: record += ['NotInstalled'] record += [str( rDict[compType][system][component]['RunitStatus'] )] record += [str( rDict[compType][system][component]['Timeup'] )] record += [str( rDict[compType][system][component]['PID'] )] records.append(record) printTable(fields,records) elif option == 'database' or option == 'databases': client = SystemAdministratorClient( self.host, self.port ) if not InstallTools.mysqlPassword: InstallTools.mysqlPassword = "******" InstallTools.getMySQLPasswords() result = client.getDatabases( InstallTools.mysqlRootPwd ) if not result['OK']: self.__errMsg( result['Message'] ) return resultSW = client.getAvailableDatabases() if not resultSW['OK']: self.__errMsg( resultSW['Message'] ) return sw = resultSW['Value'] installed = result['Value'] print for db in sw: if db in installed: print db.rjust( 25 ), ': Installed' else: print db.rjust( 25 ), ': Not installed' if not sw: print "No database found" elif option == 'mysql': client = SystemAdministratorClient( self.host, self.port ) result = client.getMySQLStatus() if not result['OK']: self.__errMsg( result['Message'] ) elif result['Value']: print for par, value in result['Value'].items(): print par.rjust( 28 ), ':', value else: print "No MySQL database found" elif option == "log": self.getLog( argss ) elif option == "info": client = SystemAdministratorClient( self.host, self.port ) result = 
client.getInfo() if not result['OK']: self.__errMsg( result['Message'] ) else: print print "Setup:", result['Value']['Setup'] print "DIRAC version:", result['Value']['DIRAC'] if result['Value']['Extensions']: for e, v in result['Value']['Extensions'].items(): print "%s version" % e, v print elif option == "host": client = SystemAdministratorClient( self.host, self.port ) result = client.getHostInfo() if not result['OK']: self.__errMsg( result['Message'] ) else: print print "Host info:" print fields = ['Parameter','Value'] records = [] for key,value in result['Value'].items(): records.append( [key, str(value) ] ) printTable( fields, records ) elif option == "errors": self.getErrors( argss ) else: print "Unknown option:", option
def do_show( self, args ):
  """
      Show list of components with various related information

      usage:

        show software      - show components for which software is available
        show installed     - show components installed in the host with runit system
        show setup         - show components set up for automatic running in the host
        show project       - show project to install or upgrade
        show status        - show status of the installed components
        show database      - show status of the databases
        show mysql         - show status of the MySQL server
        show log  <system> <service|agent> [nlines]
                           - show last <nlines> lines in the component log file
        show info          - show version of software and setup
        show doc <type> <system> <name>
                           - show documentation for a given service or agent
        show host          - show host related parameters
        show hosts         - show all available hosts
        show installations [ list | current | -n <Name> | -h <Host> | -s <System> | -m <Module> | -t <Type> | -itb <InstallationTime before> | -ita <InstallationTime after> | -utb <UnInstallationTime before> | -uta <UnInstallationTime after> ]*
                           - show all the installations of components that match the given parameters
        show errors [*|<system> <service|agent>]
                           - show error count for the given component or all the components
                             in the last hour and day
  """
  # The docstring above is user facing: it is sent verbatim to the log as the
  # usage message, so maintainer notes are kept in comments.
  #
  # args is the raw text typed after "show". Its first word selects the
  # report, the remaining words are handed over to getLog(), getErrors(),
  # getInstallations() or used as <type> <system> <name> by "show doc".
  # Nothing is returned, everything goes through gLogger.notice / stdout.
  argss = args.split()
  if not argss:
    gLogger.notice( self.do_show.__doc__ )
    return

  option = argss[0]
  del argss[0]
  if option == 'software':
    client = SystemAdministratorClient( self.host, self.port )
    result = client.getSoftwareComponents()
    if not result['OK']:
      self.__errMsg( result['Message'] )
    else:
      gLogger.notice( '' )
      pprint.pprint( result['Value'] )
  elif option == 'installed':
    client = SystemAdministratorClient( self.host, self.port )
    result = client.getInstalledComponents()
    if not result['OK']:
      self.__errMsg( result['Message'] )
    else:
      gLogger.notice( '' )
      pprint.pprint( result['Value'] )
  elif option == 'setup':
    client = SystemAdministratorClient( self.host, self.port )
    result = client.getSetupComponents()
    if not result['OK']:
      self.__errMsg( result['Message'] )
    else:
      gLogger.notice( '' )
      pprint.pprint( result['Value'] )
  elif option == 'project':
    result = SystemAdministratorClient( self.host, self.port ).getProject()
    if not result['OK']:
      self.__errMsg( result['Message'] )
    else:
      gLogger.notice( "Current project is %s" % result[ 'Value' ] )
  elif option == 'status':
    client = SystemAdministratorClient( self.host, self.port )
    result = client.getOverallStatus()
    if not result['OK']:
      self.__errMsg( result['Message'] )
    else:
      fields = ["System", 'Name', 'Module', 'Type', 'Setup', 'Installed', 'Runit', 'Uptime', 'PID']
      records = []
      rDict = result['Value']
      for compType in rDict:
        for system in rDict[compType]:
          for component in sorted( rDict[compType][system] ):
            compInfo = rDict[compType][system][component]
            # Only installed components make it into the table
            if not compInfo['Installed']:
              continue
            records.append( [ system,
                              component,
                              str( compInfo['Module'] ),
                              # "Services" -> "service", "Agents" -> "agent"
                              compType.lower()[:-1],
                              'Setup' if compInfo['Setup'] else 'NotSetup',
                              'Installed',
                              str( compInfo['RunitStatus'] ),
                              str( compInfo['Timeup'] ),
                              str( compInfo['PID'] ) ] )
      printTable( fields, records )
  elif option == 'database' or option == 'databases':
    client = SystemAdministratorClient( self.host, self.port )
    if not InstallTools.mysqlPassword:
      InstallTools.mysqlPassword = "******"
    InstallTools.getMySQLPasswords()
    result = client.getDatabases( InstallTools.mysqlRootPwd )
    if not result['OK']:
      self.__errMsg( result['Message'] )
      return
    resultSW = client.getAvailableDatabases()
    if not resultSW['OK']:
      self.__errMsg( resultSW['Message'] )
      return

    sw = resultSW['Value']
    installed = result['Value']
    gLogger.notice( '' )
    for db in sw:
      if db in installed:
        gLogger.notice( db.rjust( 25 ), ': Installed' )
      else:
        gLogger.notice( db.rjust( 25 ), ': Not installed' )
    if not sw:
      gLogger.notice( "No database found" )
  elif option == 'mysql':
    client = SystemAdministratorClient( self.host, self.port )
    result = client.getMySQLStatus()
    if not result['OK']:
      self.__errMsg( result['Message'] )
    elif result['Value']:
      gLogger.notice( '' )
      for par, value in result['Value'].items():
        # Build the line explicitly: handing a ( name, ':', value ) tuple to
        # the logger made it print the tuple repr instead of "name : value".
        gLogger.notice( "%s : %s" % ( par.rjust( 28 ), value ) )
    else:
      gLogger.notice( "No MySQL database found" )
  elif option == "log":
    self.getLog( argss )
  elif option == "info":
    client = SystemAdministratorClient( self.host, self.port )
    result = client.getInfo()
    if not result['OK']:
      self.__errMsg( result['Message'] )
    else:
      gLogger.notice( '' )
      gLogger.notice( "Setup:", result['Value']['Setup'] )
      gLogger.notice( "DIRAC version:", result['Value']['DIRAC'] )
      if result['Value']['Extensions']:
        for e, v in result['Value']['Extensions'].items():
          gLogger.notice( "%s version" % e, v )
      gLogger.notice( '' )
  elif option == "host":
    client = SystemAdministratorClient( self.host, self.port )
    result = client.getHostInfo()
    if not result['OK']:
      self.__errMsg( result['Message'] )
    else:
      gLogger.notice( '' )
      gLogger.notice( "Host info:" )
      gLogger.notice( '' )
      fields = ['Parameter', 'Value']
      records = [ [ key, str( value ) ] for key, value in result['Value'].items() ]
      printTable( fields, records )
  elif option == "hosts":
    client = ComponentMonitoringClient()
    result = client.getHosts( {}, False, False )
    if not result[ 'OK' ]:
      self.__errMsg( 'Error retrieving the list of hosts: %s' % ( result[ 'Message' ] ) )
    else:
      hostList = result[ 'Value' ]
      gLogger.notice( '' )
      gLogger.notice( ' ' + 'Host'.center( 32 ) + ' ' + 'CPU'.center( 34 ) + ' ' )
      gLogger.notice( ( '-' * 69 ) )
      for element in hostList:
        gLogger.notice( '|' + element[ 'HostName' ].center( 32 ) + '|' + element[ 'CPU' ].center( 34 ) + '|' )
      gLogger.notice( ( '-' * 69 ) )
      gLogger.notice( '' )
  elif option == "errors":
    self.getErrors( argss )
  elif option == "installations":
    self.getInstallations( argss )
  elif option == "doc":
    # Needs <type> <system> <name> with <type> being service or agent,
    # anything else gets the usage message
    if len( argss ) > 2 and argss[0] in [ 'service', 'agent' ]:
      compType, compSystem, compModule = argss[0], argss[1], argss[2]
      client = SystemAdministratorClient( self.host, self.port )
      result = client.getComponentDocumentation( compType, compSystem, compModule )
      if result[ 'OK' ]:
        gLogger.notice( result[ 'Value' ] )
      else:
        self.__errMsg( result[ 'Message' ] )
    else:
      gLogger.notice( self.do_show.__doc__ )
  else:
    gLogger.notice( "Unknown option:", option )
def do_install( self, args ): """ Install various DIRAC components usage: install mysql install db <database> install service <system> <service> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ... install agent <system> <agent> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ... install executor <system> <executor> [-m <ModuleName>] [-p <Option>=<Value>] [-p <Option>=<Value>] ... """ argss = args.split() if not argss: print self.do_install.__doc__ return option = argss[0] del argss[0] if option == "mysql": print "Installing MySQL database, this can take a while ..." client = SystemAdministratorClient( self.host, self.port ) if InstallTools.mysqlPassword == 'LocalConfig': InstallTools.mysqlPassword = '' InstallTools.getMySQLPasswords() result = client.installMySQL( InstallTools.mysqlRootPwd, InstallTools.mysqlPassword ) if not result['OK']: self.__errMsg( result['Message'] ) else: print "MySQL:", result['Value'] print "You might need to restart SystemAdministrator service to take new settings into account" elif option == "db": if not argss: print self.do_install.__doc__ return database = argss[0] client = SystemAdministratorClient( self.host, self.port ) result = client.getAvailableDatabases() if not result['OK']: self.__errMsg( "Can not get database list: %s" % result['Message'] ) return if not result['Value'].has_key( database ): self.__errMsg( "Unknown database %s: " % database ) return system = result['Value'][database]['System'] setup = gConfig.getValue( '/DIRAC/Setup', '' ) if not setup: self.__errMsg( "Unknown current setup" ) return instance = gConfig.getValue( '/DIRAC/Setups/%s/%s' % ( setup, system ), '' ) if not instance: self.__errMsg( "No instance defined for system %s" % system ) self.__errMsg( "\tAdd new instance with 'add instance %s <instance_name>'" % system ) return if not InstallTools.mysqlPassword: InstallTools.mysqlPassword = '******' InstallTools.getMySQLPasswords() result = client.installDatabase( database, 
InstallTools.mysqlRootPwd ) if not result['OK']: self.__errMsg( result['Message'] ) return extension, system = result['Value'] # result = client.addDatabaseOptionsToCS( system, database ) InstallTools.mysqlHost = self.host result = client.getInfo() if not result['OK']: self.__errMsg( result['Message'] ) hostSetup = result['Value']['Setup'] result = InstallTools.addDatabaseOptionsToCS( gConfig, system, database, hostSetup ) if not result['OK']: self.__errMsg( result['Message'] ) return print "Database %s from %s/%s installed successfully" % ( database, extension, system ) elif option in ["service","agent","executor"] : if len( argss ) < 2: print self.do_install.__doc__ return system = argss[0] del argss[0] component = argss[0] del argss[0] specialOptions = {} module = '' for i in range(len(argss)): if argss[i] == "-m": specialOptions['Module'] = argss[i+1] module = argss[i+1] if argss[i] == "-p": opt,value = argss[i+1].split('=') specialOptions[opt] = value if module == component: module = '' client = SystemAdministratorClient( self.host, self.port ) # First need to update the CS # result = client.addDefaultOptionsToCS( option, system, component ) InstallTools.host = self.host result = client.getInfo() if not result['OK']: self.__errMsg( result['Message'] ) return hostSetup = result['Value']['Setup'] # Install Module section if not yet there if module: result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, module, getCSExtensions(), hostSetup ) # Add component section with specific parameters only result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, component, getCSExtensions(), hostSetup, specialOptions, addDefaultOptions = False ) else: # Install component section result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, component, getCSExtensions(), hostSetup, specialOptions ) if not result['OK']: self.__errMsg( result['Message'] ) return # Then we can install and start the component result = client.setupComponent( 
option, system, component, module ) if not result['OK']: self.__errMsg( result['Message'] ) return compType = result['Value']['ComponentType'] runit = result['Value']['RunitStatus'] print "%s %s_%s is installed, runit status: %s" % ( compType, system, component, runit ) else: print "Unknown option:", option
def do_install( self, args ): """ Install various DIRAC components usage: install mysql install db <database> install service <system> <service> install agent <system> <agent> """ argss = args.split() if not argss: print self.do_install.__doc__ return option = argss[0] del argss[0] if option == "mysql": print "Installing MySQL database, this can take a while ..." client = SystemAdministratorClient( self.host, self.port ) if InstallTools.mysqlPassword == 'LocalConfig': InstallTools.mysqlPassword = '' InstallTools.getMySQLPasswords() result = client.installMySQL( InstallTools.mysqlRootPwd, InstallTools.mysqlPassword ) if not result['OK']: self.__errMsg( result['Message'] ) else: print "MySQL:", result['Value'] print "You might need to restart SystemAdministrator service to take new settings into account" elif option == "db": if not argss: print self.do_install.__doc__ return database = argss[0] client = SystemAdministratorClient( self.host, self.port ) result = client.getAvailableDatabases() if not result['OK']: self.__errMsg( "Can not get database list: %s" % result['Message'] ) return if not result['Value'].has_key( database ): self.__errMsg( "Unknown database %s: " % database ) return system = result['Value'][database]['System'] setup = gConfig.getValue( '/DIRAC/Setup', '' ) if not setup: self.__errMsg( "Unknown current setup" ) return instance = gConfig.getValue( '/DIRAC/Setups/%s/%s' % ( setup, system ), '' ) if not instance: self.__errMsg( "No instance defined for system %s" % system ) self.__errMsg( "\tAdd new instance with 'add instance %s <instance_name>'" % system ) return if not InstallTools.mysqlPassword: InstallTools.mysqlPassword = '******' InstallTools.getMySQLPasswords() result = client.installDatabase( database, InstallTools.mysqlRootPwd ) if not result['OK']: self.__errMsg( result['Message'] ) return extension, system = result['Value'] # result = client.addDatabaseOptionsToCS( system, database ) InstallTools.mysqlHost = self.host result = 
client.getInfo() if not result['OK']: self.__errMsg( result['Message'] ) hostSetup = result['Value']['Setup'] result = InstallTools.addDatabaseOptionsToCS( gConfig, system, database, hostSetup ) if not result['OK']: self.__errMsg( result['Message'] ) return print "Database %s from %s/%s installed successfully" % ( database, extension, system ) elif option == "service" or option == "agent": if len( argss ) < 2: print self.do_install.__doc__ return system = argss[0] component = argss[1] client = SystemAdministratorClient( self.host, self.port ) # First need to update the CS # result = client.addDefaultOptionsToCS( option, system, component ) InstallTools.host = self.host result = client.getInfo() if not result['OK']: self.__errMsg( result['Message'] ) return hostSetup = result['Value']['Setup'] result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, component, getCSExtensions(), hostSetup ) if not result['OK']: self.__errMsg( result['Message'] ) return # Then we can install and start the component result = client.setupComponent( option, system, component ) if not result['OK']: self.__errMsg( result['Message'] ) return compType = result['Value']['ComponentType'] runit = result['Value']['RunitStatus'] print "%s %s_%s is installed, runit status: %s" % ( compType, system, component, runit ) else: print "Unknown option:", option