def apply(action):
    """Run an action's command on every primary server holding its role.

    The command is dispatched to each matching server through
    infra.execute_command (presumably over ssh), e.g., restarting the
    scheduler on the scheduler server.

    A command that fails on one server is reported on stderr and the
    remaining servers are still processed, so subprocess.CalledProcessError
    is never propagated to the caller.

    @param action: A sequence whose first item is the role of the servers the
            command should be executed on and whose second item is the
            command itself.

    @raise ServerActionError: Not raised here directly; presumably propagated
            from server_manager_utils.get_servers when the server database
            cannot be queried.
    """
    role = action[0]
    command = action[1]

    # Only servers in primary status are considered for the given role.
    servers = server_manager_utils.get_servers(
            role=role, status=server_models.Server.STATUS.PRIMARY)
    if not servers:
        # Written through sys.stderr.write rather than the print statement so
        # that the block parses under both Python 2 and Python 3.
        sys.stderr.write(
                'WARNING! Action %s failed to be applied. No '
                'server with given role %s was found.\n' % (action, role))
        return

    for server in servers:
        sys.stdout.write('Run command `%s` on server %s\n' %
                         (command, server.hostname))
        try:
            infra.execute_command(server.hostname, command)
        except subprocess.CalledProcessError as e:
            # Best effort: report the failure and carry on with the other
            # servers. The message names the command that failed; this
            # function runs commands, it does not "check" servers.
            sys.stderr.write(
                    'Failed to run command `%s` on server %s, error: %s\n' %
                    (command, server.hostname, e))
def execute(self):
    """List servers, reading from the skylab inventory when requested.

    When self.skylab is set the lookup is delegated to execute_skylab();
    otherwise the servers are queried through server_manager_utils using this
    command's hostname, role and status. Known lookup errors are reported via
    self.failure() with fatal=True.

    @return: A list of servers matched given hostname and role. None if
             self.failure() returns after a lookup error.
    """
    if not self.skylab:
        try:
            return server_manager_utils.get_servers(
                    hostname=self.hostname,
                    role=self.role,
                    status=self.status)
        except (server_manager_utils.ServerActionError,
                error.InvalidDataError) as err:
            self.failure(err,
                         what_failed='Failed to find servers',
                         item=self.hostname,
                         fatal=True)
        return None

    try:
        return self.execute_skylab()
    except (skylab_server.SkylabServerActionError,
            revision_control.GitError,
            skylab_utils.InventoryRepoDirNotClean) as err:
        self.failure(
                err,
                what_failed='Failed to list servers from skylab inventory.',
                item=self.hostname,
                fatal=True)
    return None
def main():
    """Check the replication delay of replicas and slave databases.

    Everything runs inside the 'check_slave_db_delay' ts_mon global state.
    Replicas listed in the parsed options are checked with the global database
    credentials (which default to the regular AUTOTEST_WEB ones); servers
    returned for the database_slave role in primary status are checked with
    the regular AUTOTEST_WEB credentials.
    """
    with site_utils.SetupTsMonGlobalState('check_slave_db_delay',
                                          indirect=True):
        options = parse_options()

        # Optionally mirror the log output into the requested file.
        log_config = logging_config.LoggingConfig()
        if options.logfile:
            logfile_path = os.path.abspath(options.logfile)
            log_config.add_file_handler(file_path=logfile_path,
                                        level=logging.DEBUG)

        user = CONFIG.get_config_value('AUTOTEST_WEB', 'user')
        password = CONFIG.get_config_value('AUTOTEST_WEB', 'password')
        # The global credentials fall back to the local ones when unset.
        global_user = CONFIG.get_config_value(
                'AUTOTEST_WEB', 'global_db_user', default=user)
        global_password = CONFIG.get_config_value(
                'AUTOTEST_WEB', 'global_db_password', default=password)

        logging.info('Start checking Seconds_Behind_Master of slave databases')

        for replica_host in options.replicas:
            check_delay(replica_host, global_user, global_password)
        if not options.replicas:
            logging.warning('No replicas checked.')

        slave_servers = server_manager_utils.get_servers(
                role='database_slave', status='primary')
        for slave_server in slave_servers:
            check_delay(slave_server.hostname, user, password)
        if not slave_servers:
            logging.warning('No slaves checked.')

        logging.info('Finished checking.')
def refresh_drone_configs(self):
    """
    Reload the per-drone settings: enabled, max_processes and allowed_users.

    The global config file is re-parsed first. Every drone in self._drones is
    then refreshed either from its attributes in the server database (when
    server_manager_utils.use_server_db() is true) or from the scheduler
    section of the global config. Afterwards the drone queue is reordered and
    the notification record is reset.
    """
    # server_manager_utils is deliberately imported here and not at module
    # level. test_that imports drone_manager (via autoserv_utils) before it
    # sets up django -- it only does that in setup_local_afe, since django is
    # not needed when the test runs against lab duts through the :lab:
    # option. A module-level import would therefore make test_that fail
    # because django is not set up yet.
    from autotest_lib.site_utils import server_manager_utils

    config = global_config.global_config
    section = scheduler_config.CONFIG_SECTION
    config.parse_config_file()

    for hostname, drone in self._drones.iteritems():
        if server_manager_utils.use_server_db():
            # Settings come from the attributes of the matching server entry.
            server = server_manager_utils.get_servers(hostname=hostname)[0]
            attributes = {attr.attribute: attr.value
                          for attr in server.attributes.all()}
            drone.enabled = not int(attributes.get('disabled', 0))
            drone.max_processes = int(attributes.get(
                    'max_processes',
                    scheduler_config.config.max_processes_per_drone))
            user_spec = attributes.get('users')
        else:
            # Settings come from '<hostname>_*' options in the global config.
            disabled = config.get_config_value(
                    section, '%s_disabled' % hostname, default='')
            drone.enabled = not disabled
            drone.max_processes = config.get_config_value(
                    section,
                    '%s_max_processes' % hostname,
                    type=int,
                    default=scheduler_config.config.max_processes_per_drone)
            user_spec = config.get_config_value(
                    section, '%s_users' % hostname, default=None)

        # A whitespace separated user list becomes a set; no list means None.
        drone.allowed_users = set(user_spec.split()) if user_spec else None

        logging.info('Drone %s.max_processes: %s', hostname,
                     drone.max_processes)
        logging.info('Drone %s.enabled: %s', hostname, drone.enabled)
        logging.info('Drone %s.allowed_users: %s', hostname,
                     drone.allowed_users)
        logging.info('Drone %s.support_ssp: %s', hostname, drone.support_ssp)

    # max_processes may have changed, so the queue order has to be refreshed.
    self._reorder_drone_queue()
    # Clear notification record about reaching max_processes limit.
    self._notify_record = {}
def execute(self):
    """Look up the servers matching this command's filters.

    The hostname, role and status stored on the command are passed to
    server_manager_utils.get_servers. Known lookup errors are reported via
    self.failure() with fatal=True.

    @return: A list of servers matched given hostname and role. None if
             self.failure() returns after a lookup error.
    """
    lookup = {
        'hostname': self.hostname,
        'role': self.role,
        'status': self.status,
    }
    try:
        return server_manager_utils.get_servers(**lookup)
    except (server_manager_utils.ServerActionError,
            error.InvalidDataError) as err:
        self.failure(err,
                     what_failed='Failed to find servers',
                     item=self.hostname,
                     fatal=True)
    return None
def get_servers(hostname=None, role=None, status=None):
    """Get the details of servers with matching hostname, role and status.

    @param hostname: FQDN of the server.
    @param role: Name of the server role, e.g., drone, scheduler. Default to
                 None to match any role.
    @param status: Status of the server, e.g., primary, backup,
                   repair_required. Default to None to match any server
                   status.

    @raises error.RPCException: If server database is not used.
    @return: A list holding the get_details() result of each matching server.
    """
    if server_manager_utils.use_server_db():
        matched = server_manager_utils.get_servers(
                hostname=hostname, role=role, status=status)
        return [entry.get_details() for entry in matched]

    raise error.RPCException('Server database is not enabled. Please try '
                             'retrieve servers from global config.')