def check_amphorae(connection):
    """Check the status of the amphorae.

    The amphorae are fetched with a GET on ``/v2/octavia/amphorae`` through
    ``connection.load_balancer``.  When the response code is not 200 the
    function returns without reporting anything.

    @param connection: object exposing the ``load_balancer`` proxy
    @returns: None
    @raises nagios_plugin3.CriticalError: an amphora has status ERROR
    @raises nagios_plugin3.WarnError: an amphora has status PENDING_CREATE,
        PENDING_DELETE or BOOTING (only evaluated when none is in ERROR)
    """
    response = connection.load_balancer.get('/v2/octavia/amphorae')
    if response.status_code != 200:
        return

    amphorae = json.loads(response.content).get('amphorae', [])

    def _describe(statuses):
        # one message per amphora whose status is listed in ``statuses``
        flagged = [amp for amp in amphorae if amp['status'] in statuses]
        return [
            'amphroa {} status is {}'.format(amp['id'], amp['status'])
            for amp in flagged
        ]

    # ERROR takes precedence and is reported as CRITICAL
    critical = _describe(('ERROR', ))
    if critical:
        raise nagios_plugin3.CriticalError(
            'CRITICAL: {}'.format(', '.join(critical)))

    # transitional statuses are reported as WARNING
    warning = _describe(('PENDING_CREATE', 'PENDING_DELETE', 'BOOTING'))
    if warning:
        raise nagios_plugin3.WarnError(
            'WARNING: {}'.format(', '.join(warning)))

    print('OK: Amphorae are happy')
def check_contrail_alarms(contrail_vip, token):
    """Check the alarms in Contrail Analytics.

    Fetches ``http://<contrail_vip>:8081/analytics/alarms`` and hands the
    decoded JSON to ``parse_contrail_alarms``; the prefix of the message it
    returns decides the outcome.

    @param str contrail_vip: VIP of Contrail
    @param str token: Token for the authentication
    @returns: None
    @raises nagios_plugin3.CriticalError: the API cannot be reached, it
        answers with a code other than 200, or the parsed message starts
        with 'CRITICAL: '
    @raises nagios_plugin3.WarnError: the parsed message starts with
        'WARNING: '
    """
    url = 'http://{}:8081/analytics/alarms'.format(contrail_vip)
    headers = {'X-Auth-Token': token}
    try:
        r = requests.get(url=url, headers=headers)
    except requests.exceptions.ConnectionError as error:
        raise nagios_plugin3.CriticalError(
            'CRITICAL: contrail analytics API error: {}'.format(error))

    if r.status_code != 200:
        # The HTTP status lives in ``status_code``; a requests Response has
        # no ``code`` attribute, so reading it raised AttributeError instead
        # of reporting the failure.
        raise nagios_plugin3.CriticalError(
            'CRITICAL: contrail analytics API return code is {}'.format(
                r.status_code))

    result = r.json()
    msg = parse_contrail_alarms(result)

    if msg.startswith('CRITICAL: '):
        raise nagios_plugin3.CriticalError(msg)
    elif msg.startswith('WARNING: '):
        raise nagios_plugin3.WarnError(msg)
    print('OK: no unacknowledged or sev>0 contrail analytics alarms')
def check_loadbalancers(connection):
    """Check the status of the load balancers.

    The checks run in stages and the first failing stage raises, listing
    every offending load balancer of that stage:

    1. enabled load balancers must have provisioning_status ACTIVE
    2. enabled load balancers must have operating_status ONLINE
    3. the VIP port of every enabled load balancer must be found by
       ``connection.network.get_port``
    4. disabled load balancers (admin_state_up False) produce a WARNING

    @param connection: object exposing the ``load_balancer`` and
        ``network`` proxies
    @returns: None
    @raises nagios_plugin3.CriticalError: stage 1, 2 or 3 failed
    @raises nagios_plugin3.WarnError: only stage 4 found something
    """
    lb_mgr = connection.load_balancer
    # The listing is walked twice (enabled, then disabled).  Materialise it
    # so a one-shot iterator/generator is not already exhausted on the
    # second pass, which would hide every disabled load balancer.
    lb_all = list(lb_mgr.load_balancers())

    # only check enabled lbs
    lb_enabled = [lb for lb in lb_all if lb.is_admin_state_up]

    # check provisioning_status is ACTIVE for each lb
    lbs = [lb for lb in lb_enabled if lb.provisioning_status != 'ACTIVE']
    if lbs:
        items = [
            'loadbalancer {} provisioning_status is {}'.format(
                lb.id, lb.provisioning_status) for lb in lbs
        ]
        output = 'CRITICAL: {}'.format(', '.join(items))
        raise nagios_plugin3.CriticalError(output)

    # check operating_status is ONLINE for each lb
    lbs = [lb for lb in lb_enabled if lb.operating_status != 'ONLINE']
    if lbs:
        items = [
            'loadbalancer {} operating_status is {}'.format(
                lb.id, lb.operating_status) for lb in lbs
        ]
        output = 'CRITICAL: {}'.format(', '.join(items))
        raise nagios_plugin3.CriticalError(output)

    net_mgr = connection.network

    # check vip port exists for each lb
    lbs = []
    for lb in lb_enabled:
        try:
            net_mgr.get_port(lb.vip_port_id)
        except openstack.exceptions.NotFoundException:
            lbs.append(lb)
    if lbs:
        items = [
            'vip port {} for loadbalancer {} not found'.format(
                lb.vip_port_id, lb.id) for lb in lbs
        ]
        output = 'CRITICAL: {}'.format(', '.join(items))
        raise nagios_plugin3.CriticalError(output)

    # warn about disabled lbs if no critical error found
    lb_disabled = [lb for lb in lb_all if not lb.is_admin_state_up]
    if lb_disabled:
        items = [
            'loadbalancer {} admin_state_up is False'.format(lb.id)
            for lb in lb_disabled
        ]
        output = 'WARNING: {}'.format(', '.join(items))
        raise nagios_plugin3.WarnError(output)

    print('OK: Loadbalancers are happy')
def check_node(node):
    """Check the conditions listed in ``node['status']['conditions']``.

    Every entry of that list is read through its ``type`` and ``status``
    keys.  ``Ready`` must be 'True' (critical otherwise) while
    ``MemoryPressure``, ``DiskPressure`` and ``PIDPressure`` must be 'False'
    (warning otherwise); the comparison ignores case.  All failed
    expectations are collected and reported together, comma separated.

    @param dict node: node description holding ``status.conditions``
        (presumably a Kubernetes node object - confirm against the caller)
    @returns: None
    @raises nagios_plugin3.CriticalError: ``Ready`` is not as expected, or
        one of the conditions is missing from the node
    @raises nagios_plugin3.WarnError: only pressure conditions failed
    """
    # Note: Keep the Ready check first since all checks will fail when not Ready
    checks = [
        {
            'name': 'Ready',
            'expected': 'True',
            'type': 'error',
            'error': 'Node Not Ready'
        },
        {
            'name': 'MemoryPressure',
            'expected': 'False',
            'type': 'warn',
            'error': 'Memory Pressure'
        },
        {
            'name': 'DiskPressure',
            'expected': 'False',
            'type': 'warn',
            'error': 'Disk Pressure'
        },
        {
            'name': 'PIDPressure',
            'expected': 'False',
            'type': 'warn',
            'error': 'PID Pressure'
        },
    ]
    msg = []
    error = False
    for check in checks:
        # find the status that matches
        for s in node['status']['conditions']:
            if s['type'] != check['name']:
                continue
            # does it match expectations? If not, toss it on the list
            # of errors so we don't show the first issue, but all.
            if s['status'].lower() != check['expected'].lower():
                msg.append(check['error'])
                if check['type'] == 'error':
                    error = True
            # The condition was found, whatever its status: stop searching
            # so the for/else below only fires when it is really missing.
            break
        else:
            err_msg = 'Unable to find status for {}'.format(check['error'])
            raise nagios_plugin3.CriticalError(err_msg)

    if msg:
        # report one readable string rather than the repr of a list
        output = ', '.join(msg)
        if error:
            raise nagios_plugin3.CriticalError(output)
        else:
            raise nagios_plugin3.WarnError(output)
def check_pools(connection):
    """Check the status of the pools.

    Only pools whose ``is_admin_state_up`` is true are considered.  The
    first rule below that matches at least one pool raises, naming every
    matching pool:

    - provisioning_status other than ACTIVE -> CRITICAL
    - operating_status ERROR -> CRITICAL
    - operating_status NO_MONITOR -> WARNING

    @param connection: object exposing the ``load_balancer`` proxy
    @returns: None
    @raises nagios_plugin3.CriticalError: see the first two rules
    @raises nagios_plugin3.WarnError: see the last rule
    """
    enabled = [
        candidate for candidate in connection.load_balancer.pools()
        if candidate.is_admin_state_up
    ]

    def _with_operating_status(wanted):
        # messages for the enabled pools whose operating_status is ``wanted``
        return [
            'pool {} operating_status is {}'.format(entry.id,
                                                    entry.operating_status)
            for entry in enabled if entry.operating_status == wanted
        ]

    # every enabled pool is expected to be ACTIVE
    not_active = [
        'pool {} provisioning_status is {}'.format(
            entry.id, entry.provisioning_status)
        for entry in enabled if entry.provisioning_status != 'ACTIVE'
    ]
    if not_active:
        raise nagios_plugin3.CriticalError(
            'CRITICAL: {}'.format(', '.join(not_active)))

    # ERROR is CRITICAL
    errored = _with_operating_status('ERROR')
    if errored:
        raise nagios_plugin3.CriticalError(
            'CRITICAL: {}'.format(', '.join(errored)))

    # NO_MONITOR is only a WARNING
    unmonitored = _with_operating_status('NO_MONITOR')
    if unmonitored:
        raise nagios_plugin3.WarnError(
            'WARNING: {}'.format(', '.join(unmonitored)))

    print('OK: Pools are happy')
def check_nova_services(args, nova):
    """Check the nova-compute services, one host aggregate at a time.

    Aggregates and services are read from ``/os-aggregates`` and
    ``/os-services``.  The hosts of every aggregate that is not listed in
    ``args.skip_aggregates`` are passed to ``check_hosts_up``; nova-compute
    hosts that were not covered that way are passed to it once more as
    '(not-part-of-any-agg)'.  Each result is read through its ``critical``,
    ``warning`` and ``msg_text`` keys.

    @param args: parsed arguments; ``skip_aggregates`` is a comma separated
        list of aggregate names, matched case-insensitively and ignoring
        whitespace around each name
    @param nova: client whose ``get(path).json()`` returns the decoded body
    @returns: None
    @raises nagios_plugin3.CriticalError: a result is flagged critical
    @raises nagios_plugin3.WarnError: a result is flagged warning and none
        is flagged critical
    """
    aggregates = nova.get('/os-aggregates').json()['aggregates']
    services = nova.get('/os-services').json()['services']
    services_compute = [x for x in services if x['binary'] == 'nova-compute']

    # Parse the skip list once instead of on every iteration.  Names are
    # stripped so that 'agg1, agg2' skips both aggregates; the comparison
    # is case-insensitive.
    skipped_aggregates = {
        name.strip().lower()
        for name in (args.skip_aggregates or '').split(',')
    }

    status = []
    hosts_checked = set()
    for agg in aggregates:
        if agg['name'].lower() in skipped_aggregates:
            # NOTE: hosts of a skipped aggregate are not recorded, so they
            # are still evaluated below unless another aggregate has them.
            continue
        # get a list of hosts, pass to the function
        hosts = agg['hosts']
        hosts_checked.update(hosts)
        status.append(
            check_hosts_up(args, agg['name'], hosts, services_compute))

    # find hosts that haven't been checked already
    hosts_not_checked = [
        x['host'] for x in services_compute
        if x['host'] not in hosts_checked
    ]
    if hosts_not_checked:
        status.append(
            check_hosts_up(args, '(not-part-of-any-agg)', hosts_not_checked,
                           services_compute))

    msg = ['nova-compute']
    msg.extend(x['msg_text'] for x in status if x['msg_text'] != '')
    if any(x['critical'] for x in status):
        output = 'CRITICAL: {}'.format(', '.join(msg))
        raise nagios_plugin3.CriticalError(output)
    if any(x['warning'] for x in status):
        output = 'WARNING: {}'.format(', '.join(msg))
        raise nagios_plugin3.WarnError(output)
    print('OK: Nova-compute services happy')