def test_plugin_alert(self):
    """Trigger a per-server plugin alert, then a global plugin alert,
    and verify each produces exactly one unsent history entry."""
    self._cleanup()

    rule = {
        "above_below": "above",
        "rule_type": "plugin",
        "server": self.server_id,
        "gauge": self.gauge_id,
        "plugin": self.plugin_id,
        "account_id": self.account_id,
        "key": "testkey",
        "period": 0,
        "metric_value": 5,
    }
    rule_id = alerts_model.collection.insert(rule)

    # Payload with an unrelated key plus the watched key above threshold (6 > 5).
    watched_key = '{0}.testkey'.format(self.gauge['name'])
    payload = {'gauges': {'bla.test': 1, watched_key: 6}}
    plugin_alerter.check(data=payload, plugin=self.plugin)

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
        assert entry['average_value'] == 6
    eq_(pending['data'].count(), 1)

    self._cleanup()

    # Global variant: the rule matches by plugin/gauge/key name on any server.
    mongo_plugin = plugin_model.get_or_create(server_id=self.server_id, name='mongo')
    global_key = 'global_gauge.global_key'
    rule = {
        "above_below": "above",
        "rule_type": "plugin_global",
        "server": 'all',
        "plugin": 'mongo',
        "gauge": 'global_gauge',
        "key": 'global_key',
        "period": 0,
        "metric_value": 5,
    }
    rule_id = alerts_model.collection.insert(rule)

    payload = {'gauges': {'bla.test': 1, global_key: 6}}
    plugin_alerter.check(data=payload, plugin=mongo_plugin, server=self.server)

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
        assert entry['average_value'] == 6
    eq_(pending['data'].count(), 1)
def test_process_alert(self):
    """Per-server and global process CPU alerts each create one unsent
    history entry when the reported value crosses the threshold.

    NOTE(review): a method with this exact name is defined again later
    in this file; the later definition shadows this one at class
    creation time, so only one copy is actually collected.
    """
    self._cleanup()

    base = {
        "above_below": "above",
        "rule_type": "process",
        "server": self.server_id,
        "process": self.process_id,
        "account_id": self.account_id,
        "period": 0,
    }
    rule_id = alerts_model.collection.insert(
        {**base, 'metric': 'CPU', 'metric_value': 1, 'metric_type': "%"})

    reported_cpu = 2.0  # above the threshold of 1
    process_alerter.check(
        {'data': [{'p': self.process_id, 'c': reported_cpu}]}, self.server)

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
        assert entry['average_value'] == reported_cpu
    eq_(pending['data'].count(), 1)

    self._cleanup()

    # Global variant: matches any server by process name.
    base = {
        "above_below": "above",
        "rule_type": "process_global",
        "server": 'all',
        "process": 'mongo',
        "account_id": self.account_id,
        "period": 0,
    }
    mongo = process_model.get_or_create(server_id=self.server_id, name='mongo')
    rule_id = alerts_model.collection.insert(
        {**base, 'metric': 'CPU', 'metric_value': 1, 'metric_type': "%"})

    process_alerter.check(
        {'data': [{'p': mongo['_id'], 'c': reported_cpu}]}, self.server)

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
        assert entry['average_value'] == reported_cpu
    eq_(pending['data'].count(), 1)
def test_notsendingdata_alert(self):
    """A NotSendingData rule (per-server, then account-global) fires when
    the server's last check is older than the alert window."""
    self._cleanup()

    now = unix_utc_now()
    rule_id = alerts_model.collection.insert({
        "rule_type": "system",
        "server": self.server_id,
        "account_id": self.account_id,
        "period": 0,
        'metric': 'NotSendingData',
    })
    # Pretend the server went quiet 15 seconds ago.
    server_model.update({'last_check': now - 15}, self.server_id)
    notsendingdata_alerter.check()

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
    eq_(pending['data'].count(), 1)

    self._cleanup()

    now = unix_utc_now()
    rule_id = alerts_model.collection.insert({
        "rule_type": "global",
        "server": "all",
        "account_id": self.account_id,
        "period": 0,
        'metric': 'NotSendingData',
    })
    server_model.update({'last_check': now - 15}, self.server_id)
    notsendingdata_alerter.check()

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
    eq_(pending['data'].count(), 1)
def test_system_emails(self):
    """A triggered system CPU alert generates exactly one notification
    email with the expected subject and recipient."""
    self._cleanup()

    rule = {
        "above_below": "above",
        "rule_type": "system",
        "server": self.server_id,
        "account_id": self.account_id,
        "period": 0,
        "notifications": self.notifications_list,
        'metric': 'CPU',
        'metric_value': 1,
        'metric_type': "%",
    }
    alerts_model.collection.insert(rule)

    # idle 98.70 -> 1.3% in use, above the 1% threshold.
    cpu_payload = {u'cpu': {u'system': u'1.30', u'idle': u'98.70',
                            u'user': u'0.00', u'steal': u'0.00', u'nice': u'0.00'}}
    server_alerter.check(cpu_payload, self.server)

    pending = alerts_history_model.get_unsent(server_id=self.server_id)
    eq_(pending['data'].count(), 1)

    for notification in generate_notifications():
        send_notification_email(notification=notification, emails=self.emails)

    self.assertEqual(len(mail.outbox), 1)
    self.assertEqual(mail.outbox[0].subject,
                     'Server: test - CPU > 1% alert (Current value: 1.3%)')
    self.assertEqual(mail.outbox[0].to, ['*****@*****.**'])

    self._cleanup()
def test_process_emails(self):
    """A triggered process CPU alert produces one email with the process
    name in the subject."""
    self._cleanup()

    rule = {
        "above_below": "above",
        "rule_type": "process",
        "server": self.server_id,
        "process": self.process_id,
        "account_id": self.account_id,
        "period": 0,
        "notifications": self.notifications_list,
        'metric': 'CPU',
        'metric_value': 1,
        'metric_type': "%",
    }
    alerts_model.collection.insert(rule)

    # CPU at 2% (> 1% threshold); the memory value is irrelevant here.
    process_alerter.check(
        {'data': [{'p': self.process_id, 'c': 2, 'm': 254.0}]}, self.server)

    pending = alerts_history_model.get_unsent()
    eq_(pending['data'].count(), 1)

    for notification in generate_notifications():
        send_notification_email(notification=notification, emails=self.emails)

    self.assertEqual(len(mail.outbox), 1)
    self.assertEqual(mail.outbox[0].subject,
                     'Server: test - testprocess/CPU > 1% alert (Current value: 2.0%)')
    self.assertEqual(mail.outbox[0].to, ['*****@*****.**'])

    self._cleanup()
def test_health_check_emails(self):
    """A critical health-check result triggers the matching alert and
    sends exactly one email with the check command in the subject.

    Fix: the insert's return value was previously bound to an unused
    `alert_id` local; the binding is dropped (the insert still runs).
    """
    self._cleanup()

    health_check_alert = {
        "rule_type": "health_check",
        "server": self.server_id,
        "status": "critical",
        "command": "check-http.rb",
        "period": 0,
    }
    # The inserted id is not asserted on in this test.
    alerts_model.collection.insert(health_check_alert)

    data = [{
        u'command': u'check-http.rb',
        u'name': u'',
        u'exit_code': 2,  # non-zero exit -> critical
    }]
    formated_check_data = health_checks_results_model.save(data=data, server=self.server)
    health_check_alerter.check(data=formated_check_data, server=self.server)

    unsent_alerts = alerts_history_model.get_unsent(server_id=self.server_id)
    eq_(unsent_alerts['data'].count(), 1)

    notifications = generate_notifications()
    for n in notifications:
        send_notification_email(notification=n, emails=self.emails)

    self.assertEqual(len(mail.outbox), 1)
    self.assertEqual(mail.outbox[0].subject,
                     'Server: test - check-http.rb status is CRITICAL')
    self.assertEqual(mail.outbox[0].to, ['*****@*****.**'])

    self._cleanup()
def test_uptime_emails(self):
    """An empty process payload marks the process as Down and sends a
    single 'is Down' email."""
    self._cleanup()

    rule = {
        "above_below": "above",
        "rule_type": "uptime",
        "server": self.server_id,
        "process": self.process_id,
        "account_id": self.account_id,
        "period": 0,
        "notifications": self.notifications_list,
        'metric': 'Down',
        'metric_value': 0,
    }
    alerts_model.collection.insert(rule)

    # No process entries at all -> the watched process is down.
    uptime_alerter.check({'data': []}, self.server)

    pending = alerts_history_model.get_unsent(server_id=self.server_id)
    eq_(pending['data'].count(), 1)

    for notification in generate_notifications():
        send_notification_email(notification=notification, emails=self.emails)

    self.assertEqual(len(mail.outbox), 1)
    self.assertEqual(mail.outbox[0].subject, 'Server: test / testprocess is Down')
    self.assertEqual(mail.outbox[0].to, ['*****@*****.**'])

    self._cleanup()
def test_system_check(self):
    """A per-server system CPU rule is returned by get_alerts and fires
    once on an above-threshold payload."""
    self._cleanup()

    rule = {
        "above_below": "above",
        "rule_type": "system",
        "server": self.server_id,
        "account_id": self.account_id,
        "period": 0,
        'metric': 'CPU',
        'metric_value': 1,
        'metric_type': "%",
    }
    rule_id = alerts_model.collection.insert(rule)

    eq_(len(alerts_model.get_alerts(type='system', server=self.server)), 1)

    payload = {u'cpu': {u'system': u'1.30', u'idle': u'98.70',
                        u'user': u'0.00', u'steal': u'0.00', u'nice': u'0.00'}}
    server_alerter.check(payload, self.server)

    pending = alerts_history_model.get_unsent(server_id=self.server_id)
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
    eq_(pending['data'].count(), 1)

    self._cleanup()
def test_health_check_alert(self):
    """Per-server, then server-less (global), health-check alerts each
    fire once on a critical result.

    NOTE(review): this method name is defined twice in this file; the
    later definition shadows this one, so only one copy runs.
    """
    self._cleanup()

    rule_id = alerts_model.collection.insert({
        "rule_type": "health_check",
        "server": self.server_id,
        "status": "critical",
        "command": "check-http.rb",
        "period": 0,
    })
    result = [{u'command': u'check-http.rb', u'name': u'', u'exit_code': 2}]
    health_check_alerter.check(data=result, server=self.server)

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
    eq_(pending['data'].count(), 1)

    self._cleanup()

    # Global rule: no "server" key; matched against a command with arguments.
    rule_id = alerts_model.collection.insert({
        "rule_type": "health_check",
        "status": "critical",
        "command": "check-http.rb",
        "period": 0,
    })
    result = [{
        u'command': u'check-http.rb -u amon.cx',
        u'name': u'',
        u'exit_code': 2,
    }]
    health_check_alerter.check(data=result, server=self.server)

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
    eq_(pending['data'].count(), 1)
def test_health_check_alert(self):
    """Critical health-check results trigger both a per-server rule and
    a server-less (global) rule, one unsent entry each.

    NOTE(review): an earlier definition of this method exists in this
    file; this later one is the definition Python keeps.
    """
    self._cleanup()

    # Rule scoped to a single server.
    per_server_rule = {
        "rule_type": "health_check",
        "server": self.server_id,
        "status": "critical",
        "command": "check-http.rb",
        "period": 0,
    }
    inserted_id = alerts_model.collection.insert(per_server_rule)
    check_results = [{u'command': u'check-http.rb', u'name': u'', u'exit_code': 2}]
    health_check_alerter.check(data=check_results, server=self.server)

    unsent = alerts_history_model.get_unsent()
    for row in unsent['data']:
        assert row['alert_id'] == inserted_id
    eq_(unsent['data'].count(), 1)

    self._cleanup()

    # Rule without a "server" key applies to every server.
    global_rule = {
        "rule_type": "health_check",
        "status": "critical",
        "command": "check-http.rb",
        "period": 0,
    }
    inserted_id = alerts_model.collection.insert(global_rule)
    check_results = [{u'command': u'check-http.rb -u amon.cx', u'name': u'', u'exit_code': 2}]
    health_check_alerter.check(data=check_results, server=self.server)

    unsent = alerts_history_model.get_unsent()
    for row in unsent['data']:
        assert row['alert_id'] == inserted_id
    eq_(unsent['data'].count(), 1)
def test_plugin_emails(self):
    """A plugin gauge value above the rule threshold sends one email
    naming plugin.gauge.key and the current value.

    NOTE(review): this method name appears twice in this file; the later
    definition shadows this one.
    """
    self._cleanup()

    test_plugin = plugin_model.get_or_create(server_id=self.server_id, name='testplugin')
    test_gauge = plugin_model.get_or_create_gauge_by_name(plugin=test_plugin, name='gauge')

    rule = {
        "above_below": "above",
        "rule_type": "plugin",
        "server": self.server_id,
        "gauge": test_gauge['_id'],
        "plugin": test_plugin['_id'],
        "account_id": self.account_id,
        "key": "testkey",
        "period": 0,
        "metric_value": 5,
        "notifications": self.notifications_list,
    }
    rule_id = alerts_model.collection.insert(rule)

    watched_key = '{0}.testkey'.format(test_gauge['name'])
    plugin_alerter.check(data={'gauges': {'bla.test': 1, watched_key: 6}},
                         plugin=test_plugin)

    pending = alerts_history_model.get_unsent()
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
        assert entry['average_value'] == 6
    eq_(pending['data'].count(), 1)

    for notification in generate_notifications():
        send_notification_email(notification=notification, emails=self.emails)

    self.assertEqual(len(mail.outbox), 1)
    self.assertEqual(mail.outbox[0].subject,
                     'Server: test - testplugin.gauge.testkey > 5 (Current value: 6.0)')
    self.assertEqual(mail.outbox[0].to, ['*****@*****.**'])

    self._cleanup()
def test_global_check(self):
    """An account-wide ("all" servers) CPU rule is visible through
    get_global_alerts and fires once for this server."""
    self._cleanup()

    rule = {
        "above_below": "above",
        "rule_type": "global",
        "server": "all",
        "account_id": self.account_id,
        "period": 0,
        'metric': 'CPU',
        'metric_value': 1,
        'metric_type': "%",
    }
    rule_id = alerts_model.collection.insert(rule)

    eq_(len(alerts_model.get_global_alerts(account_id=self.account_id)), 1)

    payload = {u'cpu': {u'system': u'1.30', u'idle': u'98.70',
                        u'user': u'0.00', u'steal': u'0.00', u'nice': u'0.00'}}
    server_alerter.check(payload, self.server)

    pending = alerts_history_model.get_unsent(server_id=self.server_id)
    for entry in pending['data']:
        assert entry['alert_id'] == rule_id
    eq_(pending['data'].count(), 1)
def test_plugin_emails(self):
    """Verify the plugin-alert email subject and recipient.

    NOTE(review): an earlier definition of this method exists in this
    file; this later one is the definition Python keeps.
    """
    self._cleanup()

    plug = plugin_model.get_or_create(server_id=self.server_id, name='testplugin')
    plug_gauge = plugin_model.get_or_create_gauge_by_name(plugin=plug, name='gauge')

    alert_doc = {
        "above_below": "above",
        "rule_type": "plugin",
        "server": self.server_id,
        "gauge": plug_gauge['_id'],
        "plugin": plug['_id'],
        "account_id": self.account_id,
        "key": "testkey",
        "period": 0,
        "metric_value": 5,
        "notifications": self.notifications_list,
    }
    doc_id = alerts_model.collection.insert(alert_doc)

    gauge_key = '{0}.testkey'.format(plug_gauge['name'])
    payload = {'gauges': {'bla.test': 1, gauge_key: 6}}
    plugin_alerter.check(data=payload, plugin=plug)

    unsent = alerts_history_model.get_unsent()
    for row in unsent['data']:
        assert row['alert_id'] == doc_id
        assert row['average_value'] == 6
    eq_(unsent['data'].count(), 1)

    for n in generate_notifications():
        send_notification_email(notification=n, emails=self.emails)

    self.assertEqual(len(mail.outbox), 1)
    self.assertEqual(mail.outbox[0].subject,
                     'Server: test - testplugin.gauge.testkey > 5 (Current value: 6.0)')
    self.assertEqual(mail.outbox[0].to, ['*****@*****.**'])

    self._cleanup()
def test_uptime_alert(self):
    """An empty process list triggers the Down uptime rule exactly once.

    NOTE(review): this method name is defined twice in this file; the
    later definition shadows this one.
    """
    self._cleanup()

    rule = {
        "above_below": "above",
        "rule_type": "uptime",
        "server": self.server_id,
        "process": self.process_id,
        "account_id": self.account_id,
        "period": 0,
        'metric': 'Down',
        'metric_value': 0,
    }
    alerts_model.collection.insert(rule)

    uptime_alerter.check({'data': []}, self.server)

    pending = alerts_history_model.get_unsent(server_id=self.server_id)
    eq_(pending['data'].count(), 1)

    self._cleanup()
def test_uptime_alert(self):
    """An empty process payload fires the Down rule exactly once.

    NOTE(review): an earlier definition of this method exists in this
    file; this later one is the definition Python keeps.
    """
    self._cleanup()

    down_rule = {
        "above_below": "above",
        "rule_type": "uptime",
        "server": self.server_id,
        "process": self.process_id,
        "account_id": self.account_id,
        "period": 0,
        'metric': 'Down',
        'metric_value': 0,
    }
    alerts_model.collection.insert(down_rule)

    # Empty data list -> the monitored process is considered down.
    uptime_alerter.check({'data': []}, self.server)

    unsent = alerts_history_model.get_unsent(server_id=self.server_id)
    eq_(unsent['data'].count(), 1)

    self._cleanup()
def generate_notifications():
    """Build a list of AmonStruct notification objects, one per unsent
    alert-history trigger.

    For each trigger the matching alert rule is looked up; triggers whose
    rule has been deleted are skipped. Depending on the rule type the
    struct is enriched with the server, process, plugin/gauge and
    health-check details, plus metric metadata (top consumers, volume,
    interface) where applicable.

    Returns:
        list[AmonStruct]: one populated struct per trigger with a live alert.

    Fixes applied in review:
    - `server` is initialized to None; previously it was read (`if server:`)
      without being bound when the rule had no server id, raising NameError.
    - the bare `except:` around the alert lookup is narrowed to
      `except Exception:` so SystemExit/KeyboardInterrupt propagate.
    - `type(x) is dict` replaced with `isinstance(x, dict)`.
    """
    notifications_list = []
    unsent_alerts = alerts_history_model.get_unsent()
    for trigger in unsent_alerts.get('data'):
        result = AmonStruct()
        result.global_mute = False
        metadata = None
        timezone = 'UTC'
        server = None  # may stay None when the rule has no server id

        try:
            alert = alerts_model.get_by_id(trigger['alert_id'])
        except Exception:
            alert = None  # Deleted alert here

        if alert:
            rule_type = alert.get('rule_type', 'system')
            metric_type = alert.get('metric', None)
        else:
            rule_type = 'alert-does-not-exist'

        if rule_type in ['global', 'process_global', 'plugin_global', 'process',
                         'system', 'plugin', 'uptime', 'health_check']:
            # Global-style rules carry the server on the trigger; per-server
            # rules carry it on the alert itself.
            if rule_type in ['global', 'process_global', 'plugin_global', 'health_check']:
                server_id = trigger.get('server_id')
            else:
                server_id = alert.get('server')

            if server_id:
                server = server_model.get_by_id(server_id)
                result.server = server
                result.global_mute = alert_mute_servers_model.check_if_server_is_muted(server=server)

            if metric_type:
                metric_type = metric_type.lower()
                if metric_type in ['cpu', 'memory', 'loadavg']:
                    trigger_period_from = trigger['from']
                    trigger_period_to = trigger['time']
                    # Load average reuses the CPU top-consumers metadata.
                    metric_type = 'cpu' if metric_type == 'loadavg' else metric_type
                    # Get CPU top consumers for Load average
                    if server:
                        metadata = process_model.get_top_consumers_for_period(
                            date_from=trigger_period_from,
                            date_to=trigger_period_to,
                            server=server,
                            metric_type=metric_type)

                # Overwrite rule_type for the new type
                if metric_type == 'notsendingdata':
                    alert['rule_type'] = 'notsendingdata'

                if metric_type == 'disk':
                    volume_id = trigger.get('volume')
                    metadata = volumes_model.get_by_id(volume_id)

                if metric_type in ['network/inbound', 'network/outbound']:
                    interface_id = trigger.get('interface')
                    metadata = interfaces_model.get_by_id(interface_id)

            if rule_type == 'process_global':
                process_name = alert.get('process')
                result.process = process_model.get_by_name_and_server_id(
                    server_id=server_id, name=process_name)

            if rule_type == 'plugin_global':
                gauge_name = alert.get('gauge')
                plugin_name = alert.get('plugin')
                result.plugin = plugin_model.get_by_name_and_server_id(
                    server_id=server_id, name=plugin_name)
                result.gauge = plugin_model.get_gauge_by_name_and_plugin_id(
                    plugin=result.plugin, name=gauge_name)

        # Process and Uptime alerts
        if rule_type == 'process' or rule_type == 'uptime':
            process_dict = alert.get('process')
            if process_dict:
                result.process = process_model.get_by_id(process_dict.get('_id'))

        if rule_type == 'plugin':
            result.plugin = alert.get('plugin')
            result.gauge = alert.get('gauge')

        if rule_type == 'health_check':
            health_check_result_id = trigger.get('health_checks_data_id')
            health_check_result = health_checks_results_model.get_by_id(health_check_result_id)
            if isinstance(health_check_result, dict):
                health_check_id = health_check_result.get('check_id')
                health_check = health_checks_model.get_by_id(health_check_id)
                result.healthcheck = health_check
                result.health_check_result = health_check_result

        if alert:
            result.alert = alert
            result.metadata = metadata
            result.timezone = timezone
            result.trigger = trigger
            result.mute = alert.get('mute', False)  # Shortcut

            notifications_list.append(result)

    return notifications_list
def test_process_alert(self):
    """Per-server and global process CPU rules each fire exactly once.

    NOTE(review): an earlier definition of this method exists in this
    file; this later one is the definition Python keeps.
    """
    self._cleanup()

    per_server_rule = {
        "above_below": "above",
        "rule_type": "process",
        "server": self.server_id,
        "process": self.process_id,
        "account_id": self.account_id,
        "period": 0,
        'metric': 'CPU',
        'metric_value': 1,
        'metric_type': "%",
    }
    inserted_id = alerts_model.collection.insert(per_server_rule)

    cpu = 2.0  # above the threshold of 1
    process_alerter.check({'data': [{'p': self.process_id, 'c': cpu}]}, self.server)

    unsent = alerts_history_model.get_unsent()
    for row in unsent['data']:
        assert row['alert_id'] == inserted_id
        assert row['average_value'] == cpu
    eq_(unsent['data'].count(), 1)

    self._cleanup()

    # Global variant: matched on any server by process name.
    mongo_process = process_model.get_or_create(server_id=self.server_id, name='mongo')
    global_rule = {
        "above_below": "above",
        "rule_type": "process_global",
        "server": 'all',
        "process": 'mongo',
        "account_id": self.account_id,
        "period": 0,
        'metric': 'CPU',
        'metric_value': 1,
        'metric_type': "%",
    }
    inserted_id = alerts_model.collection.insert(global_rule)

    process_alerter.check({'data': [{'p': mongo_process['_id'], 'c': cpu}]}, self.server)

    unsent = alerts_history_model.get_unsent()
    for row in unsent['data']:
        assert row['alert_id'] == inserted_id
        assert row['average_value'] == cpu
    eq_(unsent['data'].count(), 1)