def test_check_replication():
    """check_replication should honour the nagios status-code convention.

    With a warn range of (2, 1000) and a crit range of (1, 1000), a
    replication of 0 is expected to yield code 2, a replication of 1 code 1
    and a replication of 2 code 0. The service name must show up in the
    returned message in every case.
    """
    # replication < 2 => warn
    warn_bounds = (2, 1000)
    # replication < 1 => critical
    crit_bounds = (1, 1000)

    # (service name, observed replication, expected status code)
    scenarios = (
        ('foo', 0, 2),
        ('foo', 1, 1),
        ('bar', 2, 0),
    )

    for service_name, observed, expected_code in scenarios:
        code, message = check_replication(
            service_name,
            observed,
            warn_bounds,
            crit_bounds,
        )
        assert code == expected_code and service_name in message
def do_replication_check(service, monitoring_config, service_replication):
    """Do a replication check on the provided service and generate
    notification events based on the information in monitoring_config and
    service_replication.

    Note that the caller must provide replication data.

    :param service: The name of the service to send an event for
    :param monitoring_config: A dictionary conforming to the mandatory
        monitoring keys (as defined by extract_replication_info; ``team``
        and ``notification_email`` are read unconditionally here) and
        optionally providing additional keys. A missing or falsy optional
        value falls back to the default shown in parentheses:

        - runbook ("no runbook"): The runbook to refer oncall members to
        - tip ("no tip"): A tip for oncall members
        - page (False): Whether to page the provided team on failure
        - alert_after ("0s"): How long to wait before going critical
        - realert_every (-1): How many events before you trigger a realert;
          -1 indicates an exponential backoff
        - extra.replication.key ("habitat"): The file in /nail/etc to
          inspect to figure out which value to lookup in map
        - extra.replication.default (1): The default number of instances
          to check for
        - extra.replication.map ({}): A lookup that maps the replication
          keys to the appropriate minimum replication value
    :param service_replication: An int that represents the present
        replication.

    The default behavior is to send emails to a team if their service
    reaches 0 replicas, although teams can fine tune this to their needs.

    :returns: A dictionary that conforms to the expected sensu event API.
        Note that this function does NOT send it to Sensu
    :raises KeyError: if ``team`` or ``notification_email`` is missing from
        monitoring_config
    """
    replication_config = monitoring_config.get('extra', {}).get('replication', {})
    replication_key = replication_config.get('key', 'habitat')
    replication_default = replication_config.get('default', 1)
    replication_map = replication_config.get('map', {})

    try:
        goal_replication = replication_map[read_key(replication_key)]
    except (IOError, KeyError):
        # Either the /nail/etc/{key} file didn't exist or the result didn't
        # appear in the replication_map, either way use the default
        goal_replication = replication_default

    # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
    # open-ended upper bound also works on Python 3.
    upper_bound = getattr(sys, 'maxint', sys.maxsize)
    warn_range = (goal_replication, upper_bound)
    # Warn and critical share the same range, so falling below the goal is
    # evaluated against the critical range as well.
    crit_range = warn_range

    status_code, message = check_replication(
        service,
        service_replication,
        warn_range,
        crit_range,
    )

    # The optional keys are documented as optional, so read them with .get()
    # instead of indexing; `or` keeps the historical behaviour of replacing
    # any falsy value (None, '', 0) with the default.
    return {
        'name': "replication_{0}".format(service),
        'status': status_code,
        'output': message,
        'team': monitoring_config['team'],
        'notification_email': monitoring_config['notification_email'],
        'runbook': monitoring_config.get('runbook') or 'no runbook',
        'tip': monitoring_config.get('tip') or 'no tip',
        'page': monitoring_config.get('page') or False,
        'check_every': '1m',
        'alert_after': monitoring_config.get('alert_after') or '0s',
        'realert_every': monitoring_config.get('realert_every') or -1,
    }
def test_check_replication():
    """check_replication should honour the nagios status-code convention.

    With a warn range of (2, 1000) and a crit range of (1, 1000), a
    replication of 0 is expected to yield code 2, a replication of 1 code 1
    and a replication of 2 code 0. The service name must show up in the
    returned message in every case.
    """
    # replication < 2 => warn
    warn_bounds = (2, 1000)
    # replication < 1 => critical
    crit_bounds = (1, 1000)

    # (service name, observed replication, expected status code)
    scenarios = (
        ('foo', 0, 2),
        ('foo', 1, 1),
        ('bar', 2, 0),
    )

    for service_name, observed, expected_code in scenarios:
        code, message = check_replication(
            service_name,
            observed,
            warn_bounds,
            crit_bounds,
        )
        assert code == expected_code and service_name in message
def do_replication_check(service, monitoring_config, service_replication):
    """Do a replication check on the provided service and generate
    notification events based on the information in monitoring_config and
    service_replication.

    Note that the caller must provide replication data.

    :param service: The name of the service to send an event for
    :param monitoring_config: A dictionary conforming to the mandatory
        monitoring keys (as defined by extract_replication_info; ``team``
        and ``notification_email`` are read unconditionally here) and
        optionally providing additional keys. A missing or falsy optional
        value falls back to the default shown in parentheses:

        - runbook ("no runbook"): The runbook to refer oncall members to
        - tip ("no tip"): A tip for oncall members
        - page (False): Whether to page the provided team on failure
        - alert_after ("0s"): How long to wait before going critical
        - realert_every (-1): How many events before you trigger a realert;
          -1 indicates an exponential backoff
        - extra.replication.key ("habitat"): The file in /nail/etc to
          inspect to figure out which value to lookup in map
        - extra.replication.default (1): The default number of instances
          to check for
        - extra.replication.map ({}): A lookup that maps the replication
          keys to the appropriate minimum replication value
    :param service_replication: An int that represents the present
        replication.

    The default behavior is to send emails to a team if their service
    reaches 0 replicas, although teams can fine tune this to their needs.

    :returns: A dictionary that conforms to the expected sensu event API.
        Note that this function does NOT send it to Sensu
    :raises KeyError: if ``team`` or ``notification_email`` is missing from
        monitoring_config
    """
    replication_config = monitoring_config.get('extra', {}).get('replication', {})
    replication_key = replication_config.get('key', 'habitat')
    replication_default = replication_config.get('default', 1)
    replication_map = replication_config.get('map', {})

    try:
        goal_replication = replication_map[read_key(replication_key)]
    except (IOError, KeyError):
        # Either the /nail/etc/{key} file didn't exist or the result didn't
        # appear in the replication_map, either way use the default
        goal_replication = replication_default

    # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
    # open-ended upper bound also works on Python 3.
    upper_bound = getattr(sys, 'maxint', sys.maxsize)
    warn_range = (goal_replication, upper_bound)
    # Warn and critical share the same range, so falling below the goal is
    # evaluated against the critical range as well.
    crit_range = warn_range

    status_code, message = check_replication(
        service,
        service_replication,
        warn_range,
        crit_range,
    )

    # The optional keys are documented as optional, so read them with .get()
    # instead of indexing; `or` keeps the historical behaviour of replacing
    # any falsy value (None, '', 0) with the default.
    return {
        'name': "replication_{0}".format(service),
        'status': status_code,
        'output': message,
        'team': monitoring_config['team'],
        'notification_email': monitoring_config['notification_email'],
        'runbook': monitoring_config.get('runbook') or 'no runbook',
        'tip': monitoring_config.get('tip') or 'no tip',
        'page': monitoring_config.get('page') or False,
        'check_every': '1m',
        'alert_after': monitoring_config.get('alert_after') or '0s',
        'realert_every': monitoring_config.get('realert_every') or -1,
    }