Пример #1
0
class NTPAlerter(object):
    """
    Gather metrics from the registered sources, classify them, and print
    the outcome in either Nagios or collectd format.
    """

    def __init__(self, checks, objs):
        """
        checks: list of check names to evaluate; collectmetrics() extends it
            in place with the checks derived from those requested.
        objs: mapping of source name to an object providing getmetrics().
        """
        self.checks = checks
        self.objs = objs
        self.mc = MetricClassifier(_metricdefs)

    def _add_derived_checks(self):
        """
        Extend self.checks with the checks implied by those already present.

        Every derived check is added at most once, so calling this (via
        collectmetrics) repeatedly does not keep growing the list with
        duplicate entries.
        """
        checks = self.checks
        implied = []
        if 'proc' in checks:
            implied.append('runtime')
        if 'trace' in checks:
            implied.extend(('tracehosts', 'traceloops'))
        if 'vars' in checks and 'offset' not in checks:
            implied.append('sysoffset')
        for name in implied:
            if name not in checks:
                checks.append(name)

    def collectmetrics(self, debug):
        """
        Get metrics from each registered metric source and add all relevant aliases.

        When debug is true, the raw metrics are pretty-printed before the
        aliases are added.  The derived checks are then added to self.checks.
        """
        self.metrics = {}
        for source in self.objs:
            self.metrics.update(self.objs[source].getmetrics())
        if debug:
            pprint.pprint(self.metrics)
        metrics.addaliases(self.metrics, _aliases)
        self._add_derived_checks()

    def custom_message(self, metric, result):
        """
        Special cases for message formats.

        Returns the custom message for the given metric and result, or None
        when the metric has no special-case handler.
        """
        handlers = {
            'runtime': self.custom_message_runtime,
            'sync': self.custom_message_sync,
            'tracehosts': self.custom_message_tracehosts,
            'traceloops': self.custom_message_traceloops,
        }
        handler = handlers.get(metric)
        if handler is None:
            return None
        return handler(result)

    def custom_message_runtime(self, result):
        """
        Message for the 'runtime' check; None for an unrecognised result.
        """
        proc = self.objs['proc']
        if result == 'CRITICAL':
            return '%s: No NTP process could be found.  Please check that an NTP server is installed and running.' % (
                result, )
        if result == 'WARNING':
            # Reported with an OK prefix on purpose: return_code() also
            # maps a runtime WARNING to 0.
            return 'OK: %s has only been running %d seconds' % (
                proc.name, proc.getruntime())
        if result == 'OK':
            return '%s: %s has been running %d seconds' % (
                result, proc.name, proc.getruntime())
        return None

    def custom_message_sync(self, result):
        """
        Message for the 'sync' check; None for an unrecognised result.
        """
        if result == 'CRITICAL':
            return '%s: No sync peer selected' % (result, )
        if result == 'OK':
            return '%s: Time is in sync with %s' % (
                result, self.objs['peers'].syncpeer())
        return None

    def custom_message_traceloops(self, result):
        """
        Message for the 'traceloops' check; None for an unrecognised result.
        """
        if result == 'CRITICAL':
            return '%s: Trace loop detected at host %s' % (
                result, self.objs['trace'].loophost)
        if result == 'OK':
            return '%s: Trace detected no loops' % (result, )
        return None

    def custom_message_tracehosts(self, result):
        """
        Message for the 'tracehosts' check, listing the hosts in the trace.
        """
        trace = self.objs['trace']
        return '%s: %d hosts detected in trace: %s' % (
            result, trace.results['tracehosts'], ", ".join(trace.hostlist))

    def alert_collectd(self, hostname, interval):
        """
        Produce collectd output for the metrics.

        Prints one PUTVAL line for each entry of _collectdtypes which has a
        collected metric, using the given hostname and interval.
        """
        self.collectmetrics(False)
        self.mc.classify_metrics(self.metrics)
        # The result is not used here; the call is kept in case the
        # classifier depends on it having been made - TODO confirm.
        self.mc.worst_metric(self.checks)
        self.metrics['result'] = self.return_code()
        for metric in sorted(_collectdtypes.keys()):
            if metric in self.metrics:
                print('PUTVAL "%s/ntpmon-%s" interval=%d N:%.9f' % (
                    hostname,
                    _collectdtypes[metric],
                    interval,
                    self.metrics[metric],
                ))
        if sys.stdout.isatty():
            # we're outputting to a terminal; must be test mode
            print('')
        else:
            # flush standard output to ensure metrics are sent to collectd immediately
            sys.stdout.flush()

    def alert_nagios(self, debug):
        """
        Produce nagios output for the metrics.

        In debug mode the message of every classified check is printed;
        otherwise only the message of the worst metric is printed, followed
        by ' | ' and the output of report().
        """
        self.collectmetrics(debug)
        results = self.mc.classify_metrics(self.metrics)
        msgs = {}
        for metric in self.checks:
            if metric not in results:
                continue
            msg = self.custom_message(metric, results[metric])
            if msg is None:
                # no special case; fall back to the classifier's message
                msg = self.mc.message(metric, _formats[metric][0],
                                      _formats[metric][1])
            msgs[metric] = msg
        if debug:
            for m in msgs:
                print(msgs[m])
        else:
            (m, rc) = self.mc.worst_metric(self.checks)
            self.metrics['result'] = self.return_code()
            print("%s | %s" % (msgs[m], self.report()))

    def report(self):
        """
        Report metric values.

        Returns a space-separated string of name=value items, one for each
        key of _aliases in sorted order; metrics which were not collected
        are reported with an empty value.
        """
        items = []
        for name in sorted(_aliases):
            if name not in self.metrics:
                items.append("%s=" % (name, ))
                continue
            spec = _formats[name]
            # metrics without a format, or with a '%' format, are reported
            # using the 'f' format
            fmt = 'f' if spec is None or spec[1] == '%' else spec[1]
            val = self.mc.fmtstr(fmt) % self.metrics[name]
            items.append("%s=%s" % (name, val))
        return " ".join(items)

    def return_code(self):
        """
        Don't return anything other than OK until ntpd has been running for
        at least enough time for 8 polling intervals of 64 seconds each.  This
        prevents false positives due to ntpd restarts or short-lived VMs.
        """
        if self.mc.results.get('runtime') == 'WARNING':
            return 0
        return self.mc.return_code(self.checks)
Пример #2
0
 def test_classify_process(self):
     """
     Classify the sample metrics, then check the classification results,
     the worst metric, and the return code with and without
     unknown_as_critical.
     """
     classifier = MetricClassifier(goodmetricdefs)

     classified = classifier.classify_metrics(samplemetrics)
     self.assertEqual(classified, samplemetricresults)

     worst = classifier.worst_metric(samplemetrics)
     self.assertEqual(worst, ('d', 3))

     default_rc = classifier.return_code(samplemetrics)
     self.assertEqual(default_rc, 3)

     # with unknown_as_critical set, the same metrics yield 2 instead of 3
     strict_rc = classifier.return_code(samplemetrics, unknown_as_critical=True)
     self.assertEqual(strict_rc, 2)
Пример #3
0
class NTPAlerter(object):
    """
    Gather metrics from the registered sources, classify them, and print
    the outcome in either Nagios or collectd format.
    """

    def __init__(self, checks, objs):
        """
        checks: list of check names to evaluate; collectmetrics() extends it
            in place with the checks derived from those requested.
        objs: mapping of source name to an object providing getmetrics().
        """
        self.checks = checks
        self.objs = objs
        self.mc = MetricClassifier(_metricdefs)

    def _add_derived_checks(self):
        """
        Extend self.checks with the checks implied by those already present.

        Every derived check is added at most once, so calling this (via
        collectmetrics) repeatedly does not keep growing the list with
        duplicate entries.
        """
        checks = self.checks
        implied = []
        if 'proc' in checks:
            implied.append('runtime')
        if 'trace' in checks:
            implied.extend(('tracehosts', 'traceloops'))
        if 'vars' in checks and 'offset' not in checks:
            implied.append('sysoffset')
        for name in implied:
            if name not in checks:
                checks.append(name)

    def collectmetrics(self, debug):
        """
        Get metrics from each registered metric source and add all relevant aliases.

        When debug is true, the raw metrics are pretty-printed before the
        aliases are added.  The derived checks are then added to self.checks.
        """
        self.metrics = {}
        for source in self.objs:
            self.metrics.update(self.objs[source].getmetrics())
        if debug:
            pprint.pprint(self.metrics)
        metrics.addaliases(self.metrics, _aliases)
        self._add_derived_checks()

    def custom_message(self, metric, result):
        """
        Special cases for message formats.

        Returns the custom message for the given metric and result, or None
        when the metric has no special-case handler.
        """
        handlers = {
            'runtime': self.custom_message_runtime,
            'sync': self.custom_message_sync,
            'tracehosts': self.custom_message_tracehosts,
            'traceloops': self.custom_message_traceloops,
        }
        handler = handlers.get(metric)
        if handler is None:
            return None
        return handler(result)

    def custom_message_runtime(self, result):
        """
        Message for the 'runtime' check; None for an unrecognised result.
        """
        proc = self.objs['proc']
        if result == 'CRITICAL':
            return '%s: No NTP process could be found.  Please check that an NTP server is installed and running.' % (result,)
        if result == 'WARNING':
            # Reported with an OK prefix on purpose: return_code() also
            # maps a runtime WARNING to 0.
            return 'OK: %s has only been running %d seconds' % (proc.name, proc.getruntime())
        if result == 'OK':
            return '%s: %s has been running %d seconds' % (result, proc.name, proc.getruntime())
        return None

    def custom_message_sync(self, result):
        """
        Message for the 'sync' check; None for an unrecognised result.
        """
        if result == 'CRITICAL':
            return '%s: No sync peer selected' % (result,)
        if result == 'OK':
            return '%s: Time is in sync with %s' % (result, self.objs['peers'].syncpeer())
        return None

    def custom_message_traceloops(self, result):
        """
        Message for the 'traceloops' check; None for an unrecognised result.
        """
        if result == 'CRITICAL':
            return '%s: Trace loop detected at host %s' % (result, self.objs['trace'].loophost)
        if result == 'OK':
            return '%s: Trace detected no loops' % (result,)
        return None

    def custom_message_tracehosts(self, result):
        """
        Message for the 'tracehosts' check, listing the hosts in the trace.
        """
        trace = self.objs['trace']
        return '%s: %d hosts detected in trace: %s' % (
            result,
            trace.results['tracehosts'],
            ", ".join(trace.hostlist)
        )

    def alert_collectd(self, hostname, interval):
        """
        Produce collectd output for the metrics.

        Prints one PUTVAL line for each entry of _collectdtypes which has a
        collected metric, using the given hostname and interval.
        """
        self.collectmetrics(False)
        self.mc.classify_metrics(self.metrics)
        # The result is not used here; the call is kept in case the
        # classifier depends on it having been made - TODO confirm.
        self.mc.worst_metric(self.checks)
        self.metrics['result'] = self.return_code()
        for metric in sorted(_collectdtypes.keys()):
            if metric in self.metrics:
                print('PUTVAL "%s/ntpmon-%s" interval=%d N:%.9f' % (
                    hostname,
                    _collectdtypes[metric],
                    interval,
                    self.metrics[metric],
                ))
        if sys.stdout.isatty():
            # we're outputting to a terminal; must be test mode
            print('')
        else:
            # flush standard output to ensure metrics are sent to collectd immediately
            sys.stdout.flush()

    def alert_nagios(self, debug):
        """
        Produce nagios output for the metrics.

        In debug mode the message of every classified check is printed;
        otherwise only the message of the worst metric is printed, followed
        by ' | ' and the output of report().
        """
        self.collectmetrics(debug)
        results = self.mc.classify_metrics(self.metrics)
        msgs = {}
        for metric in self.checks:
            if metric not in results:
                continue
            msg = self.custom_message(metric, results[metric])
            if msg is None:
                # no special case; fall back to the classifier's message
                msg = self.mc.message(metric, _formats[metric][0], _formats[metric][1])
            msgs[metric] = msg
        if debug:
            for m in msgs:
                print(msgs[m])
        else:
            (m, rc) = self.mc.worst_metric(self.checks)
            self.metrics['result'] = self.return_code()
            print("%s | %s" % (msgs[m], self.report()))

    def report(self):
        """
        Report metric values.

        Returns a space-separated string of name=value items, one for each
        key of _aliases in sorted order; metrics which were not collected
        are reported with an empty value.
        """
        items = []
        for name in sorted(_aliases):
            if name not in self.metrics:
                items.append("%s=" % (name,))
                continue
            spec = _formats[name]
            # metrics without a format, or with a '%' format, are reported
            # using the 'f' format
            fmt = 'f' if spec is None or spec[1] == '%' else spec[1]
            val = self.mc.fmtstr(fmt) % self.metrics[name]
            items.append("%s=%s" % (name, val))
        return " ".join(items)

    def return_code(self):
        """
        Don't return anything other than OK until ntpd has been running for
        at least enough time for 8 polling intervals of 64 seconds each.  This
        prevents false positives due to ntpd restarts or short-lived VMs.
        """
        if self.mc.results.get('runtime') == 'WARNING':
            return 0
        return self.mc.return_code(self.checks)