Example #1
class TestSummary(unittest.TestCase):
    def setUp(self):
        self.registry = CollectorRegistry()
        self.summary = Summary('s', 'help', registry=self.registry)

    def test_summary(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        self.assertEqual(0, self.registry.get_sample_value('s_sum'))
        self.summary.observe(10)
        self.assertEqual(1, self.registry.get_sample_value('s_count'))
        self.assertEqual(10, self.registry.get_sample_value('s_sum'))

    def test_function_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))

        @self.summary.time()
        def f():
            pass

        self.assertEqual(([], None, None, None), inspect.getargspec(f))

        f()
        self.assertEqual(1, self.registry.get_sample_value('s_count'))

    def test_block_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        with self.summary.time():
            pass
        self.assertEqual(1, self.registry.get_sample_value('s_count'))
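The test above exercises Summary.time() in both of its forms. A minimal standalone sketch of that API, with an illustrative metric name:

import time
from prometheus_client import CollectorRegistry, Summary

registry = CollectorRegistry()
s = Summary('request_seconds', 'Time spent handling a request', registry=registry)

@s.time()                      # decorator form: times every call to handle()
def handle():
    time.sleep(0.01)

with s.time():                 # context-manager form: times the with-block
    time.sleep(0.01)

handle()
print(registry.get_sample_value('request_seconds_count'))  # 2.0
print(registry.get_sample_value('request_seconds_sum'))    # roughly 0.02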
Example #2
    def test_timer_not_observable(self):
        s = Summary('test', 'help', labelnames=('label',), registry=self.registry)

        try:
            s.time()
        except ValueError as e:
            self.assertIn('missing label values', str(e))
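The ValueError above is raised because the summary was declared with labelnames but time() was called on the parent metric before any label values were bound. A hedged sketch of the observable path:

from prometheus_client import CollectorRegistry, Summary

registry = CollectorRegistry()
s = Summary('test', 'help', labelnames=('label',), registry=registry)

with s.labels('some_value').time():  # labels() returns an observable child
    pass

print(registry.get_sample_value('test_count', {'label': 'some_value'}))  # 1.0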
Example #3
class TestSummary(unittest.TestCase):
    def setUp(self):
        self.registry = CollectorRegistry()
        self.summary = Summary('s', 'help', registry=self.registry)

    def test_summary(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        self.assertEqual(0, self.registry.get_sample_value('s_sum'))
        self.summary.observe(10)
        self.assertEqual(1, self.registry.get_sample_value('s_count'))
        self.assertEqual(10, self.registry.get_sample_value('s_sum'))

    def test_function_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))

        @self.summary.time()
        def f():
            pass

        self.assertEqual(([], None, None, None), inspect.getargspec(f))

        f()
        self.assertEqual(1, self.registry.get_sample_value('s_count'))

    def test_function_decorator_multithread(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        summary2 = Summary('s2', 'help', registry=self.registry)

        workers = 3
        duration = 0.1
        pool = ThreadPoolExecutor(max_workers=workers)

        @self.summary.time()
        def f():
            time.sleep(duration / 2)
            # Testing that different instances of timer do not interfere
            summary2.time()(lambda: time.sleep(duration / 2))()

        jobs = workers * 3
        for i in range(jobs):
            pool.submit(f)
        pool.shutdown(wait=True)

        self.assertEqual(jobs, self.registry.get_sample_value('s_count'))

        rounding_coefficient = 0.9
        total_expected_duration = jobs * duration * rounding_coefficient
        self.assertLess(total_expected_duration, self.registry.get_sample_value('s_sum'))
        self.assertLess(total_expected_duration / 2, self.registry.get_sample_value('s2_sum'))

    def test_block_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        with self.summary.time():
            pass
        self.assertEqual(1, self.registry.get_sample_value('s_count'))
Example #4
    def test_duplicate_metrics_raises(self):
        registry = CollectorRegistry()
        Counter('c_total', 'help', registry=registry)
        self.assertRaises(ValueError, Counter, 'c_total', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'c_total', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'c_created', 'help', registry=registry)

        Gauge('g_created', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'g_created', 'help', registry=registry)
        self.assertRaises(ValueError, Counter, 'g', 'help', registry=registry)

        Summary('s', 'help', registry=registry)
        self.assertRaises(ValueError, Summary, 's', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 's_created', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 's_sum', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 's_count', 'help', registry=registry)
        # We don't currently expose quantiles, but let's prevent future
        # clashes anyway.
        self.assertRaises(ValueError, Gauge, 's', 'help', registry=registry)

        Histogram('h', 'help', registry=registry)
        self.assertRaises(ValueError, Histogram, 'h', 'help', registry=registry)
        # Clashes against various suffixes.
        self.assertRaises(ValueError, Summary, 'h', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'h_count', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'h_sum', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'h_bucket', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'h_created', 'help', registry=registry)
        # The name of the histogram itself is also taken.
        self.assertRaises(ValueError, Gauge, 'h', 'help', registry=registry)

        Info('i', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'i_info', 'help', registry=registry)
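The pattern under test: registering a metric reserves not only its own name but every sample suffix it can emit (_count, _sum, _created, _bucket, _info, and so on). A small sketch of one collision (exact error wording may differ between versions):

from prometheus_client import CollectorRegistry, Gauge, Summary

registry = CollectorRegistry()
Summary('s', 'help', registry=registry)  # reserves s, s_count, s_sum, s_created

try:
    Gauge('s_count', 'help', registry=registry)
except ValueError as e:
    print(e)  # duplicated timeseries in CollectorRegistry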
Example #5
    def test_function_decorator_multithread(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        summary2 = Summary('s2', 'help', registry=self.registry)

        workers = 3
        duration = 0.1
        pool = ThreadPoolExecutor(max_workers=workers)

        @self.summary.time()
        def f():
            time.sleep(duration / 2)
            # Testing that different instances of timer do not interfere
            summary2.time()(lambda: time.sleep(duration / 2))()

        jobs = workers * 3
        for i in range(jobs):
            pool.submit(f)
        pool.shutdown(wait=True)

        self.assertEqual(jobs, self.registry.get_sample_value('s_count'))

        rounding_coefficient = 0.9
        total_expected_duration = jobs * duration * rounding_coefficient
        self.assertLess(total_expected_duration,
                        self.registry.get_sample_value('s_sum'))
        self.assertLess(total_expected_duration / 2,
                        self.registry.get_sample_value('s2_sum'))
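Why both bounds hold: each call to f() is timed over both sleeps, so s_sum should approach jobs * duration, while the nested summary2 timer covers only the second sleep, roughly half of that. The rounding_coefficient leaves slack for timer resolution:

jobs, duration, rounding_coefficient = 9, 0.1, 0.9
print(jobs * duration * rounding_coefficient)      # about 0.81, lower bound for s_sum
print(jobs * duration * rounding_coefficient / 2)  # about 0.405, lower bound for s2_sum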
Example #6
    def test_summary_adds(self):
        s1 = Summary('s', 'help', registry=None)
        s2 = Summary('s', 'help', registry=None)
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        self.assertEqual(0, self.registry.get_sample_value('s_sum'))
        s1.observe(1)
        s2.observe(2)
        self.assertEqual(2, self.registry.get_sample_value('s_count'))
        self.assertEqual(3, self.registry.get_sample_value('s_sum'))
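Note that two same-named Summary objects only accumulate into one set of samples in multiprocess mode, where values live in shared memory-mapped files (Example #9 below sets that up explicitly with MultiProcessValue). For reference, the documented way to read such values back, assuming the multiprocess directory environment variable (PROMETHEUS_MULTIPROC_DIR in current releases) points at a writable directory:

from prometheus_client import CollectorRegistry, multiprocess

registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)  # merges samples from all processes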
Example #7
    def test_restricted_registry(self):
        registry = CollectorRegistry()
        Counter('c_total', 'help', registry=registry)
        Summary('s', 'help', registry=registry).observe(7)

        m = Metric('s', 'help', 'summary')
        m.samples = [Sample('s_sum', {}, 7)]
        self.assertEqual([m], registry.restricted_registry(['s_sum']).collect())
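restricted_registry() is what the exposition code uses to serve name-filtered scrapes. A hedged sketch of exposing just one series through the standard text encoder:

from prometheus_client import CollectorRegistry, Summary, generate_latest

registry = CollectorRegistry()
Summary('s', 'help', registry=registry).observe(7)

# e.g. answering a /metrics?name[]=s_sum request
print(generate_latest(registry.restricted_registry(['s_sum'])).decode())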
Example #8
    def test_timer_not_observable(self):
        s = Summary('test', 'help', labelnames=('label',), registry=self.registry)

        def manager():
            with s.time():
                pass

        assert_not_observable(manager)
Example #9
    def test_summary_adds(self):
        s1 = Summary('s', 'help', registry=None)
        values.ValueClass = MultiProcessValue(lambda: 456)
        s2 = Summary('s', 'help', registry=None)
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        self.assertEqual(0, self.registry.get_sample_value('s_sum'))
        s1.observe(1)
        s2.observe(2)
        self.assertEqual(2, self.registry.get_sample_value('s_count'))
        self.assertEqual(3, self.registry.get_sample_value('s_sum'))
Example #10
    def test_unregister_works(self):
        registry = CollectorRegistry()
        s = Summary('s', 'help', registry=registry)
        self.assertRaises(ValueError,
                          Gauge,
                          's_count',
                          'help',
                          registry=registry)
        registry.unregister(s)
        Gauge('s_count', 'help', registry=registry)
Example #11
    def test_target_info_restricted_registry(self):
        registry = CollectorRegistry(target_info={'foo': 'bar'})
        Summary('s', 'help', registry=registry).observe(7)

        m = Metric('s', 'help', 'summary')
        m.samples = [Sample('s_sum', {}, 7)]
        self.assertEqual([m], registry.restricted_registry(['s_sum']).collect())

        m = Metric('target', 'Target metadata', 'info')
        m.samples = [Sample('target_info', {'foo': 'bar'}, 1)]
        self.assertEqual([m], registry.restricted_registry(['target_info']).collect())
Example #12
    def test_reset_registry_with_labels(self):
        registry = CollectorRegistry()

        gauge = Gauge('g', 'help', ['l'], registry=registry)
        gauge.labels('a').inc()
        self.assertEqual(1, registry.get_sample_value('g', {'l': 'a'}))

        counter = Counter('c_total', 'help', ['l'], registry=registry)
        counter.labels('a').inc()
        self.assertEqual(1, registry.get_sample_value('c_total', {'l': 'a'}))

        summary = Summary('s', 'help', ['l'], registry=registry)
        summary.labels('a').observe(10)
        self.assertEqual(1, registry.get_sample_value('s_count', {'l': 'a'}))
        self.assertEqual(10, registry.get_sample_value('s_sum', {'l': 'a'}))

        histogram = Histogram('h', 'help', ['l'], registry=registry)
        histogram.labels('a').observe(2)
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0', 'l': 'a'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '2.5', 'l': 'a'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '5.0', 'l': 'a'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '+Inf', 'l': 'a'}))
        self.assertEqual(1, registry.get_sample_value('h_count', {'l': 'a'}))
        self.assertEqual(2, registry.get_sample_value('h_sum', {'l': 'a'}))


        registry.reset()

        self.assertEqual(0, registry.get_sample_value('g', {'l': 'a'}))

        self.assertEqual(0, registry.get_sample_value('c_total', {'l': 'a'}))

        self.assertEqual(0, registry.get_sample_value('s_count', {'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('s_sum', {'l': 'a'}))

        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0', 'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '2.5', 'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '5.0', 'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '+Inf', 'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_count', {'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_sum', {'l': 'a'}))
Example #13
    def test_restricted_registry_does_not_call_extra(self):
        from unittest.mock import MagicMock
        registry = CollectorRegistry()
        mock_collector = MagicMock()
        mock_collector.describe.return_value = [Metric('foo', 'help', 'summary')]
        registry.register(mock_collector)
        Summary('s', 'help', registry=registry).observe(7)

        m = Metric('s', 'help', 'summary')
        m.samples = [Sample('s_sum', {}, 7)]
        self.assertEqual([m], list(registry.restricted_registry(['s_sum']).collect()))
        mock_collector.collect.assert_not_called()
Example #14
    def test_restricted_registry_does_not_yield_while_locked(self):
        registry = CollectorRegistry(target_info={'foo': 'bar'})
        Summary('s', 'help', registry=registry).observe(7)

        m = Metric('s', 'help', 'summary')
        m.samples = [Sample('s_sum', {}, 7)]
        self.assertEqual([m], list(registry.restricted_registry(['s_sum']).collect()))

        m = Metric('target', 'Target metadata', 'info')
        m.samples = [Sample('target_info', {'foo': 'bar'}, 1)]
        for _ in registry.restricted_registry(['target_info', 's_sum']).collect():
            self.assertFalse(registry._lock.locked())
Example #15
    def setUp(self):
        self.registry = CollectorRegistry()
        self.summary = Summary('s', 'help', registry=self.registry)
Example #16
    def test_reset_registry(self):
        registry = CollectorRegistry()

        gauge = Gauge('g', 'help', registry=registry)
        gauge.inc()
        self.assertEqual(1, registry.get_sample_value('g'))

        counter = Counter('c_total', 'help', registry=registry)
        counter.inc()
        self.assertEqual(1, registry.get_sample_value('c_total'))

        summary = Summary('s', 'help', registry=registry)
        summary.observe(10)
        self.assertEqual(1, registry.get_sample_value('s_count'))
        self.assertEqual(10, registry.get_sample_value('s_sum'))

        histogram = Histogram('h', 'help', registry=registry)
        histogram.observe(2)
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '2.5'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '5.0'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '+Inf'}))
        self.assertEqual(1, registry.get_sample_value('h_count'))
        self.assertEqual(2, registry.get_sample_value('h_sum'))


        registry.reset()

        self.assertEqual(0, registry.get_sample_value('g'))

        self.assertEqual(0, registry.get_sample_value('c_total'))

        self.assertEqual(0, registry.get_sample_value('s_count'))
        self.assertEqual(0, registry.get_sample_value('s_sum'))

        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '2.5'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '5.0'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '+Inf'}))
        self.assertEqual(0, registry.get_sample_value('h_count'))
        self.assertEqual(0, registry.get_sample_value('h_sum'))

        # --------------------------

        gauge.inc()
        gauge.inc()
        gauge.inc()
        gauge.dec()
        self.assertEqual(2, registry.get_sample_value('g'))

        counter.inc()
        counter.inc()
        counter.inc()
        self.assertEqual(3, registry.get_sample_value('c_total'))

        summary.observe(10)
        summary.observe(5)
        self.assertEqual(2, registry.get_sample_value('s_count'))
        self.assertEqual(15, registry.get_sample_value('s_sum'))

        histogram.observe(2)
        histogram.observe(6)
        histogram.observe(1)
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '1.0'}))
        self.assertEqual(2, registry.get_sample_value('h_bucket', {'le': '2.5'}))
        self.assertEqual(2, registry.get_sample_value('h_bucket', {'le': '5.0'}))
        self.assertEqual(3, registry.get_sample_value('h_bucket', {'le': '+Inf'}))
        self.assertEqual(3, registry.get_sample_value('h_count'))
        self.assertEqual(9, registry.get_sample_value('h_sum'))


        registry.reset()

        self.assertEqual(0, registry.get_sample_value('g'))

        self.assertEqual(0, registry.get_sample_value('c_total'))

        self.assertEqual(0, registry.get_sample_value('s_count'))
        self.assertEqual(0, registry.get_sample_value('s_sum'))

        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '2.5'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '5.0'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '+Inf'}))
        self.assertEqual(0, registry.get_sample_value('h_count'))
        self.assertEqual(0, registry.get_sample_value('h_sum'))
Example #17
    def process_datapoint(self, datapoint):
        if (datapoint['feed'] != 'metrics'):
            log.debug(
                "'feed' field is not 'metrics' in datapoint, skipping: {}".
                format(datapoint))
            return

        daemon = str(datapoint['service']).replace('druid/', '').lower()

        if (daemon not in self.supported_metrics):
            log.debug("daemon '{}' is not supported, skipping: {}".format(
                daemon, datapoint))
            return

        metric_name = str(datapoint['metric'])

        if (metric_name not in self.supported_metrics[daemon]):
            log.debug("metric '{}' is not supported, skipping: {}".format(
                datapoint['metric'], datapoint))
            return

        config = self.supported_metrics[daemon][metric_name]
        config.setdefault('labels', [])
        config.setdefault('type', 'gauge')
        config.setdefault('suffix', '_count')

        metric_type = config['type']

        if metric_type == 'skip':
            return

        metric_name = self._get_metric_name(daemon, metric_name, config)
        metric_value = float(datapoint['value'])
        metric_labels = tuple(sorted(config['labels'] + ['host']))
        label_values = tuple(
            [datapoint[label_name] for label_name in metric_labels])

        if '_metric_' not in config:
            if metric_type == 'counter':
                config['_metric_'] = Counter(metric_name, metric_name,
                                             metric_labels)
            elif metric_type == 'gauge':
                config['_metric_'] = Gauge(metric_name, metric_name,
                                           metric_labels)
            elif metric_type == 'summary':
                config['_metric_'] = Summary(metric_name, metric_name,
                                             metric_labels)
            elif metric_type == 'histogram':
                config['_metric_'] = Histogram(metric_name,
                                               metric_name,
                                               metric_labels,
                                               buckets=config['buckets'])

        metric = config['_metric_']

        if len(metric_labels) > 0:
            metric = metric.labels(*label_values)

        if metric_type == 'counter':
            metric.inc(metric_value)
        elif metric_type == 'gauge':
            metric.set(metric_value)
        elif metric_type == 'summary':
            metric.observe(metric_value)
        elif metric_type == 'histogram':
            metric.observe(metric_value)

        self.datapoints_processed.inc()
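For context, process_datapoint() expects the JSON body that a Druid HTTP emitter posts. A hypothetical datapoint in that shape (field names taken from the code above, values invented):

datapoint = {
    'feed': 'metrics',
    'service': 'druid/broker',
    'metric': 'query/time',
    'value': 120,
    'host': 'druid1001:8082',
    'dataSource': 'wiki',
}
# exporter.process_datapoint(datapoint) would then create or update the matching
# Prometheus metric, labeled by host plus whatever labels the config lists.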
Example #18
class DruidCollector(object):
    scrape_duration = Summary('druid_scrape_duration_seconds',
                              'Druid scrape duration')

    def __init__(self, metrics_config, kafka_config=None):

        # The ingestion of datapoints is separated from their processing
        # to separate concerns and avoid unnecessary slowdowns for the Druid
        # daemons sending data.
        # Only one thread de-queues and processes datapoints, so we don't
        # need any special locking to guarantee consistency.
        # Since this thread is not I/O bound there seems to be no case for
        # using a gevent greenlet, but more testing might prove the contrary.
        self.datapoints_queue = queue.Queue()
        self.stop_threads = threading.Event()

        threading.Thread(target=self.process_queued_datapoints,
                         args=(self.stop_threads, )).start()

        # if a Kafka config is provided, create a dedicated thread
        # that pulls datapoints from a Kafka topic.
        # The thread will then push datapoints to the same queue that
        # the HTTP server uses. In this way the exporter allows a mixed
        # configuration for Druid Brokers between HTTPEmitter and
        # KafkaEmitter (for daemons emitting too many datapoints/s).
        if kafka_config:
            if KafkaConsumer:
                threading.Thread(target=self.pull_datapoints_from_kafka,
                                 args=(kafka_config,
                                       self.stop_threads)).start()
            else:
                log.error('A Kafka configuration was provided, but it seems '
                          'that the Kafka client library is not available. '
                          'Please install the correct dependencies.')

        # Datapoints successfully registered
        self.datapoints_registered = 0

        # Data structure holding histogram data
        # Format: {daemon: {metric_name: {bucket1: value, bucket2: value, ...}}}
        self.histograms = defaultdict(lambda: {})

        # Data structure holding counters data
        # Format: {daemon: {label_name: {label2_name: value}}}
        # The order of the labels listed in supported_metric_names is important
        # since it is reflected in this data structure. The layering is not
        # strictly important for the final prometheus metrics but
        # it simplifies the code that creates them (collect method).
        self.counters = defaultdict(lambda: {})

        # List of metrics to collect/expose via the exporter
        self.metrics_config = metrics_config
        self.supported_daemons = list(self.metrics_config.keys())

    def stop_running_threads(self):
        self.stop_threads.set()

    @staticmethod
    def sanitize_field(datapoint_field):
        return datapoint_field.replace('druid/', '').lower()

    def store_counter(self, datapoint):
        """ This function adds data to the self.counters dictiorary
            following its convention, creating on the fly
            the missing bits. For example, given:
            self.counters = {}
            datapoint = {'service': 'druid/broker',
                         'metric'='segment/size',
                         'datasource': 'test', 'value': 10}

            This function will create the following:
            self.counters = {
                'segment/size': {
                    'broker':
                        { ('test',): 10 }
                    }
                }

            The algorithm is generic enough to support all metrics handled by
            self.counters without caring about the number of labels needed.
        """
        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        metric_name = str(datapoint['metric'])
        metric_value = float(datapoint['value'])

        metrics_storage = self.counters[metric_name]
        metric_labels = self.metrics_config[daemon][metric_name]['labels']

        metrics_storage.setdefault(daemon, {})

        label_values = []
        if metric_labels:
            for label in metric_labels:
                try:
                    label_values.append(str(datapoint[label]))
                except KeyError as e:
                    log.error(
                        'Missing label {} for datapoint {} (expected labels: {}), '
                        'dropping it. Please check your metric configuration file.'
                        .format(label, datapoint, metric_labels))
                    return

        # Convert the list of labels to a tuple to allow indexing
        metrics_storage[daemon][tuple(label_values)] = metric_value
        log.debug("The datapoint {} modified the counters dictionary to: \n{}".
                  format(datapoint, self.counters))

    def store_histogram(self, datapoint):
        """ Store datapoints that will end up in histogram buckets using a dictiorary.
            This function is highly customized for the only histograms configured
            so far, rather than being generic like store_counter. Example of how
            it works:
            self.histograms = {}
            datapoint = {'service': 'druid/broker', 'metric'='query/time',
                         'datasource': 'test', 'value': 10}

            This function will create the following:
            self.histograms = {
                'query/time': {
                    'broker': {
                        ('test',): {'10': 1, '100': 1, ..., 'sum': 10}
                    }
                }
            }
        """
        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        metric_name = str(datapoint['metric'])
        metric_value = float(datapoint['value'])
        metric_labels = self.metrics_config[daemon][metric_name]['labels']
        metric_buckets = self.metrics_config[daemon][metric_name]['buckets']

        self.histograms.setdefault(metric_name, {daemon: {}})
        self.histograms[metric_name].setdefault(daemon, {})

        label_values = []
        if metric_labels:
            for label in metric_labels:
                try:
                    label_values.append(str(datapoint[label]))
                except KeyError as e:
                    log.error(
                        'Missing label {} for datapoint {} (expected labels: {}), '
                        'dropping it. Please check your metric configuration file.'
                        .format(label, datapoint, metric_labels))
                    return

        # Convert the list of labels to a tuple to allow indexing
        self.histograms[metric_name][daemon].setdefault(
            tuple(label_values), {})

        stored_buckets = self.histograms[metric_name][daemon][tuple(
            label_values)]
        for bucket in metric_buckets:
            if bucket not in stored_buckets:
                stored_buckets[bucket] = 0
            if bucket != 'sum' and metric_value <= float(bucket):
                stored_buckets[bucket] += 1
        stored_buckets['sum'] += metric_value

        log.debug(
            "The datapoint {} modified the histograms dictionary to: \n{}".
            format(datapoint, self.histograms))

    @scrape_duration.time()
    def collect(self):
        # Loop through all metrics configured, and get datapoints
        # for them saved by the exporter.
        for daemon in self.metrics_config.keys():
            for druid_metric_name in self.metrics_config[daemon]:
                metric_type = self.metrics_config[daemon][druid_metric_name][
                    'type']

                if metric_type == 'gauge' or metric_type == 'counter':
                    try:
                        self.counters[druid_metric_name]
                        self.counters[druid_metric_name][daemon]
                    except KeyError:
                        continue

                    if metric_type == 'gauge':
                        metric_family_obj = GaugeMetricFamily
                    else:
                        metric_family_obj = CounterMetricFamily

                    prometheus_metric = metric_family_obj(
                        self.metrics_config[daemon][druid_metric_name]
                        ['prometheus_metric_name'],
                        self.metrics_config[daemon][druid_metric_name]
                        ['description'],
                        labels=map(
                            lambda x: x.lower(), self.metrics_config[daemon]
                            [druid_metric_name]['labels']))
                    label_values = list(
                        self.counters[druid_metric_name][daemon].keys())
                    for label_value in label_values:
                        value = self.counters[druid_metric_name][daemon][
                            label_value]
                        prometheus_metric.add_metric(label_value, value)

                elif metric_type == 'histogram':
                    try:
                        self.histograms[druid_metric_name]
                        self.histograms[druid_metric_name][daemon]
                    except KeyError:
                        continue

                    prometheus_metric = HistogramMetricFamily(
                        self.metrics_config[daemon][druid_metric_name]
                        ['prometheus_metric_name'],
                        self.metrics_config[daemon][druid_metric_name]
                        ['description'],
                        labels=map(
                            lambda x: x.lower(), self.metrics_config[daemon]
                            [druid_metric_name]['labels']))

                    label_values = list(
                        self.histograms[druid_metric_name][daemon].keys())
                    for label_value in label_values:
                        value = self.histograms[druid_metric_name][daemon][
                            label_value]
                        buckets_without_sum = [[key, value]
                                               for key, value in value.items()
                                               if key != 'sum']
                        prometheus_metric.add_metric(
                            label_value,
                            buckets=buckets_without_sum,
                            sum_value=value['sum'])

                else:
                    log.info(
                        'metric type not supported: {}'.format(metric_type))
                    continue

                yield prometheus_metric

        registered = CounterMetricFamily(
            'druid_exporter_datapoints_registered',
            'Number of datapoints successfully registered '
            'by the exporter.')
        registered.add_metric([], self.datapoints_registered)
        yield registered

    def register_datapoint(self, datapoint):
        if (datapoint['feed'] != 'metrics'):
            log.debug("The following feed does not contain a datapoint, "
                      "dropping it: {}".format(datapoint))
            return

        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        if (datapoint['feed'] != 'metrics'
                or daemon not in self.supported_daemons or datapoint['metric']
                not in self.metrics_config[daemon].keys()):
            log.debug(
                "The following datapoint is not supported, either "
                "because the 'feed' field is not 'metrics' or "
                "the daemon name ({}) is not listed in the supported ones ({}) or "
                "the metric itself is not listed in the exporter's config file: {}"
                .format(daemon, self.supported_daemons, datapoint))
            return

        self.datapoints_queue.put((daemon, datapoint))

    def process_queued_datapoints(self, stop_threads):
        log.debug('Process datapoints thread starting..')

        while not stop_threads.is_set():
            (daemon, datapoint) = self.datapoints_queue.get()
            metric_name = str(datapoint['metric'])
            if self.metrics_config[daemon][metric_name]['type'] == 'histogram':
                self.store_histogram(datapoint)
            else:
                self.store_counter(datapoint)

            self.datapoints_registered += 1

        log.debug('Process datapoints thread shutting down..')

    def pull_datapoints_from_kafka(self, kafka_config, stop_threads):
        log.debug('Kafka datapoints puller thread starting..')

        consumer = KafkaConsumer(
            kafka_config['topic'],
            group_id=kafka_config['group_id'],
            bootstrap_servers=kafka_config['bootstrap_servers'])

        while not stop_threads.is_set():
            consumer.poll()
            for message in consumer:
                try:
                    json_message = json.loads(message.value.decode())
                    log.debug('Datapoint from kafka: %s', json_message)
                    if isinstance(json_message, list):
                        for datapoint in json_message:
                            self.register_datapoint(datapoint)
                    else:
                        self.register_datapoint(json_message)
                except json.JSONDecodeError:
                    log.exception(
                        "Failed to decode message from Kafka, skipping..")
                except Exception as e:
                    log.exception(
                        "Generic exception while pulling datapoints from Kafka"
                    )

        log.debug('Kafka datapoints puller thread shutting down..')
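A hypothetical way to wire this collector into an exporter process (metrics_config would normally be loaded from the exporter's YAML configuration):

from prometheus_client import REGISTRY, start_http_server

collector = DruidCollector(metrics_config)  # metrics_config loaded elsewhere
REGISTRY.register(collector)                # collect() runs on every scrape
start_http_server(8000)                     # serve /metrics on port 8000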
Example #19
    def process_datapoint(self, datapoint):
        global sep_config
        if (datapoint['feed'] != 'metrics'):
            log.debug(
                "'feed' field is not 'metrics' in datapoint, skipping: {}".
                format(datapoint))
            return

        daemon = str(datapoint['service']).replace('druid/', '').lower()

        if (daemon not in self.supported_metrics):
            log.warn("daemon '{}' is not supported, skipping: {}".format(
                daemon, datapoint))
            return

        metric_name = str(datapoint['metric'])

        if (metric_name not in self.supported_metrics[daemon]):
            log.warn("metric '{}' is not supported, skipping: {}".format(
                datapoint['metric'], datapoint))
            return

#        if 'sep_config' not in locals():
#            sep_config = {}
        if daemon not in sep_config:
            sep_config[daemon] = {}
            log.debug("Reverse Metric: {}".format(sep_config))
        if metric_name not in sep_config[daemon]:
            sep_config[daemon][metric_name] = copy.copy(
                self.supported_metrics[daemon][metric_name])
            log.debug("Reverse IFtrue: {}")
        #config = self.supported_metrics[daemon][metric_name]
        log.debug("Reverse Metric: {}".format(sep_config))
        sep_config[daemon][metric_name].setdefault('labels', [])
        sep_config[daemon][metric_name].setdefault('type', 'gauge')
        sep_config[daemon][metric_name].setdefault('suffix', '_count')

        metric_type = sep_config[daemon][metric_name]['type']

        if metric_type == 'skip':
            return

        metric_name_full = self._get_metric_name(
            daemon, metric_name, sep_config[daemon][metric_name])
        metric_value = float(datapoint['value'])
        metric_labels = tuple(
            sorted(sep_config[daemon][metric_name]['labels'] + ['host']))
        log.debug("Labels: {}".format(metric_labels))
        label_values = tuple([
            datapoint[label_name.replace('_', ' ')]
            for label_name in metric_labels
        ])
        log.debug("Labels value: {}".format(label_values))

        if '_metric_' not in sep_config[daemon][metric_name]:
            if metric_type == 'counter':
                sep_config[daemon][metric_name]['_metric_'] = Counter(
                    metric_name_full, metric_name_full, metric_labels)
            elif metric_type == 'gauge':
                sep_config[daemon][metric_name]['_metric_'] = Gauge(
                    metric_name_full, metric_name_full, metric_labels)
            elif metric_type == 'summary':
                sep_config[daemon][metric_name]['_metric_'] = Summary(
                    metric_name_full, metric_name_full, metric_labels)
            elif metric_type == 'histogram':
                sep_config[daemon][metric_name]['_metric_'] = Histogram(
                    metric_name_full,
                    metric_name_full,
                    metric_labels,
                    buckets=sep_config[daemon][metric_name]['buckets'])
                log.debug("final metric_name: {}".format(metric_name))
                log.debug("sep config : {}".format(sep_config[daemon]))

        metric = sep_config[daemon][metric_name]['_metric_']

        if len(metric_labels) > 0:
            metric = metric.labels(*label_values)

        if metric_type == 'counter':
            metric.inc(metric_value)
        elif metric_type == 'gauge':
            metric.set(metric_value)
        elif metric_type == 'summary':
            metric.observe(metric_value)
        elif metric_type == 'histogram':
            metric.observe(metric_value)

        self.datapoints_processed.inc()
Example #20
dotenv_path = os.path.join(os.path.dirname(__file__), '.env')
load_dotenv(dotenv_path)
# Port for the web server to listen on
HTTP_PORT = int(os.environ.get("HTTP_PORT"))
# Oracle connection username/password (data source name)
DATA_SOURCE_NAME = os.environ.get("DATA_SOURCE_NAME")
# Logging format and level
LOG_LEVEL = os.environ.get("LOG_LEVEL")
FORMAT = '%(asctime)-15s %(thread)-5d:%(message)s'
logging.basicConfig(format=FORMAT)
logger = logging.getLogger('oracledb_exporter')
logger.setLevel(LOG_LEVEL)
# Time spent collecting the data
COLLECTION_TIME = Summary(
    'oracledb_collector_collect_seconds',
    'Time spent to collect metrics from Oracle'
)
SCRAPE_INTERVAL = float(os.environ.get("SCRAPE_INTERVAL"))


class OracleCollector(object):

    def __init__(self):
        self.db_connect, self.db_cursor = '', ''
        self.database_version_gauge = Gauge(
            "oracledb_version_info",
            ""
            "TYPE gauge.",
            ["version"]
        )
        # ["oracle", "plsql", "core", "tns", "nlsrtl"],
Example #21
import json
import datetime
import sys
import logging
import asyncio
from concurrent import futures
from cachetools import cached, TTLCache, cachedmethod
from aliyunsdkcore.client import AcsClient
from aliyunsdkrds.request.v20140815.DescribeDBInstancesRequest import DescribeDBInstancesRequest
from aliyunsdkrds.request.v20140815.DescribeDBInstancePerformanceRequest import DescribeDBInstancePerformanceRequest
from aliyunsdkrds.request.v20140815.DescribeResourceUsageRequest import DescribeResourceUsageRequest
from prometheus_client.core import Summary, GaugeMetricFamily, InfoMetricFamily
from prometheus_client import Counter, Info

# api_request here records the latency of Aliyun API calls
api_request_summry = Summary('aliyun_api_request_latency_seconds',
                             'CloudMonitor request latency', ['api'])
api_request_failed_summry = Summary(
    'aliyun_api_failed_request_latency_seconds',
    'CloudMonitor failed request latency', ['api'])
# Counts Aliyun API calls
api_request_count = Counter(
    'aliyun_api_request_counter',
    'Aliyun API request counter',
)
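# Hypothetical usage of the two metrics above (not part of the original source):
# a timed Aliyun API call would record its latency under the 'api' label and
# bump the request counter, e.g.
#   api_request_summry.labels('DescribeDBInstances').observe(0.42)
#   api_request_count.inc()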


class CollectorConfig(object):
    def __init__(
        self,
        file_opts,
        command_args,
Example #22
class DruidCollector(object):
    scrape_duration = Summary('druid_scrape_duration_seconds',
                              'Druid scrape duration')

    def __init__(self, metrics_config):
        # Datapoints successfully registered
        self.datapoints_registered = 0

        # Data structure holding histogram data
        # Format: {daemon: {metric_name: {bucket1: value, bucket2: value, ...}}}
        self.histograms = defaultdict(lambda: {})

        # Data structure holding counters data
        # Format: {daemon: {label_name: {label2_name: value}}}
        # The order of the labels listed in supported_metric_names is important
        # since it is reflected in this data structure. The layering is not
        # strictly important for the final prometheus metrics but
        # it simplifies the code that creates them (collect method).
        self.counters = defaultdict(lambda: {})

        # List of metrics to collect/expose via the exporter
        self.metrics_config = metrics_config
        self.supported_daemons = self.metrics_config.keys()

    @staticmethod
    def sanitize_field(datapoint_field):
        return datapoint_field.replace('druid/', '').lower()

    def store_counter(self, datapoint):
        """ This function adds data to the self.counters dictiorary
            following its convention, creating on the fly
            the missing bits. For example, given:
            self.counters = {}
            datapoint = {'service': 'druid/broker',
                         'metric'='segment/size',
                         'datasource': 'test', 'value': 10}

            This function will create the following:
            self.counters = {
                'segment/size': {
                    'broker':
                        { ('test',): 10 }
                    }
                }

            The algorithm is generic enough to support all metrics handled by
            self.counters without caring about the number of labels needed.
        """
        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        metric_name = str(datapoint['metric'])
        metric_value = float(datapoint['value'])

        metrics_storage = self.counters[metric_name]
        metric_labels = self.metrics_config[daemon][metric_name]['labels']

        metrics_storage.setdefault(daemon, {})

        label_values = []
        if metric_labels:
            for label in metric_labels:
                try:
                    label_values.append(str(datapoint[label]))
                except KeyError as e:
                    log.error(
                        'Missing label {} for datapoint {} (expected labels: {}), '
                        'dropping it. Please check your metric configuration file.'
                        .format(label, datapoint, metric_labels))
                    return

        # Convert the list of labels to a tuple to allow indexing
        metrics_storage[daemon][tuple(label_values)] = metric_value
        log.debug("The datapoint {} modified the counters dictionary to: \n{}".
                  format(datapoint, self.counters))

    def store_histogram(self, datapoint):
        """ Store datapoints that will end up in histogram buckets using a dictiorary.
            This function is highly customized for the only histograms configured
            so far, rather than being generic like store_counter. Example of how
            it works:
            self.histograms = {}
            datapoint = {'service': 'druid/broker', 'metric'='query/time',
                         'datasource': 'test', 'value': 10}

            This function will create the following:
            self.histograms = {
                'query/time': {
                    'broker': {
                        ('test',): {'10': 1, '100': 1, ..., 'sum': 10}
                    }
                }
            }
        """
        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        metric_name = str(datapoint['metric'])
        metric_value = float(datapoint['value'])
        metric_labels = self.metrics_config[daemon][metric_name]['labels']
        metric_buckets = self.metrics_config[daemon][metric_name]['buckets']

        self.histograms.setdefault(metric_name, {daemon: {}})
        self.histograms[metric_name].setdefault(daemon, {})

        label_values = []
        if metric_labels:
            for label in metric_labels:
                try:
                    label_values.append(str(datapoint[label]))
                except KeyError as e:
                    log.error(
                        'Missing label {} for datapoint {} (expected labels: {}), '
                        'dropping it. Please check your metric configuration file.'
                        .format(label, datapoint, metric_labels))
                    return

        # Convert the list of labels to a tuple to allow indexing
        self.histograms[metric_name][daemon].setdefault(
            tuple(label_values), {})

        stored_buckets = self.histograms[metric_name][daemon][tuple(
            label_values)]
        for bucket in metric_buckets:
            if bucket not in stored_buckets:
                stored_buckets[bucket] = 0
            if bucket != 'sum' and metric_value <= float(bucket):
                stored_buckets[bucket] += 1
        stored_buckets['sum'] += metric_value

        log.debug(
            "The datapoint {} modified the histograms dictionary to: \n{}".
            format(datapoint, self.histograms))

    @scrape_duration.time()
    def collect(self):
        # Loop through all metrics configured, and get datapoints
        # for them saved by the exporter.
        for daemon in self.metrics_config.keys():
            for druid_metric_name in self.metrics_config[daemon]:
                metric_type = self.metrics_config[daemon][druid_metric_name][
                    'type']

                if metric_type == 'gauge' or metric_type == 'counter':
                    try:
                        self.counters[druid_metric_name]
                        self.counters[druid_metric_name][daemon]
                    except KeyError:
                        continue

                    if metric_type == 'gauge':
                        metric_family_obj = GaugeMetricFamily
                    else:
                        metric_family_obj = CounterMetricFamily

                    prometheus_metric = metric_family_obj(
                        self.metrics_config[daemon][druid_metric_name]
                        ['prometheus_metric_name'],
                        self.metrics_config[daemon][druid_metric_name]
                        ['description'],
                        labels=map(
                            lambda x: x.lower(), self.metrics_config[daemon]
                            [druid_metric_name]['labels']))
                    label_values = list(
                        self.counters[druid_metric_name][daemon].keys())
                    for label_value in label_values:
                        value = self.counters[druid_metric_name][daemon][
                            label_value]
                        prometheus_metric.add_metric(label_value, value)

                elif metric_type == 'histogram':
                    try:
                        self.histograms[druid_metric_name]
                        self.histograms[druid_metric_name][daemon]
                    except KeyError:
                        continue

                    prometheus_metric = HistogramMetricFamily(
                        self.metrics_config[daemon][druid_metric_name]
                        ['prometheus_metric_name'],
                        self.metrics_config[daemon][druid_metric_name]
                        ['description'],
                        labels=map(
                            lambda x: x.lower(), self.metrics_config[daemon]
                            [druid_metric_name]['labels']))

                    label_values = list(
                        self.histograms[druid_metric_name][daemon].keys())
                    for label_value in label_values:
                        value = self.histograms[druid_metric_name][daemon][
                            label_value]
                        buckets_without_sum = [[key, value]
                                               for key, value in value.items()
                                               if key != 'sum']
                        prometheus_metric.add_metric(
                            label_value,
                            buckets=buckets_without_sum,
                            sum_value=value['sum'])

                else:
                    log.info(
                        'metric type not supported: {}'.format(metric_type))
                    continue

                yield prometheus_metric

        registered = CounterMetricFamily(
            'druid_exporter_datapoints_registered',
            'Number of datapoints successfully registered '
            'by the exporter.')
        registered.add_metric([], self.datapoints_registered)
        yield registered

    def register_datapoint(self, datapoint):
        if (datapoint['feed'] != 'metrics'):
            log.debug("The following feed does not contain a datapoint, "
                      "dropping it: {}".format(datapoint))
            return

        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        if (datapoint['feed'] != 'metrics'
                or daemon not in self.supported_daemons or datapoint['metric']
                not in self.metrics_config[daemon].keys()):
            log.debug(
                "The following datapoint is not supported, either "
                "because the 'feed' field is not 'metrics' or "
                "the metric itself is not supported: {}".format(datapoint))
            return

        metric_name = str(datapoint['metric'])
        if self.metrics_config[daemon][metric_name]['type'] == 'histogram':
            self.store_histogram(datapoint)
        else:
            self.store_counter(datapoint)

        self.datapoints_registered += 1
Example #23
class Logger:
    """Class used to display logs on the console.
    """

    def __init__(self):
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        formatter = logging.Formatter('%(asctime)s %(message)s')
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)
        self.logger = logger


# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request')


class CloudwatchLogsCollector:
    """Class used to get metrics from AWS Cloudwatch Logs.
    """

    def __init__(self):
        self.client = boto3.client('logs')
        self.metric_prefix = "aws_logs_"
        self.logger = Logger().logger

    @REQUEST_TIME.time()
    def collect_log_groups(self):
        log_group_stored_byte = GaugeMetricFamily(
            self.metric_prefix + 'stored_bytes',
Example #24
class TestSummary(unittest.TestCase):
    def setUp(self):
        self.registry = CollectorRegistry()
        self.summary = Summary('s', 'help', registry=self.registry)

    def test_repr(self):
        self.assertEqual(repr(self.summary), "prometheus_client.metrics.Summary(s)")

    def test_summary(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        self.assertEqual(0, self.registry.get_sample_value('s_sum'))
        self.summary.observe(10)
        self.assertEqual(1, self.registry.get_sample_value('s_count'))
        self.assertEqual(10, self.registry.get_sample_value('s_sum'))

    def test_function_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))

        @self.summary.time()
        def f():
            pass

        self.assertEqual(([], None, None, None), getargspec(f))

        f()
        self.assertEqual(1, self.registry.get_sample_value('s_count'))

    def test_function_decorator_multithread(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        summary2 = Summary('s2', 'help', registry=self.registry)

        workers = 3
        duration = 0.1
        pool = ThreadPoolExecutor(max_workers=workers)

        @self.summary.time()
        def f():
            time.sleep(duration / 2)
            # Testing that different instances of timer do not interfere
            summary2.time()(lambda: time.sleep(duration / 2))()

        jobs = workers * 3
        for i in range(jobs):
            pool.submit(f)
        pool.shutdown(wait=True)

        self.assertEqual(jobs, self.registry.get_sample_value('s_count'))

        rounding_coefficient = 0.9
        total_expected_duration = jobs * duration * rounding_coefficient
        self.assertLess(total_expected_duration, self.registry.get_sample_value('s_sum'))
        self.assertLess(total_expected_duration / 2, self.registry.get_sample_value('s2_sum'))

    def test_function_decorator_reentrancy(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))

        iterations = 2
        sleep = 0.1

        @self.summary.time()
        def f(i=1):
            time.sleep(sleep)
            if i == iterations:
                return
            f(i + 1)

        f()

        self.assertEqual(iterations, self.registry.get_sample_value('s_count'))

        # Arithmetic series with d == a_1
        total_expected_duration = sleep * (iterations ** 2 + iterations) / 2
        rounding_coefficient = 0.9
        total_expected_duration *= rounding_coefficient
        self.assertLess(total_expected_duration, self.registry.get_sample_value('s_sum'))

    def test_block_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        with self.summary.time():
            pass
        self.assertEqual(1, self.registry.get_sample_value('s_count'))

    def test_timer_not_observable(self):
        s = Summary('test', 'help', labelnames=('label',), registry=self.registry)

        try:
            s.time()
        except ValueError as e:
            self.assertIn('missing label values', str(e))
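A worked check of the reentrancy bound above: the call at depth i stays inside its timer for (iterations - i + 1) sleeps, so the observed total is an arithmetic series:

sleep, iterations = 0.1, 2
# outer call spans 2 sleeps (0.2s), inner call spans 1 sleep (0.1s)
total = sleep * (iterations ** 2 + iterations) / 2
print(total)  # about 0.3, before the 0.9 rounding coefficient is applied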
Example #25
class DruidCollector(object):
    scrape_duration = Summary('druid_scrape_duration_seconds',
                              'Druid scrape duration')

    def __init__(self):
        # Datapoints successfully registered
        self.datapoints_registered = 0

        # List of supported metrics and their fields of the JSON dictionary
        # sent by a Druid daemon. These fields will be added as labels
        # when returning the available metrics in @collect.
        # Due to the fact that metric names are not unique (like segment/count),
        # it is necessary to split the data structure by daemon.
        self.supported_metric_names = {
            'middlemanager': {
                'jetty/numOpenConnections': None,
                'jvm/pool/committed': ['poolKind', 'poolName'],
                'jvm/pool/init': ['poolKind', 'poolName'],
                'jvm/pool/max': ['poolKind', 'poolName'],
                'jvm/pool/used': ['poolKind', 'poolName'],
                'jvm/mem/init': ['memKind'],
                'jvm/mem/max': ['memKind'],
                'jvm/mem/used': ['memKind'],
                'jvm/mem/committed': ['memKind'],
                'jvm/gc/count': ['gcName'],
                'jvm/gc/time': ['gcName'],
            },
            'broker': {
                'jetty/numOpenConnections': None,
                'query/time': ['dataSource'],
                'query/bytes': ['dataSource'],
                'query/node/time': None,
                'query/node/bytes': None,
                'query/node/ttfb': None,
                'query/intervalChunk/time': None,
                'query/success/count': None,
                'query/failed/count': None,
                'query/interrupted/count': None,
                'query/cache/total/numEntries': None,
                'query/cache/total/sizeBytes': None,
                'query/cache/total/hits': None,
                'query/cache/total/misses': None,
                'query/cache/total/evictions': None,
                'query/cache/total/timeouts': None,
                'query/cache/total/errors': None,
                'jvm/pool/committed': ['poolKind', 'poolName'],
                'jvm/pool/init': ['poolKind', 'poolName'],
                'jvm/pool/max': ['poolKind', 'poolName'],
                'jvm/pool/used': ['poolKind', 'poolName'],
                'jvm/mem/init': ['memKind'],
                'jvm/mem/max': ['memKind'],
                'jvm/mem/used': ['memKind'],
                'jvm/mem/committed': ['memKind'],
                'jvm/gc/count': ['gcName'],
                'jvm/gc/time': ['gcName'],
            },
            'historical': {
                'jetty/numOpenConnections': None,
                'query/time': ['dataSource'],
                'query/bytes': ['dataSource'],
                'query/cpu/time': ['dataSource'],
                'query/segment/time': None,
                'query/wait/time': None,
                'query/success/count': None,
                'query/failed/count': None,
                'query/interrupted/count': None,
                'query/segmentAndCache/time': None,
                'query/cache/total/numEntries': None,
                'query/cache/total/sizeBytes': None,
                'query/cache/total/hits': None,
                'query/cache/total/misses': None,
                'query/cache/total/evictions': None,
                'query/cache/total/timeouts': None,
                'query/cache/total/errors': None,
                'segment/count': ['tier', 'dataSource'],
                'segment/max': None,
                'segment/used': ['tier', 'dataSource'],
                'segment/usedPercent': ['tier', 'dataSource'],
                'segment/scan/pending': None,
                'jvm/pool/committed': None,
                'jvm/pool/init': None,
                'jvm/pool/max': None,
                'jvm/pool/used': None,
                'jvm/mem/init': None,
                'jvm/mem/max': None,
                'jvm/mem/used': None,
                'jvm/mem/committed': None,
                'jvm/gc/count': None,
                'jvm/gc/time': None,
            },
            'coordinator': {
                'jetty/numOpenConnections': None,
                'segment/count': ['dataSource'],
                'segment/assigned/count': ['tier'],
                'segment/moved/count': ['tier'],
                'segment/dropped/count': ['tier'],
                'segment/deleted/count': ['tier'],
                'segment/unneeded/count': ['tier'],
                'segment/overShadowed/count': None,
                'segment/loadQueue/failed': ['server'],
                'segment/loadQueue/count': ['server'],
                'segment/dropQueue/count': ['server'],
                'segment/size': ['dataSource'],
                'segment/unavailable/count': ['dataSource'],
                'segment/underReplicated/count': ['tier', 'dataSource'],
                'jvm/pool/committed': ['poolKind', 'poolName'],
                'jvm/pool/init': ['poolKind', 'poolName'],
                'jvm/pool/max': ['poolKind', 'poolName'],
                'jvm/pool/used': ['poolKind', 'poolName'],
                'jvm/mem/init': ['memKind'],
                'jvm/mem/max': ['memKind'],
                'jvm/mem/used': ['memKind'],
                'jvm/mem/committed': ['memKind'],
                'jvm/gc/count': ['gcName'],
                'jvm/gc/time': ['gcName'],
            },
            'peon': {
                'jetty/numOpenConnections': None,
                'query/time': ['dataSource'],
                'query/bytes': ['dataSource'],
                'segment/scan/pending': None,
                'query/wait/time': None,
                'query/success/count': None,
                'query/failed/count': None,
                'query/interrupted/count': None,
                'ingest/events/thrownAway': ['dataSource'],
                'ingest/events/unparseable': ['dataSource'],
                'ingest/events/processed': ['dataSource'],
                'ingest/rows/output': ['dataSource'],
                'ingest/persists/count': ['dataSource'],
                'ingest/persists/failed': ['dataSource'],
                'ingest/handoff/failed': ['dataSource'],
                'ingest/handoff/count': ['dataSource'],
            },
        }

        # Buckets used when storing histogram metrics.
        # 'sum' is a special bucket that will be used to collect the sum
        # of all values ending up in the various buckets.
        self.metric_buckets = {
            'query/time': ['10', '100', '500', '1000', '10000', 'inf', 'sum'],
            'query/bytes': ['10', '100', '500', '1000', '10000', 'inf', 'sum'],
        }
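        # For example, a query/time datapoint with value 250 would increment
        # the cumulative buckets '500', '1000', '10000' and 'inf' by one, and
        # add 250 to the special 'sum' bucket (see store_histogram below).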

        # Data structure holding histogram data
        # Format: {metric_name: {daemon: {datasource: {bucket: value, ...}}}}
        self.histograms = defaultdict(lambda: {})
        self.histograms_metrics = set([
            'query/time',
            'query/bytes',
        ])

        # Data structure holding counters data
        # Format: {metric_name: {daemon: {label_value: {label2_value: value}}}}
        # The order of the labels listed in supported_metric_names is important
        # since it is reflected in this data structure. The layering is not
        # strictly important for the final prometheus metrics, but it simplifies
        # the code that creates them (collect method).
        self.counters = defaultdict(lambda: {})
        self.counters_metrics = set([
            'query/cache/total/numEntries',
            'query/cache/total/sizeBytes',
            'query/cache/total/hits',
            'query/cache/total/misses',
            'query/cache/total/evictions',
            'query/cache/total/timeouts',
            'query/cache/total/errors',
            'segment/max',
            'segment/count',
            'segment/used',
            'segment/scan/pending',
            'segment/assigned/count',
            'segment/moved/count',
            'segment/dropped/count',
            'segment/deleted/count',
            'segment/unneeded/count',
            'segment/overShadowed/count',
            'segment/loadQueue/failed',
            'segment/loadQueue/count',
            'segment/dropQueue/count',
            'segment/size',
            'segment/unavailable/count',
            'segment/underReplicated/count',
            'ingest/events/thrownAway',
            'ingest/events/unparseable',
            'ingest/events/processed',
            'ingest/rows/output',
            'ingest/persists/count',
            'ingest/persists/failed',
            'ingest/handoff/failed',
            'ingest/handoff/count',
            'jvm/pool/committed',
            'jvm/pool/init',
            'jvm/pool/max',
            'jvm/pool/used',
            'jvm/mem/init',
            'jvm/mem/max',
            'jvm/mem/used',
            'jvm/mem/committed',
            'jvm/gc/count',
            'jvm/gc/time',
            'jetty/numOpenConnections',
        ])

    @staticmethod
    def sanitize_field(datapoint_field):
        return datapoint_field.replace('druid/', '').lower()
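        # e.g. sanitize_field('druid/broker') -> 'broker', matching the keys
        # of self.supported_metric_names.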

    def _get_general_counters(self, daemon):
        return {
            'jetty/numOpenConnections':
            GaugeMetricFamily('druid_' + daemon + '_jetty_num_connections',
                              'Number of open connections.'),
            'jvm/pool/committed':
            GaugeMetricFamily('druid_' + daemon + '_jvm_pool_committed',
                              'Committed pool memory in bytes.',
                              labels=['poolKind', 'poolName']),
            'jvm/pool/init':
            GaugeMetricFamily('druid_' + daemon + '_jvm_pool_init',
                              'Initial pool memory in bytes.',
                              labels=['poolKind', 'poolName']),
            'jvm/pool/max':
            GaugeMetricFamily('druid_' + daemon + '_jvm_pool_max',
                              'Maximum pool memory in bytes.',
                              labels=['poolKind', 'poolName']),
            'jvm/pool/used':
            GaugeMetricFamily('druid_' + daemon + '_jvm_pool_used',
                              'Used pool memory in bytes.',
                              labels=['poolKind', 'poolName']),
            'jvm/mem/init':
            GaugeMetricFamily('druid_' + daemon + '_jvm_mem_init',
                              'Initial JVM memory in bytes.',
                              labels=['memKind']),
            'jvm/mem/max':
            GaugeMetricFamily('druid_' + daemon + '_jvm_mem_max',
                              'Maximum JVM memory in bytes.',
                              labels=['memKind']),
            'jvm/mem/used':
            GaugeMetricFamily('druid_' + daemon + '_jvm_mem_used',
                              'Used JVM memory in bytes.',
                              labels=['memKind']),
            'jvm/mem/committed':
            GaugeMetricFamily('druid_' + daemon + '_jvm_mem_committed',
                              'Committed JVM memory in bytes.',
                              labels=['memKind']),
            'jvm/gc/count':
            GaugeMetricFamily('druid_' + daemon + '_jvm_gc_count',
                              'Garbage collection count.',
                              labels=['gcName']),
            'jvm/gc/time':
            GaugeMetricFamily('druid_' + daemon + '_jvm_gc_time',
                              'Garbage collection time in milliseconds.',
                              labels=['gcName']),
        }
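    # Note: these *MetricFamily objects are rebuilt on every scrape, the usual
    # pattern for prometheus_client custom collectors: collect() yields fresh
    # metric families populated from the internally accumulated counters.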

    def _get_realtime_counters(self):
        return {
            'ingest/events/thrownAway':
            GaugeMetricFamily('druid_realtime_ingest_events_thrown_away_count',
                              'Number of events rejected because '
                              'they are outside the windowPeriod.',
                              labels=['datasource']),
            'ingest/events/unparseable':
            GaugeMetricFamily(
                'druid_realtime_ingest_events_unparseable_count',
                'Number of events rejected because the events are unparseable.',
                labels=['datasource']),
            'ingest/events/processed':
            GaugeMetricFamily(
                'druid_realtime_ingest_events_processed_count',
                'Number of events successfully processed per emission period.',
                labels=['datasource']),
            'ingest/rows/output':
            GaugeMetricFamily('druid_realtime_ingest_rows_output_count',
                              'Number of Druid rows persisted.',
                              labels=['datasource']),
            'ingest/persists/count':
            GaugeMetricFamily('druid_realtime_ingest_persists_count',
                              'Number of times persist occurred.',
                              labels=['datasource']),
            'ingest/persists/failed':
            GaugeMetricFamily('druid_realtime_ingest_persists_failed_count',
                              'Number of times persist failed.',
                              labels=['datasource']),
            'ingest/handoff/failed':
            GaugeMetricFamily('druid_realtime_ingest_handoff_failed_count',
                              'Number of times handoff failed.',
                              labels=['datasource']),
            'ingest/handoff/count':
            GaugeMetricFamily('druid_realtime_ingest_handoff_count',
                              'Number of times handoff has happened.',
                              labels=['datasource']),
        }

    def _get_query_histograms(self, daemon):
        return {
            'query/time':
            HistogramMetricFamily('druid_' + daemon + '_query_time_ms',
                                  'Milliseconds taken to complete a query.',
                                  labels=['datasource']),
            'query/bytes':
            HistogramMetricFamily(
                'druid_' + daemon + '_query_bytes',
                'Number of bytes returned in query response.',
                labels=['datasource']),
        }

    def _get_query_counters(self):
        return {
            'query/success/count':
            GaugeMetricFamily('druid_broker_query_success_count',
                              'Number of successful queries.'),
            'query/failed/count':
            GaugeMetricFamily('druid_broker_query_failed_count',
                              'Number of failed queries.'),
            'query/interrupted/count':
            GaugeMetricFamily('druid_broker_query_interrupted_count',
                              'Number of interrupted queries.'),
            'query/node/time':
            GaugeMetricFamily('druid_broker_query_node_time_ms',
                              'Milliseconds taken to query individual '
                              'historical/realtime nodes.'),
            'query/node/bytes':
            GaugeMetricFamily('druid_broker_query_node_bytes',
                              'Number of bytes returned from queried nodes.'),
            'query/node/ttfb':
            GaugeMetricFamily('druid_broker_query_node_ttfb_ms',
                              'Time to first byte in milliseconds.'),
        }

    def _get_cache_counters(self, daemon):
        return {
            'query/cache/total/numEntries':
            GaugeMetricFamily(
                'druid_' + daemon + '_query_cache_numentries_count',
                'Number of cache entries.'),
            'query/cache/total/sizeBytes':
            GaugeMetricFamily('druid_' + daemon + '_query_cache_size_bytes',
                              'Size in bytes of cache entries.'),
            'query/cache/total/hits':
            GaugeMetricFamily('druid_' + daemon + '_query_cache_hits_count',
                              'Number of cache hits.'),
            'query/cache/total/misses':
            GaugeMetricFamily('druid_' + daemon + '_query_cache_misses_count',
                              'Number of cache misses.'),
            'query/cache/total/evictions':
            GaugeMetricFamily(
                'druid_' + daemon + '_query_cache_evictions_count',
                'Number of cache evictions.'),
            'query/cache/total/timeouts':
            GaugeMetricFamily(
                'druid_' + daemon + '_query_cache_timeouts_count',
                'Number of cache timeouts.'),
            'query/cache/total/errors':
            GaugeMetricFamily('druid_' + daemon + '_query_cache_errors_count',
                              'Number of cache errors.'),
        }

    def _get_historical_counters(self):
        return {
            'segment/max':
            GaugeMetricFamily('druid_historical_max_segment_bytes',
                              'Maximum byte limit available for segments.'),
            'segment/count':
            GaugeMetricFamily('druid_historical_segment_count',
                              'Number of served segments.',
                              labels=['tier', 'datasource']),
            'segment/used':
            GaugeMetricFamily('druid_historical_segment_used_bytes',
                              'Bytes used for served segments.',
                              labels=['tier', 'datasource']),
            'segment/scan/pending':
            GaugeMetricFamily(
                'druid_historical_segment_scan_pending',
                'Number of segments in queue waiting to be scanned.'),
            'query/success/count':
            GaugeMetricFamily('druid_historical_query_success_count',
                              'Number of successful queries.'),
            'query/failed/count':
            GaugeMetricFamily('druid_historical_query_failed_count',
                              'Number of failed queries.'),
            'query/interrupted/count':
            GaugeMetricFamily('druid_historical_query_interrupted_count',
                              'Number of interrupted queries.'),
        }

    def _get_coordinator_counters(self):
        return {
            'segment/assigned/count':
            GaugeMetricFamily(
                'druid_coordinator_segment_assigned_count',
                'Number of segments assigned to be loaded in the cluster.',
                labels=['tier']),
            'segment/moved/count':
            GaugeMetricFamily(
                'druid_coordinator_segment_moved_count',
                'Number of segments moved in the cluster.',
                labels=['tier']),
            'segment/dropped/count':
            GaugeMetricFamily(
                'druid_coordinator_segment_dropped_count',
                'Number of segments dropped due to being overshadowed.',
                labels=['tier']),
            'segment/deleted/count':
            GaugeMetricFamily('druid_coordinator_segment_deleted_count',
                              'Number of segments dropped due to rules.',
                              labels=['tier']),
            'segment/unneeded/count':
            GaugeMetricFamily(
                'druid_coordinator_segment_unneeded_count',
                'Number of segments dropped due to being marked as unused.',
                labels=['tier']),
            'segment/overShadowed/count':
            GaugeMetricFamily('druid_coordinator_segment_overshadowed_count',
                              'Number of overShadowed segments.'),
            'segment/loadQueue/failed':
            GaugeMetricFamily(
                'druid_coordinator_segment_loadqueue_failed_count',
                'Number of segments that failed to load.',
                labels=['server']),
            'segment/loadQueue/count':
            GaugeMetricFamily('druid_coordinator_segment_loadqueue_count',
                              'Number of segments to load.',
                              labels=['server']),
            'segment/dropQueue/count':
            GaugeMetricFamily('druid_coordinator_segment_dropqueue_count',
                              'Number of segments to drop.',
                              labels=['server']),
            'segment/size':
            GaugeMetricFamily('druid_coordinator_segment_size_bytes',
                              'Size in bytes of available segments.',
                              labels=['datasource']),
            'segment/count':
            GaugeMetricFamily('druid_coordinator_segment_count',
                              'Number of served segments.',
                              labels=['datasource']),
            'segment/unavailable/count':
            GaugeMetricFamily(
                'druid_coordinator_segment_unavailable_count',
                'Number of segments (not including replicas) left to load '
                'until segments that should be loaded in the cluster '
                'are available for queries.',
                labels=['datasource']),
            'segment/underReplicated/count':
            GaugeMetricFamily(
                'druid_coordinator_segment_under_replicated_count',
                'Number of segments (including replicas) left to load until '
                'segments that should be loaded in the cluster are '
                'available for queries.',
                labels=['tier', 'datasource']),
        }

    def store_counter(self, datapoint):
        """ This function adds data to the self.counters dictiorary following its
            convention, creating on the fly the missing bits. For example, given:
            self.counters = {}
            datapoint = {'service': 'druid/broker', 'metric'='segment/size',
                         'datasource': 'test', 'value': 10}

            This function will creates the following:
            self.counters = {'segment/size': {'broker': {'test': 10}}}

            The algorithm is generic enough to support all metrics handled by
            self.counters without caring about the number of labels needed.
        """
        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        metric_name = str(datapoint['metric'])
        metric_value = float(datapoint['value'])

        metrics_storage = self.counters[metric_name]
        metric_labels = self.supported_metric_names[daemon][metric_name]

        metrics_storage.setdefault(daemon, {})

        if metric_labels:
            metrics_storage_cursor = metrics_storage[daemon]
            for label in metric_labels:
                label_value = str(datapoint[label])
                if metric_labels[-1] != label:
                    metrics_storage_cursor.setdefault(label_value, {})
                    metrics_storage_cursor = metrics_storage_cursor[
                        label_value]
                else:
                    metrics_storage_cursor[label_value] = metric_value
        else:
            metrics_storage[daemon] = metric_value

        log.debug("The datapoint {} modified the counters dictionary to: \n{}".
                  format(datapoint, self.counters))

    def store_histogram(self, datapoint):
        """ Store datapoints that will end up in histogram buckets using a dictiorary.
            This function is highly customized for the only histograms configured
            so far, rather than being generic like store_counter. Example of how
            it works:
            self.histograms = {}
            datapoint = {'service': 'druid/broker', 'metric'='query/time',
                         'datasource': 'test', 'value': 10}

            This function will creates the following:
            self.counters = {'query/time': {'broker':
                {'test': {'10': 1, '100': 1, etc.., 'sum': 10}}}}}
        """
        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        metric_name = str(datapoint['metric'])
        metric_value = float(datapoint['value'])
        datasource = str(datapoint['dataSource'])

        self.histograms.setdefault(metric_name, {daemon: {datasource: {}}})
        self.histograms[metric_name].setdefault(daemon, {datasource: {}})
        self.histograms[metric_name][daemon].setdefault(datasource, {})

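        # Buckets are cumulative (Prometheus histogram semantics): a value
        # increments every bucket whose upper bound it does not exceed, while
        # the special 'sum' bucket accumulates the raw values.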
        for bucket in self.metric_buckets[metric_name]:
            stored_buckets = self.histograms[metric_name][daemon][datasource]
            if bucket not in stored_buckets:
                stored_buckets[bucket] = 0
            if bucket != 'sum' and metric_value <= float(bucket):
                stored_buckets[bucket] += 1
        stored_buckets['sum'] += metric_value

        log.debug(
            "The datapoint {} modified the histograms dictionary to: \n{}".
            format(datapoint, self.histograms))

    @scrape_duration.time()
    def collect(self):
        # Metrics common to Broker, Historical and Peon
        for daemon in ['broker', 'historical', 'peon']:
            query_metrics = self._get_query_histograms(daemon)

            for metric in query_metrics:
                if not self.histograms[metric]:
                    continue
                if daemon in self.histograms[metric]:
                    for datasource in self.histograms[metric][daemon]:
                        buckets = self.histograms[metric][daemon][datasource]
                        buckets_without_sum = [(k, v)
                                               for k, v in buckets.items()
                                               if k != 'sum']
                        query_metrics[metric].add_metric(
                            [datasource],
                            buckets=buckets_without_sum,
                            sum_value=self.histograms[metric][daemon]
                            [datasource]['sum'])
                    yield query_metrics[metric]

        # Metrics common to Broker and Historical
        for daemon in ['broker', 'historical']:
            cache_metrics = self._get_cache_counters(daemon)

            for metric in cache_metrics:
                if not self.counters[metric] or daemon not in self.counters[
                        metric]:
                    if not self.supported_metric_names[daemon][metric]:
                        cache_metrics[metric].add_metric([], float('nan'))
                    else:
                        continue
                else:
                    cache_metrics[metric].add_metric(
                        [], self.counters[metric][daemon])
                yield cache_metrics[metric]

        # Metrics common to MiddleManager, Broker, Historical and Coordinator
        for daemon in ['middlemanager', 'broker', 'historical', 'coordinator']:
            generic_metrics = self._get_general_counters(daemon)

            for metric in generic_metrics:
                if not self.counters[metric] or daemon not in self.counters[
                        metric]:
                    if not self.supported_metric_names[daemon][metric]:
                        generic_metrics[metric].add_metric([], float('nan'))
                    else:
                        continue
                else:
                    labels = self.supported_metric_names[daemon][metric]
                    if not labels:
                        generic_metrics[metric].add_metric(
                            [], self.counters[metric][daemon])
                    elif len(labels) == 1:
                        for label in self.counters[metric][daemon]:
                            generic_metrics[metric].add_metric(
                                [label], self.counters[metric][daemon][label])
                    else:
                        for outer_label in self.counters[metric][daemon]:
                            for inner_label in self.counters[metric][daemon][
                                    outer_label]:
                                generic_metrics[metric].add_metric(
                                    [outer_label, inner_label],
                                    self.counters[metric][daemon][outer_label]
                                    [inner_label])
                yield generic_metrics[metric]

        historical_health_metrics = self._get_historical_counters()
        coordinator_metrics = self._get_coordinator_counters()
        realtime_metrics = self._get_realtime_counters()
        broker_metrics = self._get_query_counters()
        for daemon, metrics in [('coordinator', coordinator_metrics),
                                ('historical', historical_health_metrics),
                                ('peon', realtime_metrics),
                                ('broker', broker_metrics)]:
            for metric in metrics:
                if not self.counters[metric] or daemon not in self.counters[
                        metric]:
                    if not self.supported_metric_names[daemon][metric]:
                        metrics[metric].add_metric([], float('nan'))
                    else:
                        continue
                else:
                    labels = self.supported_metric_names[daemon][metric]
                    if not labels:
                        metrics[metric].add_metric(
                            [], self.counters[metric][daemon])
                    elif len(labels) == 1:
                        for label in self.counters[metric][daemon]:
                            metrics[metric].add_metric(
                                [label], self.counters[metric][daemon][label])
                    else:
                        for outer_label in self.counters[metric][daemon]:
                            for inner_label in self.counters[metric][daemon][
                                    outer_label]:
                                metrics[metric].add_metric(
                                    [outer_label, inner_label],
                                    self.counters[metric][daemon][outer_label]
                                    [inner_label])
                yield metrics[metric]

        registered = CounterMetricFamily(
            'druid_exporter_datapoints_registered_count',
            'Number of datapoints successfully registered '
            'by the exporter.')
        registered.add_metric([], self.datapoints_registered)
        yield registered

    def register_datapoint(self, datapoint):
        if datapoint['feed'] != 'metrics':
            log.debug("The following feed does not contain a datapoint, "
                      "dropping it: {}".format(datapoint))
            return

        daemon = DruidCollector.sanitize_field(str(datapoint['service']))
        if (daemon not in self.supported_metric_names
                or datapoint['metric']
                not in self.supported_metric_names[daemon]):
            log.debug(
                "The following datapoint is not supported, either "
                "because the daemon is not listed or the metric itself "
                "is not supported: {}".format(datapoint))
            return

        metric_name = str(datapoint['metric'])
        if metric_name in self.histograms_metrics:
            self.store_histogram(datapoint)
        elif metric_name in self.counters_metrics:
            self.store_counter(datapoint)

        self.datapoints_registered += 1
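
A minimal usage sketch for the collector above (assuming the snippet is a
class named DruidCollector whose __init__ also sets
self.datapoints_registered = 0, and that scrape_duration is a module-level
Summary; the names and values below are illustrative, not part of the
snippet):

from prometheus_client import generate_latest
from prometheus_client.core import REGISTRY

collector = DruidCollector()   # hypothetical instantiation of the class above
REGISTRY.register(collector)   # collect() will run on every scrape

# A parsed Druid HTTP-emitter datapoint (illustrative values).
datapoint = {
    'feed': 'metrics',
    'service': 'druid/broker',
    'metric': 'query/cache/total/hits',
    'value': 42,
}
collector.register_datapoint(datapoint)

print(generate_latest(REGISTRY).decode('utf-8'))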
Example #26
0
    def setUp(self):
        self.registry = CollectorRegistry()
        self.summary = Summary('s', 'help', registry=self.registry)
        self.summary_with_labels = Summary('s_with_labels', 'help', labelnames=("label1",), registry=self.registry)
Example #27
0
    def test_timer_not_observable(self):
        s = Summary('test', 'help', labelnames=('label',), registry=self.registry)

        assert_not_observable(s.time)
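        # assert_not_observable is a helper defined in the surrounding test
        # module; the expectation is that it asserts the call raises
        # ValueError for the missing label values, replacing an explicit
        # try/except around s.time().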