def setUp(self):
    """
    Prepare a PrometheusCheck with mocked outputs and load the shared fixtures.

    The check's ``gauge`` method and its logger's ``debug`` method are replaced
    by MagicMock objects so the tests can inspect what was submitted/logged.
    Both fixture files are read as raw bytes and their sizes are asserted.
    """
    self.check = PrometheusCheck('prometheus_check', {}, {}, {})
    check = self.check
    check.gauge = MagicMock()
    check.log = logging.getLogger('datadog-prometheus.test')
    check.log.debug = MagicMock()
    check.metrics_mapper = {'process_virtual_memory_bytes': 'process.vm.bytes'}
    check.NAMESPACE = 'prometheus'

    self.protobuf_content_type = 'application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited'

    # Reference gauge, expressed with the protobuf target class
    reference = metrics_pb2.MetricFamily()
    reference.name = 'process_virtual_memory_bytes'
    reference.help = 'Virtual memory size in bytes.'
    reference.type = 1  # GAUGE
    reference.metric.add().gauge.value = 39211008.0
    self.ref_gauge = reference

    fixtures_dir = os.path.join(os.path.dirname(__file__), 'fixtures', 'prometheus')

    # Binary (protobuf) fixture
    self.bin_data = None
    with open(os.path.join(fixtures_dir, 'protobuf.bin'), 'rb') as fixture:
        self.bin_data = fixture.read()
    self.assertEqual(len(self.bin_data), 51855)

    # Text exposition fixture
    self.text_data = None
    with open(os.path.join(fixtures_dir, 'metrics.txt'), 'rb') as fixture:
        self.text_data = fixture.read()
    self.assertEqual(len(self.text_data), 14494)
def test_parse_one_counter(self):
    """
    A single counter in text format must parse into one COUNTER family:

    name: "go_memstats_mallocs_total"
    help: "Total number of mallocs."
    type: COUNTER
    metric {
      counter {
        value: 18713.0
      }
    }
    """
    payload = (
        "# HELP go_memstats_mallocs_total Total number of mallocs.\n"
        "# TYPE go_memstats_mallocs_total counter\n"
        "go_memstats_mallocs_total 18713\n")

    expected = metrics_pb2.MetricFamily()
    expected.name = "go_memstats_mallocs_total"
    expected.help = "Total number of mallocs."
    expected.type = 0
    expected.metric.add().counter.value = 18713

    # Drain the generator so every parsed family is available
    response = MockResponse(payload, 'text/plain; version=0.0.4')
    families = list(self.check.parse_metric_family(response))

    self.assertEqual(1, len(families))
    parsed = families[0]
    self.assertEqual(expected, parsed)

    # Swap the expected sample for one with another value: must no longer match
    expected.metric.pop()
    expected.metric.add().counter.value = 18714
    self.assertNotEqual(expected, parsed)
def parse_metric_family(self, buf, content_type):
    """
    Turn the body of a prometheus endpoint response into MetricFamily objects.

    This is a generator. Depending on the Content-type header value:

    - `application/vnd.google.protobuf`: the binary buffer is scanned for
      Prometheus messages of type MetricFamily [0], each one delimited by a
      varint32 [1].
    - `text/plain`: every line of the buffer is fed to
      `_extract_metrics_from_string`, then one MetricFamily is built per
      collected metric name through `_extract_metric_from_map`.
    - anything else: `self.UnknownFormatError` is raised.

    [0] https://github.com/prometheus/client_model/blob/086fe7ca28bde6cec2acd5223423c1475a362858/metrics.proto#L76-%20%20L81
    [1] https://developers.google.com/protocol-buffers/docs/reference/java/com/google/protobuf/AbstractMessageLite#writeDelimitedTo(java.io.OutputStream)
    """
    is_protobuf = 'application/vnd.google.protobuf' in content_type
    if not is_protobuf and 'text/plain' not in content_type:
        raise self.UnknownFormatError('Unsupported content-type provided: {}'.format(content_type))

    if is_protobuf:
        offset = 0
        while offset < len(buf):
            # Each message is prefixed by its length encoded as a varint32
            size, offset = _DecodeVarint32(buf, offset)
            end = offset + size
            chunk = buf[offset:end]
            offset = end

            family = metrics_pb2.MetricFamily()
            family.ParseFromString(chunk)
            yield family
        return

    # Text format
    samples_by_name = {}  # element name (before the labels) -> occurrences with labels and values
    types_by_name = {}    # metric name -> metric type
    help_by_name = {}     # metric name -> help text
    for raw_line in buf.splitlines():
        self._extract_metrics_from_string(raw_line, samples_by_name, types_by_name, help_by_name)

    for name in types_by_name:
        has_samples = name in samples_by_name
        if not has_samples and types_by_name[name] == 'histogram':
            # Histogram samples are stored under the "<name>_bucket" key
            has_samples = '{}_bucket'.format(name) in samples_by_name
        if has_samples:
            yield self._extract_metric_from_map(name, samples_by_name, types_by_name, help_by_name)
def test_submit_metric_counter(self):
    """A COUNTER family is submitted as a namespaced gauge carrying its value."""
    family = metrics_pb2.MetricFamily()
    family.name = 'my_counter'
    family.help = 'Random counter'
    family.type = 0  # COUNTER
    sample = family.metric.add()
    sample.counter.value = 42

    self.check._submit_metric('custom.counter', family)

    self.check.gauge.assert_called_with('prometheus.custom.counter', 42, [])
def test_process_metric_filtered(self):
    """
    A metric missing from the metrics_mapper is logged and never submitted.
    """
    unmapped = metrics_pb2.MetricFamily()
    unmapped.name = "process_start_time_seconds"
    unmapped.help = "Start time of the process since unix epoch in seconds."
    unmapped.type = 1  # GAUGE
    unmapped.metric.add().gauge.value = 39211008.0

    self.check.process_metric(unmapped)

    expected_log = "Unable to handle metric: process_start_time_seconds - error: 'PrometheusCheck' object has no attribute 'process_start_time_seconds'"
    self.check.log.debug.assert_called_with(expected_log)
    self.check.gauge.assert_not_called()
def test_parse_one_summary_with_none_values(self):
    """A summary whose quantiles are all NaN still parses into one SUMMARY family."""
    payload = (
        '# HELP http_response_size_bytes The HTTP response sizes in bytes.\n'
        '# TYPE http_response_size_bytes summary\n'
        'http_response_size_bytes{handler="prometheus",quantile="0.5"} NaN\n'
        'http_response_size_bytes{handler="prometheus",quantile="0.9"} NaN\n'
        'http_response_size_bytes{handler="prometheus",quantile="0.99"} NaN\n'
        'http_response_size_bytes_sum{handler="prometheus"} 0\n'
        'http_response_size_bytes_count{handler="prometheus"} 0\n')

    expected = metrics_pb2.MetricFamily()
    expected.help = "The HTTP response sizes in bytes."
    expected.name = "http_response_size_bytes"
    expected.type = 2
    summary_metric = expected.metric.add()

    # Label for the prometheus handler
    handler_label = summary_metric.label.add()
    handler_label.name = "handler"
    handler_label.value = "prometheus"

    # Root summary sample
    summary_metric.summary.sample_count = 0
    summary_metric.summary.sample_sum = 0.

    # Quantiles 0.5, 0.9 and 0.99, all without a usable value
    for rank in (0.5, 0.9, 0.99):
        entry = summary_metric.summary.quantile.add()
        entry.quantile = rank
        entry.value = float('nan')

    # Drain the generator so every parsed family is available
    response = MockResponse(payload, 'text/plain; version=0.0.4')
    families = list(self.check.parse_metric_family(response))
    self.assertEqual(1, len(families))
    parsed = families[0]

    # NaN never compares equal to itself, so assertEqual on the messages would
    # fail; compare their representations instead.
    self.assertEqual(expected.__repr__(), parsed.__repr__())
def test_submits_summary(self):
    """A SUMMARY family yields count, sum and one tagged gauge per quantile."""
    family = metrics_pb2.MetricFamily()
    family.name = 'my_summary'
    family.help = 'Random summary'
    family.type = 2  # SUMMARY

    sample = family.metric.add()
    sample.summary.sample_count = 42
    sample.summary.sample_sum = 3.14
    for rank, value in ((10.0, 3), (4.0, 5)):
        entry = sample.summary.quantile.add()
        entry.quantile = rank
        entry.value = value

    self.check._submit('custom.summary', family)

    expected_calls = [
        call('prometheus.custom.summary.count', 42, []),
        call('prometheus.custom.summary.sum', 3.14, []),
        call('prometheus.custom.summary.quantile', 3, ['quantile:10.0']),
        call('prometheus.custom.summary.quantile', 5, ['quantile:4.0']),
    ]
    self.check.gauge.assert_has_calls(expected_calls)
def test_submit_metric_histogram(self):
    """A HISTOGRAM family yields count, sum and one tagged count per bucket."""
    family = metrics_pb2.MetricFamily()
    family.name = 'my_histogram'
    family.help = 'Random histogram'
    family.type = 4  # HISTOGRAM

    sample = family.metric.add()
    sample.histogram.sample_count = 42
    sample.histogram.sample_sum = 3.14
    for bound, cumulative in ((12.7, 33), (18.2, 666)):
        bucket = sample.histogram.bucket.add()
        bucket.upper_bound = bound
        bucket.cumulative_count = cumulative

    self.check._submit_metric('custom.histogram', family)

    expected_calls = [
        call('prometheus.custom.histogram.count', 42, []),
        call('prometheus.custom.histogram.sum', 3.14, []),
        call('prometheus.custom.histogram.count', 33, ['upper_bound:12.7']),
        call('prometheus.custom.histogram.count', 666, ['upper_bound:18.2']),
    ]
    self.check.gauge.assert_has_calls(expected_calls)
def test_parse_one_gauge(self):
    """
    A single gauge in text format must parse into one GAUGE family:

    name: "etcd_server_has_leader"
    help: "Whether or not a leader exists. 1 is existence, 0 is not."
    type: GAUGE
    metric {
      gauge {
        value: 1.0
      }
    }
    """
    payload = (
        "# HELP etcd_server_has_leader Whether or not a leader exists. 1 is existence, 0 is not.\n"
        "# TYPE etcd_server_has_leader gauge\n"
        "etcd_server_has_leader 1\n")

    expected = metrics_pb2.MetricFamily()
    expected.name = "etcd_server_has_leader"
    expected.help = "Whether or not a leader exists. 1 is existence, 0 is not."
    expected.type = 1
    expected.metric.add().gauge.value = 1

    # Drain the generator so every parsed family is available
    response = MockResponse(payload, 'text/plain; version=0.0.4')
    families = list(self.check.parse_metric_family(response))

    self.assertEqual(1, len(families))
    parsed = families[0]
    self.assertEqual(expected, parsed)

    # Swap the expected sample for one with another value: must no longer match
    expected.metric.pop()
    expected.metric.add().gauge.value = 0
    self.assertNotEqual(expected, parsed)

    # Put the expected value back, this time as a float: it must match again
    expected.metric.pop()
    expected.metric.add().gauge.value = 1.0
    self.assertEqual(expected, parsed)
def test_parse_two_summaries_with_labels(self):
    """Two label sets of the same summary end up as two metrics of one family."""
    payload = (
        '# HELP http_response_size_bytes The HTTP response sizes in bytes.\n'
        '# TYPE http_response_size_bytes summary\n'
        'http_response_size_bytes{from="internet",handler="prometheus",quantile="0.5"} 24547\n'
        'http_response_size_bytes{from="internet",handler="prometheus",quantile="0.9"} 25763\n'
        'http_response_size_bytes{from="internet",handler="prometheus",quantile="0.99"} 25763\n'
        'http_response_size_bytes_sum{from="internet",handler="prometheus"} 120512\n'
        'http_response_size_bytes_count{from="internet",handler="prometheus"} 5\n'
        'http_response_size_bytes{from="cluster",handler="prometheus",quantile="0.5"} 24615\n'
        'http_response_size_bytes{from="cluster",handler="prometheus",quantile="0.9"} 24627\n'
        'http_response_size_bytes{from="cluster",handler="prometheus",quantile="0.99"} 24627\n'
        'http_response_size_bytes_sum{from="cluster",handler="prometheus"} 94913\n'
        'http_response_size_bytes_count{from="cluster",handler="prometheus"} 4\n'
    )

    expected = metrics_pb2.MetricFamily()
    expected.help = "The HTTP response sizes in bytes."
    expected.name = "http_response_size_bytes"
    expected.type = 2

    # One entry per expected metric: origin, sample count, sample sum, quantiles
    expected_series = (
        ("internet", 5, 120512, ((0.5, 24547), (0.9, 25763), (0.99, 25763))),
        ("cluster", 4, 94913, ((0.5, 24615), (0.9, 24627), (0.99, 24627))),
    )
    for origin, count, total, quantiles in expected_series:
        summary_metric = expected.metric.add()

        # Labels: the prometheus handler first, then the origin
        for label_name, label_value in (("handler", "prometheus"), ("from", origin)):
            label = summary_metric.label.add()
            label.name = label_name
            label.value = label_value

        # Root summary sample
        summary_metric.summary.sample_count = count
        summary_metric.summary.sample_sum = total

        # Quantiles 0.5, 0.9, 0.99
        for rank, value in quantiles:
            entry = summary_metric.summary.quantile.add()
            entry.quantile = rank
            entry.value = value

    # Drain the generator so every parsed family is available
    response = MockResponse(payload, 'text/plain; version=0.0.4')
    families = list(self.check.parse_metric_family(response))

    self.assertEqual(1, len(families))
    parsed = families[0]
    self.assertEqual(expected, parsed)
def test_parse_one_summary(self):
    """
    A single labelled summary must parse into one SUMMARY family:

    name: "http_response_size_bytes"
    help: "The HTTP response sizes in bytes."
    type: SUMMARY
    metric {
      label { name: "handler" value: "prometheus" }
      summary {
        sample_count: 5
        sample_sum: 120512.0
        quantile { quantile: 0.5 value: 24547.0 }
        quantile { quantile: 0.9 value: 25763.0 }
        quantile { quantile: 0.99 value: 25763.0 }
      }
    }
    """
    payload = (
        '# HELP http_response_size_bytes The HTTP response sizes in bytes.\n'
        '# TYPE http_response_size_bytes summary\n'
        'http_response_size_bytes{handler="prometheus",quantile="0.5"} 24547\n'
        'http_response_size_bytes{handler="prometheus",quantile="0.9"} 25763\n'
        'http_response_size_bytes{handler="prometheus",quantile="0.99"} 25763\n'
        'http_response_size_bytes_sum{handler="prometheus"} 120512\n'
        'http_response_size_bytes_count{handler="prometheus"} 5\n')

    expected = metrics_pb2.MetricFamily()
    expected.help = "The HTTP response sizes in bytes."
    expected.name = "http_response_size_bytes"
    expected.type = 2
    summary_metric = expected.metric.add()

    # Label for the prometheus handler
    handler_label = summary_metric.label.add()
    handler_label.name = "handler"
    handler_label.value = "prometheus"

    # Root summary sample
    summary_metric.summary.sample_count = 5
    summary_metric.summary.sample_sum = 120512

    # Quantiles 0.5, 0.9, 0.99
    for rank, value in ((0.5, 24547), (0.9, 25763), (0.99, 25763)):
        entry = summary_metric.summary.quantile.add()
        entry.quantile = rank
        entry.value = value

    # Drain the generator so every parsed family is available
    response = MockResponse(payload, 'text/plain; version=0.0.4')
    families = list(self.check.parse_metric_family(response))

    self.assertEqual(1, len(families))
    parsed = families[0]
    self.assertEqual(expected, parsed)
def test_parse_two_histograms_with_label(self):
    """Two label sets of the same histogram end up as two metrics of one family."""
    payload = (
        '# HELP etcd_disk_wal_fsync_duration_seconds The latency distributions of fsync called by wal.\n'
        '# TYPE etcd_disk_wal_fsync_duration_seconds histogram\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.001"} 2\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.002"} 2\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.004"} 2\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.008"} 2\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.016"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.032"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.064"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.128"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.256"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="0.512"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="1.024"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="2.048"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="4.096"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="8.192"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="vault",le="+Inf"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_sum{kind="fs",app="vault"} 0.026131671\n'
        'etcd_disk_wal_fsync_duration_seconds_count{kind="fs",app="vault"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.001"} 718\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.002"} 740\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.004"} 743\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.008"} 748\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.016"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.032"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.064"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.128"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.256"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="0.512"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="1.024"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="2.048"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="4.096"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="8.192"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{kind="fs",app="kubernetes",le="+Inf"} 751\n'
        'etcd_disk_wal_fsync_duration_seconds_sum{kind="fs",app="kubernetes"} 0.3097010759999998\n'
        'etcd_disk_wal_fsync_duration_seconds_count{kind="fs",app="kubernetes"} 751\n'
    )

    expected = metrics_pb2.MetricFamily()
    expected.help = "The latency distributions of fsync called by wal."
    expected.name = "etcd_disk_wal_fsync_duration_seconds"
    expected.type = 4

    vault_buckets = [
        (0.001, 2),
        (0.002, 2),
        (0.004, 2),
        (0.008, 2),
        (0.016, 4),
        (0.032, 4),
        (0.064, 4),
        (0.128, 4),
        (0.256, 4),
        (0.512, 4),
        (1.024, 4),
        (2.048, 4),
        (4.096, 4),
        (8.192, 4),
        (float('inf'), 4),
    ]
    kubernetes_buckets = [
        (0.001, 718),
        (0.002, 740),
        (0.004, 743),
        (0.008, 748),
        (0.016, 751),
        (0.032, 751),
        (0.064, 751),
        (0.128, 751),
        (0.256, 751),
        (0.512, 751),
        (1.024, 751),
        (2.048, 751),
        (4.096, 751),
        (8.192, 751),
        (float('inf'), 751),
    ]

    # One entry per expected metric: app label, buckets, sample count, sample sum
    expected_series = (
        ("vault", vault_buckets, 4, 0.026131671),
        ("kubernetes", kubernetes_buckets, 751, 0.3097010759999998),
    )
    for app, buckets, count, total in expected_series:
        histogram_metric = expected.metric.add()

        # Labels: kind first, then the app
        for label_name, label_value in (("kind", "fs"), ("app", app)):
            label = histogram_metric.label.add()
            label.name = label_name
            label.value = label_value

        for upper_bound, cumulative_count in buckets:
            bucket = histogram_metric.histogram.bucket.add()
            bucket.upper_bound = upper_bound
            bucket.cumulative_count = cumulative_count

        # Root histogram sample
        histogram_metric.histogram.sample_count = count
        histogram_metric.histogram.sample_sum = total

    # Drain the generator so every parsed family is available
    response = MockResponse(payload, 'text/plain; version=0.0.4')
    families = list(self.check.parse_metric_family(response))

    self.assertEqual(1, len(families))
    parsed = families[0]
    self.assertEqual(expected, parsed)
def test_parse_one_histogram(self):
    """
    A single unlabelled histogram must parse into one HISTOGRAM family:

    name: "etcd_disk_wal_fsync_duration_seconds"
    help: "The latency distributions of fsync called by wal."
    type: HISTOGRAM
    metric {
      histogram {
        sample_count: 4
        sample_sum: 0.026131671
        bucket { cumulative_count: 2 upper_bound: 0.001 }
        bucket { cumulative_count: 2 upper_bound: 0.002 }
        bucket { cumulative_count: 2 upper_bound: 0.004 }
        bucket { cumulative_count: 2 upper_bound: 0.008 }
        bucket { cumulative_count: 4 upper_bound: 0.016 }
        bucket { cumulative_count: 4 upper_bound: 0.032 }
        bucket { cumulative_count: 4 upper_bound: 0.064 }
        bucket { cumulative_count: 4 upper_bound: 0.128 }
        bucket { cumulative_count: 4 upper_bound: 0.256 }
        bucket { cumulative_count: 4 upper_bound: 0.512 }
        bucket { cumulative_count: 4 upper_bound: 1.024 }
        bucket { cumulative_count: 4 upper_bound: 2.048 }
        bucket { cumulative_count: 4 upper_bound: 4.096 }
        bucket { cumulative_count: 4 upper_bound: 8.192 }
        bucket { cumulative_count: 4 upper_bound: inf }
      }
    }
    """
    payload = (
        '# HELP etcd_disk_wal_fsync_duration_seconds The latency distributions of fsync called by wal.\n'
        '# TYPE etcd_disk_wal_fsync_duration_seconds histogram\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.001"} 2\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.002"} 2\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.004"} 2\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.008"} 2\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.016"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.032"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.064"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.128"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.256"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="0.512"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="1.024"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="2.048"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="4.096"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="8.192"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_bucket{le="+Inf"} 4\n'
        'etcd_disk_wal_fsync_duration_seconds_sum 0.026131671\n'
        'etcd_disk_wal_fsync_duration_seconds_count 4\n')

    expected = metrics_pb2.MetricFamily()
    expected.help = "The latency distributions of fsync called by wal."
    expected.name = "etcd_disk_wal_fsync_duration_seconds"
    expected.type = 4
    histogram_metric = expected.metric.add()

    expected_buckets = [
        (0.001, 2),
        (0.002, 2),
        (0.004, 2),
        (0.008, 2),
        (0.016, 4),
        (0.032, 4),
        (0.064, 4),
        (0.128, 4),
        (0.256, 4),
        (0.512, 4),
        (1.024, 4),
        (2.048, 4),
        (4.096, 4),
        (8.192, 4),
        (float('inf'), 4),
    ]
    for upper_bound, cumulative_count in expected_buckets:
        bucket = histogram_metric.histogram.bucket.add()
        bucket.upper_bound = upper_bound
        bucket.cumulative_count = cumulative_count

    # Root histogram sample
    histogram_metric.histogram.sample_count = 4
    histogram_metric.histogram.sample_sum = 0.026131671

    # Drain the generator so every parsed family is available
    response = MockResponse(payload, 'text/plain; version=0.0.4')
    families = list(self.check.parse_metric_family(response))

    self.assertEqual(1, len(families))
    parsed = families[0]
    self.assertEqual(expected, parsed)
def test_parse_metric_family_text(self):
    """
    Test the high level method for loading metrics from text format.

    The text fixture is read from disk, parsed through
    ``parse_metric_family(buf, content_type)``, and one hand-built counter,
    gauge, summary and histogram family must each be found among the results.
    """
    f_name = os.path.join(os.path.dirname(__file__), 'fixtures', 'prometheus', 'metrics.txt')
    with open(f_name, 'r') as f:
        _text_data = f.read()
    self.assertEqual(len(_text_data), 14488)

    messages = list(self.check.parse_metric_family(_text_data, 'text/plain; version=0.0.4'))
    self.assertEqual(len(messages), 41)

    # Tests correct parsing of counters
    _counter = metrics_pb2.MetricFamily()
    _counter.name = 'skydns_skydns_dns_cachemiss_count_total'
    _counter.help = 'Counter of DNS requests that result in a cache miss.'
    _counter.type = 0  # COUNTER
    _c = _counter.metric.add()
    _c.counter.value = 1359194.0
    _lc = _c.label.add()
    _lc.name = 'cache'
    _lc.value = 'response'
    self.assertIn(_counter, messages)

    # Tests correct parsing of gauges
    _gauge = metrics_pb2.MetricFamily()
    _gauge.name = 'go_memstats_heap_alloc_bytes'
    _gauge.help = 'Number of heap bytes allocated and still in use.'
    _gauge.type = 1  # GAUGE
    _gauge.metric.add().gauge.value = 6396288.0
    self.assertIn(_gauge, messages)

    # Tests correct parsing of summaries
    _summary = metrics_pb2.MetricFamily()
    _summary.name = 'http_response_size_bytes'
    _summary.help = 'The HTTP response sizes in bytes.'
    _summary.type = 2  # SUMMARY
    _sm = _summary.metric.add()
    _lsm = _sm.label.add()
    _lsm.name = 'handler'
    _lsm.value = 'prometheus'
    _sm.summary.sample_count = 25
    _sm.summary.sample_sum = 147728.0
    _sq1 = _sm.summary.quantile.add()
    _sq1.quantile = 0.5
    _sq1.value = 21470.0
    _sq2 = _sm.summary.quantile.add()
    _sq2.quantile = 0.9
    _sq2.value = 21470.0
    _sq3 = _sm.summary.quantile.add()
    _sq3.quantile = 0.99
    _sq3.value = 21470.0
    self.assertIn(_summary, messages)

    # Tests correct parsing of histograms
    _histo = metrics_pb2.MetricFamily()
    _histo.name = 'skydns_skydns_dns_response_size_bytes'
    _histo.help = 'Size of the returns response in bytes.'
    _histo.type = 4  # HISTOGRAM
    _sample_data = [
        {'ct': 1359194, 'sum': 199427281.0,
         'lbl': {'system': 'auth'},
         'buckets': {0.0: 0, 512.0: 1359194, 1024.0: 1359194,
                     1500.0: 1359194, 2048.0: 1359194, float('+Inf'): 1359194}},
        {'ct': 1359194, 'sum': 199427281.0,
         'lbl': {'system': 'recursive'},
         'buckets': {0.0: 0, 512.0: 520924, 1024.0: 520924,
                     1500.0: 520924, 2048.0: 520924, float('+Inf'): 520924}},
        {'ct': 1359194, 'sum': 199427281.0,
         'lbl': {'system': 'reverse'},
         'buckets': {0.0: 0, 512.0: 67648, 1024.0: 67648,
                     1500.0: 67648, 2048.0: 67648, float('+Inf'): 67648}},
    ]
    for _data in _sample_data:
        _h = _histo.metric.add()
        _h.histogram.sample_count = _data['ct']
        _h.histogram.sample_sum = _data['sum']
        for k, v in _data['lbl'].items():
            _lh = _h.label.add()
            _lh.name = k
            _lh.value = v
        # Buckets are added by increasing upper bound. Sorting the dict itself
        # iterates its keys and, unlike iterkeys(), also works on Python 3.
        for _b in sorted(_data['buckets']):
            _subh = _h.histogram.bucket.add()
            _subh.upper_bound = _b
            _subh.cumulative_count = _data['buckets'][_b]
    self.assertIn(_histo, messages)
def parse_metric_family(self, response):
    """
    Parse the MetricFamily from a valid requests.Response object to provide a MetricFamily object (see [0])

    This is a generator: MetricFamily objects are yielded one at a time.

    The text format uses iter_lines() generator.

    The protobuf format directly parse the response.content property searching for Prometheus messages of type
    MetricFamily [0] delimited by a varint32 [1] when the content-type is a `application/vnd.google.protobuf`.

    In both formats `self.type_overrides` (metric name -> type name) can
    replace the type announced by the endpoint. A falsy value (None or an
    empty mapping) means "no override". In text format, untyped metrics and
    metrics whose type is not in `self.METRIC_TYPES` are skipped.

    [0] https://github.com/prometheus/client_model/blob/086fe7ca28bde6cec2acd5223423c1475a362858/metrics.proto#L76-%20%20L81
    [1] https://developers.google.com/protocol-buffers/docs/reference/java/com/google/protobuf/AbstractMessageLite#writeDelimitedTo(java.io.OutputStream)

    :param response: requests.Response
    :return: generator of metrics_pb2.MetricFamily()
    :raises UnknownFormatError: when the Content-Type is neither protobuf nor text/plain
    """
    content_type = response.headers['Content-Type']

    if 'application/vnd.google.protobuf' in content_type:
        n = 0
        buf = response.content
        while n < len(buf):
            # Each message is prefixed by its length encoded as a varint32
            msg_len, new_pos = _DecodeVarint32(buf, n)
            n = new_pos
            msg_buf = buf[n:n + msg_len]
            n += msg_len

            message = metrics_pb2.MetricFamily()
            message.ParseFromString(msg_buf)

            # Lookup type overrides:
            if self.type_overrides and message.name in self.type_overrides:
                new_type = self.type_overrides[message.name]
                if new_type in self.METRIC_TYPES:
                    message.type = self.METRIC_TYPES.index(new_type)
                else:
                    self.log.debug(
                        "type override %s for %s is not a valid type name" %
                        (new_type, message.name))
            yield message

    elif 'text/plain' in content_type:
        # map with the name of the element (before the labels)
        # and the list of occurrences with labels and values
        messages = defaultdict(list)
        obj_map = {}  # map of the types of each metrics
        obj_help = {}  # help for the metrics
        for metric in text_fd_to_metric_families(
                response.iter_lines(chunk_size=self.REQUESTS_CHUNK_SIZE)):
            # Histogram samples are stored (and overridden) under "<name>_bucket"
            metric_name = "%s_bucket" % metric.name if metric.type == "histogram" else metric.name
            # Same tolerance as the protobuf branch: a falsy type_overrides
            # (e.g. None) simply means there is nothing to override.
            metric_type = (self.type_overrides or {}).get(metric_name, metric.type)
            if metric_type == "untyped" or metric_type not in self.METRIC_TYPES:
                continue

            for sample in metric.samples:
                # sample is indexed as (name, labels, value)
                if sample[0].endswith(("_sum", "_count")) and \
                        metric_type in ["histogram", "summary"]:
                    # _sum/_count series are kept under their own sample name
                    messages[sample[0]].append({
                        "labels": sample[1],
                        'value': sample[2]
                    })
                else:
                    messages[metric_name].append({
                        "labels": sample[1],
                        'value': sample[2]
                    })
            obj_map[metric.name] = metric_type
            obj_help[metric.name] = metric.documentation

        for _m in obj_map:
            if _m in messages or (obj_map[_m] == 'histogram' and
                                  ('{}_bucket'.format(_m) in messages)):
                yield self._extract_metric_from_map(
                    _m, messages, obj_map, obj_help)
    else:
        raise UnknownFormatError(
            'Unsupported content-type provided: {}'.format(content_type))
def test_parse_metric_family_text(self):
    """
    Test the high level method for loading metrics from text format.

    The text fixture is parsed twice through ``parse_metric_family(response)``:
    first without any type override, then with a gauge override for
    ``go_goroutines``. One hand-built counter, gauge, summary and histogram
    family must then each be found among the results.
    """
    response = MockResponse(self.text_data, 'text/plain; version=0.0.4')
    messages = list(self.check.parse_metric_family(response))
    # total metrics are 41 but one is typeless and we expect it not to be
    # parsed...
    self.assertEqual(len(messages), 40)
    # ...unless the check overrides the type manually
    self.check.type_overrides = {"go_goroutines": "gauge"}
    response = MockResponse(self.text_data, 'text/plain; version=0.0.4')
    messages = list(self.check.parse_metric_family(response))
    self.assertEqual(len(messages), 41)

    # Tests correct parsing of counters
    _counter = metrics_pb2.MetricFamily()
    _counter.name = 'skydns_skydns_dns_cachemiss_count_total'
    _counter.help = 'Counter of DNS requests that result in a cache miss.'
    _counter.type = 0  # COUNTER
    _c = _counter.metric.add()
    _c.counter.value = 1359194.0
    _lc = _c.label.add()
    _lc.name = 'cache'
    _lc.value = 'response'
    self.assertIn(_counter, messages)

    # Tests correct parsing of gauges
    _gauge = metrics_pb2.MetricFamily()
    _gauge.name = 'go_memstats_heap_alloc_bytes'
    _gauge.help = 'Number of heap bytes allocated and still in use.'
    _gauge.type = 1  # GAUGE
    _gauge.metric.add().gauge.value = 6396288.0
    self.assertIn(_gauge, messages)

    # Tests correct parsing of summaries
    _summary = metrics_pb2.MetricFamily()
    _summary.name = 'http_response_size_bytes'
    _summary.help = 'The HTTP response sizes in bytes.'
    _summary.type = 2  # SUMMARY
    _sm = _summary.metric.add()
    _lsm = _sm.label.add()
    _lsm.name = 'handler'
    _lsm.value = 'prometheus'
    _sm.summary.sample_count = 25
    _sm.summary.sample_sum = 147728.0
    _sq1 = _sm.summary.quantile.add()
    _sq1.quantile = 0.5
    _sq1.value = 21470.0
    _sq2 = _sm.summary.quantile.add()
    _sq2.quantile = 0.9
    _sq2.value = 21470.0
    _sq3 = _sm.summary.quantile.add()
    _sq3.quantile = 0.99
    _sq3.value = 21470.0
    self.assertIn(_summary, messages)

    # Tests correct parsing of histograms
    _histo = metrics_pb2.MetricFamily()
    _histo.name = 'skydns_skydns_dns_response_size_bytes'
    _histo.help = 'Size of the returns response in bytes.'
    _histo.type = 4  # HISTOGRAM
    _sample_data = [
        {
            'ct': 1359194,
            'sum': 199427281.0,
            'lbl': {
                'system': 'auth'
            },
            'buckets': {
                0.0: 0,
                512.0: 1359194,
                1024.0: 1359194,
                1500.0: 1359194,
                2048.0: 1359194,
                float('+Inf'): 1359194
            }
        },
        {
            'ct': 520924,
            'sum': 41527128.0,
            'lbl': {
                'system': 'recursive'
            },
            'buckets': {
                0.0: 0,
                512.0: 520924,
                1024.0: 520924,
                1500.0: 520924,
                2048.0: 520924,
                float('+Inf'): 520924
            }
        },
        {
            'ct': 67648,
            'sum': 6075182.0,
            'lbl': {
                'system': 'reverse'
            },
            'buckets': {
                0.0: 0,
                512.0: 67648,
                1024.0: 67648,
                1500.0: 67648,
                2048.0: 67648,
                float('+Inf'): 67648
            }
        },
    ]
    for _data in _sample_data:
        _h = _histo.metric.add()
        _h.histogram.sample_count = _data['ct']
        _h.histogram.sample_sum = _data['sum']
        for k, v in _data['lbl'].items():
            _lh = _h.label.add()
            _lh.name = k
            _lh.value = v
        # Buckets are added by increasing upper bound. Sorting the dict itself
        # iterates its keys and, unlike iterkeys(), also works on Python 3.
        for _b in sorted(_data['buckets']):
            _subh = _h.histogram.bucket.add()
            _subh.upper_bound = _b
            _subh.cumulative_count = _data['buckets'][_b]
    self.assertIn(_histo, messages)
def _extract_metric_from_map(self, _m, messages, obj_map, obj_help):
    """
    Build the MetricFamily named `_m` out of the maps filled while parsing
    the text format.

    :param _m: name of the metric family to build
    :param messages: map of sample name -> list of {'labels': ..., 'value': ...}
    :param obj_map: map of metric name -> metric type name
    :param obj_help: map of metric name -> help text
    :return: metrics_pb2.MetricFamily()
    """
    metric_type = obj_map[_m]

    family = metrics_pb2.MetricFamily()
    family.name = _m
    family.type = self.METRIC_TYPES.index(metric_type)
    if _m in obj_help:
        family.help = obj_help[_m]

    # Histogram samples are stored under the "<name>_bucket" key
    if metric_type == 'histogram':
        samples_key = '{}_bucket'.format(_m)
    else:
        samples_key = _m

    count_key = '{}_count'.format(_m)
    sum_key = '{}_sum'.format(_m)

    # Walk the {labels, value} entries collected earlier
    for entry in messages[samples_key]:
        labels = entry['labels']

        # Quantiles and buckets sharing the same labels belong to the same
        # metric, so look for an existing one before creating a new one.
        target = None
        if metric_type in ['summary', 'histogram'] and len(family.metric) > 0:
            own_labels = {
                key: val
                for key, val in labels.items()
                if key not in ['quantile', 'le']
            }
            target = next(
                (candidate for candidate in family.metric
                 if own_labels == {item.name: item.value for item in candidate.label}),
                None)
        if target is None:
            target = family.metric.add()

        if metric_type == 'counter':
            target.counter.value = float(entry['value'])
        elif metric_type == 'gauge':
            target.gauge.value = float(entry['value'])
        elif metric_type == 'summary':
            if count_key in messages:
                target.summary.sample_count = long(
                    self.get_metric_value_by_labels(messages, entry, _m, 'count'))
            if sum_key in messages:
                target.summary.sample_sum = self.get_metric_value_by_labels(
                    messages, entry, _m, 'sum')
        # TODO: see what can be done with the untyped metrics
        elif metric_type == 'histogram':
            if count_key in messages:
                target.histogram.sample_count = long(
                    self.get_metric_value_by_labels(messages, entry, _m, 'count'))
            if sum_key in messages:
                target.histogram.sample_sum = self.get_metric_value_by_labels(
                    messages, entry, _m, 'sum')

        for label_name in labels:
            if label_name == 'quantile':
                # In the string format, the quantiles are carried by the labels
                quantile = target.summary.quantile.add()
                quantile.quantile = float(labels[label_name])
                quantile.value = float(entry['value'])
            elif metric_type == 'histogram' and label_name == 'le':
                # In the string format, upper bounds are stored as "le" labels
                bucket = target.histogram.bucket.add()
                bucket.upper_bound = float(labels[label_name])
                bucket.cumulative_count = long(float(entry['value']))
            elif not any(label_name == known.name for known in target.label):
                # Plain label: add it only once per metric
                new_label = target.label.add()
                new_label.name = label_name
                new_label.value = labels[label_name]

    return family