def _DistributionFromHistogram(metric, values_by_suffix):
    """Instantiate an rdf_stats.Distribution from a Prometheus Histogram.

    Prometheus Histogram uses cumulative "buckets" lower or equal to an upper
    bound. At instantiation, +Inf is implicitly appended to the upper bounds.
    The delimiters [0.0, 0.1, 0.2 (, +Inf)] produce the following buckets:
      Bucket "0.0" : -Inf <= values <=  0.0
      Bucket "0.1" : -Inf <= values <=  0.1
      Bucket "0.2" : -Inf <= values <=  0.2
      Bucket "+Inf": -Inf <= values <= +Inf

    Distribution uses exclusive bins greater or equal to a lower bound and
    strictly lower than the next lower bound. At instantiation, -Inf is
    implicitly prepended. The delimiters [(-Inf,) 0.0, 0.1, 0.2] produce the
    following bins:
      Bin "-Inf": -Inf <= values <  0.0
      Bin "0.0" :  0.0 <= values <  0.1
      Bin "0.1" :  0.1 <= values <  0.2
      Bin "0.2" :  0.2 <= values <= +Inf

    Thus, Histogram buckets can be transformed to Distribution bins by reading
    them in the same order and subtracting the value of the previous bucket to
    remove the cumulative sum. There is a slight incompatibility for values
    equal to bin boundaries, because boundaries describe the upper bound for
    Prometheus and the lower bound for our internal implementation.

    Args:
      metric: prometheus_stats_collector.Metric
      values_by_suffix: dict of metric name suffixes and sample values lists.
        The "_bucket", "_count" and "_sum" entries are read; the lists are not
        modified.

    Returns:
      rdf_stats.Distribution

    Raises:
      ValueError: The Histogram and metadata bin count do not match.
    """
    dist = rdf_stats.Distribution(bins=list(metric.metadata.bins))
    cumulative = list(values_by_suffix["_bucket"])

    # The count check is only meaningful when the metadata declares bins.
    if metric.metadata.bins and len(dist.heights) != len(cumulative):
        raise ValueError(
            "Trying to create Distribution with {} bins, but underlying "
            "Histogram has {} buckets".format(
                len(dist.heights), len(cumulative)))

    # Remove the cumulative sum: every bucket but the first has the value of
    # its predecessor subtracted. The result is built from a copy of the
    # samples so the caller's list is left untouched.
    dist.heights = cumulative[:1] + [
        current - previous
        for previous, current in zip(cumulative, cumulative[1:])
    ]

    dist.count = values_by_suffix["_count"][0]
    dist.sum = values_by_suffix["_sum"][0]
    return dist
def testStatsEntryId_IgnoreMetricValues(self):
    """Checks that the metric value has no influence on id generation.

    Entries sharing process id, metric name and timestamp, but carrying
    values of different types, must all map to the same id.
    """
    value_types = rdf_stats.MetricMetadata.ValueType
    metric_values = [
        stats_values.StatsStoreValue(
            value_type=value_types.INT, int_value=42),
        stats_values.StatsStoreValue(
            value_type=value_types.FLOAT, float_value=4.2),
        # TODO: String gauges are deprecated.
        stats_values.StatsStoreValue(
            value_type=value_types.DEPRECATED_STR, str_value="foo"),
        stats_values.StatsStoreValue(
            value_type=value_types.DISTRIBUTION,
            distribution_value=rdf_stats.Distribution()),
    ]
    # Build every entry up front; only the metric value differs between them.
    entries = [
        stats_values.StatsStoreEntry(
            process_id="test_process",
            metric_name="test_metric",
            metric_value=metric_value,
            timestamp=_one_second_timestamp)
        for metric_value in metric_values
    ]

    expected_id = (
        b"\x8e\xf4\xe7\xdb\x03\x01}sB\x97\x98\x957\x18\x02U\xb0\xe6x\x9f"
        b"\x97Xfs/C\xedT\xd3\x89N\xe5")
    for entry in entries:
        self.assertEqual(db_utils.GenerateStatsEntryId(entry), expected_id)
def _DefaultValue(self):
    """Returns a new rdf_stats.Distribution built from this object's bins."""
    default_distribution = rdf_stats.Distribution(bins=self._bins)
    return default_distribution