def collect(files):
    """Collect multiprocess metrics from the given mmap files.

    This is almost verbatim from MultiProcessCollector.collect().

    The original collects all results in a format designed to be scraped. We
    instead need to collect limited results, in a format that can be written
    back to disk. To facilitate this, this version of collect() preserves
    label ordering, and does not aggregate the histograms.

    Specifically, it differs from the original:

    1. it takes its files as an argument, rather than hardcoding '*.db'
    2. it does not accumulate histograms
    3. it preserves label order, to facilitate being inserted back into an
       mmap file.

    It needs to be kept up to date with changes to prometheus_client as much
    as possible, or until changes are landed upstream to allow reuse of
    collect().

    Args:
        files: iterable of paths to metric files. Paths that do not exist
            are skipped. The basename of each path is split on '_': the
            first part is used as the metric type and, for gauges, the
            second part as the multiprocess mode and the third part (minus
            its last three characters) as the value of the 'pid' label.

    Returns:
        The ``dict.values()`` view of the collected ``core.Metric`` objects,
        each with ``samples`` replaced by a list of
        ``(name, OrderedDict(labels), value)`` tuples.
    """
    from prometheus_client import core
    metrics = {}
    for f in files:
        if not os.path.exists(f):
            continue
        # verbatim from here...
        parts = os.path.basename(f).split('_')
        typ = parts[0]
        d = core._MmapedDict(f, read_mode=True)
        # Not upstream: try/finally so the mmap is released even when a key
        # fails to decode or reading the file raises.
        try:
            for key, value in d.read_all_values():
                metric_name, name, labelnames, labelvalues = json.loads(key)

                metric = metrics.get(metric_name)
                if metric is None:
                    metric = core.Metric(
                        metric_name, 'Multiprocess metric', typ)
                    metrics[metric_name] = metric

                if typ == 'gauge':
                    pid = parts[2][:-3]
                    metric._multiprocess_mode = parts[1]
                    metric.add_sample(
                        name,
                        tuple(zip(labelnames, labelvalues)) + (('pid', pid), ),
                        value,
                    )
                else:
                    # The duplicates and labels are fixed in the next for.
                    metric.add_sample(
                        name,
                        tuple(zip(labelnames, labelvalues)),
                        value,
                    )
        finally:
            d.close()

    for metric in metrics.values():
        # Keyed by (sample name, label tuple); missing keys start at 0.0.
        samples = defaultdict(float)
        # Keyed by label tuple without 'le' -> {upper bound: summed value}.
        buckets = {}
        for name, labels, value in metric.samples:
            if metric.type == 'gauge':
                without_pid = tuple(l for l in labels if l[0] != 'pid')
                if metric._multiprocess_mode == 'min':
                    current = samples.setdefault((name, without_pid), value)
                    if value < current:
                        samples[(name, without_pid)] = value
                elif metric._multiprocess_mode == 'max':
                    current = samples.setdefault((name, without_pid), value)
                    if value > current:
                        samples[(name, without_pid)] = value
                elif metric._multiprocess_mode == 'livesum':
                    samples[(name, without_pid)] += value
                else:  # all/liveall
                    samples[(name, labels)] = value

            elif metric.type == 'histogram':
                bucket = tuple(float(l[1]) for l in labels if l[0] == 'le')
                if bucket:
                    # _bucket
                    without_le = tuple(l for l in labels if l[0] != 'le')
                    buckets.setdefault(without_le, {})
                    buckets[without_le].setdefault(bucket[0], 0.0)
                    buckets[without_le][bucket[0]] += value
                else:
                    # _sum/_count
                    samples[(name, labels)] += value

            else:
                # Counter and Summary.
                samples[(name, labels)] += value

        # end of verbatim copy
        # modified to remove accumulation
        if metric.type == 'histogram':
            for labels, values in buckets.items():
                for bucket, value in sorted(values.items()):
                    key = (
                        metric.name + '_bucket',
                        labels + (('le', core._floatToGoString(bucket)), ),
                    )
                    samples[key] = value

        # Convert to correct sample format.
        metric.samples = [
            # OrderedDict used instead of dict
            (name, OrderedDict(labels), value)
            for (name, labels), value in samples.items()
        ]
    return metrics.values()
def build_metric(name, documentation, typ, samples):
    """Build a ``core.Metric`` and attach pre-computed samples to it.

    Args:
        name: metric name, passed to ``core.Metric``.
        documentation: help text, passed to ``core.Metric``.
        typ: metric type, passed to ``core.Metric``.
        samples: assigned as-is to the new metric's ``samples`` attribute.

    Returns:
        The newly constructed ``core.Metric``.
    """
    # Imported lazily, in the same way collect() does, so this function does
    # not depend on a module-level ``core`` name being present.
    from prometheus_client import core
    metric = core.Metric(name, documentation, typ)
    metric.samples = samples
    return metric