def __init__(self, metrics, metric_group_prefix, subscription):
    """Register consumer-coordinator sensors and gauges.

    Arguments:
        metrics: the metrics registry to record into
        metric_group_prefix (str): prefix for the metric group name
        subscription: subscription state; queried for the current
            partition assignment by the gauge below
    """
    self.metrics = metrics
    self.metric_group_name = metric_group_prefix + '-coordinator-metrics'

    # Commit-latency sensor: average / max latency plus commit rate.
    sensor = metrics.sensor('commit-latency')
    for metric, description, stat in (
            ('commit-latency-avg',
             'The average time taken for a commit request', Avg()),
            ('commit-latency-max',
             'The max time taken for a commit request', Max()),
            ('commit-rate',
             'The number of commit calls per second',
             Rate(sampled_stat=Count()))):
        sensor.add(
            metrics.metric_name(metric, self.metric_group_name, description),
            stat)
    self.commit_latency = sensor

    # Gauge: number of partitions currently assigned to this consumer.
    metrics.add_metric(
        metrics.metric_name(
            'assigned-partitions', self.metric_group_name,
            'The number of partitions currently assigned to this consumer'),
        AnonMeasurable(
            lambda config, now: len(subscription.assigned_partitions())))
def stats(self):
    """Return one NamedMeasurable per configured percentile.

    Each measurable evaluates self.value(...) at that percentile's
    fraction (percentile / 100.0).
    """
    # The percentile fraction is bound per-iteration via a default
    # argument, so each closure keeps its own value rather than the
    # loop variable's final value (late-binding pitfall).
    return [
        NamedMeasurable(
            p.name,
            AnonMeasurable(
                lambda config, now, pct=p.percentile:
                    self.value(config, now, pct / 100.0)))
        for p in self._percentiles
    ]
def __init__(self, heartbeat, metrics, prefix, tags=None):
    """Register group-coordinator sensors (heartbeat/join/sync) and gauges.

    Arguments:
        heartbeat: heartbeat state object; its last_send timestamp feeds
            the last-heartbeat-seconds-ago gauge
        metrics: the metrics registry to record into
        prefix (str): prefix for the metric group name
        tags (dict, optional): tags attached to every metric name
    """
    self.heartbeat = heartbeat
    self.metrics = metrics
    self.metric_group_name = prefix + "-coordinator-metrics"

    def _sensor(sensor_name, *metric_specs):
        # Create a sensor and attach each (name, description, stat) triple.
        sensor = metrics.sensor(sensor_name)
        for metric, description, stat in metric_specs:
            sensor.add(
                metrics.metric_name(metric, self.metric_group_name,
                                    description, tags),
                stat)
        return sensor

    self.heartbeat_latency = _sensor(
        'heartbeat-latency',
        ('heartbeat-response-time-max',
         'The max time taken to receive a response to a heartbeat request',
         Max()),
        ('heartbeat-rate',
         'The average number of heartbeats per second',
         Rate(sampled_stat=Count())))

    self.join_latency = _sensor(
        'join-latency',
        ('join-time-avg',
         'The average time taken for a group rejoin', Avg()),
        ('join-time-max',
         'The max time taken for a group rejoin', Max()),
        ('join-rate',
         'The number of group joins per second',
         Rate(sampled_stat=Count())))

    self.sync_latency = _sensor(
        'sync-latency',
        ('sync-time-avg',
         'The average time taken for a group sync', Avg()),
        ('sync-time-max',
         'The max time taken for a group sync', Max()),
        ('sync-rate',
         'The number of group syncs per second',
         Rate(sampled_stat=Count())))

    # Gauge: seconds since the last heartbeat send ('now' arrives in ms).
    metrics.add_metric(
        metrics.metric_name(
            'last-heartbeat-seconds-ago', self.metric_group_name,
            'The number of seconds since the last controller heartbeat was sent',
            tags),
        AnonMeasurable(
            lambda _, now: (now / 1000) - self.heartbeat.last_send))
def __init__(self, metrics, metric_group_prefix, conns):
    """Register selector / network I/O sensors and gauges.

    Arguments:
        metrics: the metrics registry to record into
        metric_group_prefix (str): prefix for the metric group name
        conns: connection container; its len() feeds the
            connection-count gauge
    """
    self.metrics = metrics
    self.metric_group_name = metric_group_prefix + '-metrics'

    def _register(sensor_name, *metric_specs):
        # Build a sensor and attach each (name, description, stat) triple.
        sensor = metrics.sensor(sensor_name)
        for metric, description, stat in metric_specs:
            sensor.add(
                metrics.metric_name(metric, self.metric_group_name,
                                    description),
                stat)
        return sensor

    self.connection_closed = _register(
        'connections-closed',
        ('connection-close-rate',
         'Connections closed per second in the window.', Rate()))

    self.connection_created = _register(
        'connections-created',
        ('connection-creation-rate',
         'New connections established per second in the window.', Rate()))

    self.select_time = _register(
        'select-time',
        ('select-rate',
         'Number of times the I/O layer checked for new I/O to perform per'
         ' second',
         Rate(sampled_stat=Count())),
        ('io-wait-time-ns-avg',
         'The average length of time the I/O thread spent waiting for a'
         ' socket ready for reads or writes in nanoseconds.',
         Avg()),
        ('io-wait-ratio',
         'The fraction of time the I/O thread spent waiting.',
         Rate(time_unit=TimeUnit.NANOSECONDS)))

    self.io_time = _register(
        'io-time',
        ('io-time-ns-avg',
         'The average length of time for I/O per select call in nanoseconds.',
         Avg()),
        ('io-ratio',
         'The fraction of time the I/O thread spent doing I/O',
         Rate(time_unit=TimeUnit.NANOSECONDS)))

    # Gauge: current number of open connections.
    metrics.add_metric(
        metrics.metric_name('connection-count', self.metric_group_name,
                            'The current number of active connections.'),
        AnonMeasurable(lambda config, now: len(conns)))
def __init__(self, default_config=None, reporters=None,
             enable_expiration=False):
    """
    Create a metrics repository with a default config, given metric
    reporters and the ability to expire eligible sensors

    Arguments:
        default_config (MetricConfig, optional): The default config
        reporters (list of AbstractMetricsReporter, optional):
            The metrics reporters
        enable_expiration (bool, optional): true if the metrics instance
            can garbage collect inactive sensors, false otherwise
    """
    # Lock guarding the registry dicts below (presumably taken by the
    # class's sensor/metric registration methods — not visible here).
    self._lock = threading.RLock()
    self._config = default_config or MetricConfig()
    # Registry state: sensors and metrics keyed by name, plus the
    # parent -> child sensor relationships.
    self._sensors = {}
    self._metrics = {}
    self._children_sensors = {}
    # Give every reporter a chance to initialize; the registry starts
    # empty, hence the empty metric list.
    self._reporters = reporters or []
    for reporter in self._reporters:
        reporter.init([])

    if enable_expiration:
        # Background garbage collection of inactive sensors: run the
        # expiration task every 30 seconds for the lifetime of the process.
        def expire_loop():
            while True:
                # delay 30 seconds
                time.sleep(30)
                self.ExpireSensorTask.run(self)
        metrics_scheduler = threading.Thread(target=expire_loop)
        # Creating a daemon thread to not block shutdown
        metrics_scheduler.daemon = True
        metrics_scheduler.start()

    # Self-describing gauge: total number of metrics currently registered.
    self.add_metric(
        self.metric_name("count", "kafka-metrics-count",
                         "total number of registered metrics"),
        AnonMeasurable(lambda config, now: len(self._metrics)),
    )
def __init__(self, metrics, metric_group_prefix, conns):
    """Register selector / network I/O sensors and gauges.

    Arguments:
        metrics: the metrics registry to record into
        metric_group_prefix (str): prefix for the metric group name
        conns: connection container; its len() feeds the
            connection-count gauge
    """
    self.metrics = metrics
    self.metric_group_name = metric_group_prefix + "-metrics"

    # Table of sensors to create: (sensor name, attribute to store it on,
    # list of (metric name, description, stat)).
    sensor_table = [
        ("connections-closed", "connection_closed", [
            ("connection-close-rate",
             "Connections closed per second in the window.",
             Rate()),
        ]),
        ("connections-created", "connection_created", [
            ("connection-creation-rate",
             "New connections established per second in the window.",
             Rate()),
        ]),
        ("select-time", "select_time", [
            ("select-rate",
             "Number of times the I/O layer checked for new I/O to perform per"
             " second",
             Rate(sampled_stat=Count())),
            ("io-wait-time-ns-avg",
             "The average length of time the I/O thread spent waiting for a"
             " socket ready for reads or writes in nanoseconds.",
             Avg()),
            ("io-wait-ratio",
             "The fraction of time the I/O thread spent waiting.",
             Rate(time_unit=TimeUnit.NANOSECONDS)),
        ]),
        ("io-time", "io_time", [
            ("io-time-ns-avg",
             "The average length of time for I/O per select call in nanoseconds.",
             Avg()),
            ("io-ratio",
             "The fraction of time the I/O thread spent doing I/O",
             Rate(time_unit=TimeUnit.NANOSECONDS)),
        ]),
    ]
    for sensor_name, attr_name, metric_specs in sensor_table:
        sensor = metrics.sensor(sensor_name)
        for metric, description, stat in metric_specs:
            sensor.add(
                metrics.metric_name(metric, self.metric_group_name,
                                    description),
                stat)
        setattr(self, attr_name, sensor)

    # Gauge: current number of open connections.
    metrics.add_metric(
        metrics.metric_name(
            "connection-count", self.metric_group_name,
            "The current number of active connections.",
        ),
        AnonMeasurable(lambda config, now: len(conns)),
    )