Example #1
0
    def __init__(self, metrics, prefix):
        """Register consumer fetch-manager sensors on the metrics registry.

        Arguments:
            metrics: metrics registry providing sensor() / metric_name()
            prefix (str): prefix used to build the metric group name
                ('<prefix>-fetch-manager-metrics')

        Creates sensors for fetch sizes, fetched record counts, fetch
        latency, record lag, and fetch throttle time.
        """
        self.metrics = metrics
        self.group_name = '%s-fetch-manager-metrics' % prefix

        self.bytes_fetched = metrics.sensor('bytes-fetched')
        self.bytes_fetched.add(metrics.metric_name('fetch-size-avg', self.group_name,
            'The average number of bytes fetched per request'), Avg())
        self.bytes_fetched.add(metrics.metric_name('fetch-size-max', self.group_name,
            'The maximum number of bytes fetched per request'), Max())
        self.bytes_fetched.add(metrics.metric_name('bytes-consumed-rate', self.group_name,
            'The average number of bytes consumed per second'), Rate())

        # NOTE: use the local `metrics` reference consistently (the original
        # mixed `metrics` and `self.metrics`, which refer to the same object).
        self.records_fetched = metrics.sensor('records-fetched')
        self.records_fetched.add(metrics.metric_name('records-per-request-avg', self.group_name,
            'The average number of records in each request'), Avg())
        self.records_fetched.add(metrics.metric_name('records-consumed-rate', self.group_name,
            'The average number of records consumed per second'), Rate())

        self.fetch_latency = metrics.sensor('fetch-latency')
        self.fetch_latency.add(metrics.metric_name('fetch-latency-avg', self.group_name,
            'The average time taken for a fetch request.'), Avg())
        self.fetch_latency.add(metrics.metric_name('fetch-latency-max', self.group_name,
            'The max time taken for any fetch request.'), Max())
        # Rate over a Count sampled stat == requests per second
        self.fetch_latency.add(metrics.metric_name('fetch-rate', self.group_name,
            'The number of fetch requests per second.'), Rate(sampled_stat=Count()))

        self.records_fetch_lag = metrics.sensor('records-lag')
        self.records_fetch_lag.add(metrics.metric_name('records-lag-max', self.group_name,
            'The maximum lag in terms of number of records for any partition in self window'), Max())

        self.fetch_throttle_time_sensor = metrics.sensor('fetch-throttle-time')
        self.fetch_throttle_time_sensor.add(metrics.metric_name('fetch-throttle-time-avg', self.group_name,
            'The average throttle time in ms'), Avg())
        self.fetch_throttle_time_sensor.add(metrics.metric_name('fetch-throttle-time-max', self.group_name,
            'The maximum throttle time in ms'), Max())
Example #2
0
    def __init__(self, heartbeat, metrics, prefix, tags=None):
        """Register group-coordinator heartbeat / join / sync sensors.

        Arguments:
            heartbeat: heartbeat state object; `last_send` is read lazily
                by the last-heartbeat gauge
            metrics: metrics registry
            prefix (str): prefix for the metric group name
            tags (dict, optional): extra tags attached to every metric
        """
        self.heartbeat = heartbeat
        self.metrics = metrics
        self.metric_group_name = prefix + "-coordinator-metrics"

        def _name(name, description):
            # All metrics in this group share the group name and tags.
            return metrics.metric_name(name, self.metric_group_name,
                                       description, tags)

        self.heartbeat_latency = metrics.sensor('heartbeat-latency')
        self.heartbeat_latency.add(
            _name('heartbeat-response-time-max',
                  'The max time taken to receive a response to a heartbeat request'),
            Max())
        self.heartbeat_latency.add(
            _name('heartbeat-rate',
                  'The average number of heartbeats per second'),
            Rate(sampled_stat=Count()))

        self.join_latency = metrics.sensor('join-latency')
        for metric, description, stat in (
                ('join-time-avg',
                 'The average time taken for a group rejoin', Avg()),
                ('join-time-max',
                 'The max time taken for a group rejoin', Max()),
                ('join-rate',
                 'The number of group joins per second',
                 Rate(sampled_stat=Count()))):
            self.join_latency.add(_name(metric, description), stat)

        self.sync_latency = metrics.sensor('sync-latency')
        for metric, description, stat in (
                ('sync-time-avg',
                 'The average time taken for a group sync', Avg()),
                ('sync-time-max',
                 'The max time taken for a group sync', Max()),
                ('sync-rate',
                 'The number of group syncs per second',
                 Rate(sampled_stat=Count()))):
            self.sync_latency.add(_name(metric, description), stat)

        # Gauge: seconds since the last heartbeat was sent, evaluated at
        # measurement time from self.heartbeat.last_send.
        metrics.add_metric(
            _name('last-heartbeat-seconds-ago',
                  'The number of seconds since the last controller heartbeat was sent'),
            AnonMeasurable(
                lambda _, now: (now / 1000) - self.heartbeat.last_send))
Example #3
0
    def __init__(self, metrics, metric_group_prefix, subscription):
        """Register consumer-coordinator commit sensors and the
        assigned-partitions gauge.

        Arguments:
            metrics: metrics registry
            metric_group_prefix (str): prefix for the metric group name
            subscription: subscription state; assigned_partitions() is
                read lazily by the gauge
        """
        self.metrics = metrics
        self.metric_group_name = '%s-coordinator-metrics' % (
            metric_group_prefix, )

        self.commit_latency = metrics.sensor('commit-latency')
        for metric, description, stat in (
                ('commit-latency-avg',
                 'The average time taken for a commit request', Avg()),
                ('commit-latency-max',
                 'The max time taken for a commit request', Max()),
                ('commit-rate',
                 'The number of commit calls per second',
                 Rate(sampled_stat=Count()))):
            self.commit_latency.add(
                metrics.metric_name(metric, self.metric_group_name,
                                    description),
                stat)

        # Gauge: current assignment size, evaluated at measurement time.
        metrics.add_metric(
            metrics.metric_name(
                'assigned-partitions', self.metric_group_name,
                'The number of partitions currently assigned to this consumer'
            ),
            AnonMeasurable(
                lambda config, now: len(subscription.assigned_partitions())))
Example #4
0
def test_simple_stats(mocker, time_keeper, config, metrics):
    """Record a known series into a sensor and verify each registered stat."""
    mocker.patch('time.time', side_effect=time_keeper.time)

    def metric_value(name):
        # Look up a registered metric by name within group 'grp1'.
        return metrics.metrics.get(metrics.metric_name(name, 'grp1')).value()

    measurable = ConstantMeasurable()
    metrics.add_metric(
        metrics.metric_name('direct.measurable', 'grp1',
                            'The fraction of time an appender waits for space allocation.'),
        measurable)

    sensor = metrics.sensor('test.sensor')
    for name, stat in [('test.avg', Avg()),
                       ('test.max', Max()),
                       ('test.min', Min()),
                       ('test.rate', Rate(TimeUnit.SECONDS)),
                       ('test.occurences', Rate(TimeUnit.SECONDS, Count())),
                       ('test.count', Count())]:
        sensor.add(metrics.metric_name(name, 'grp1'), stat)
    sensor.add_compound(Percentiles(
        100, BucketSizing.CONSTANT, 100, -100,
        percentiles=[
            Percentile(metrics.metric_name('test.median', 'grp1'), 50.0),
            Percentile(metrics.metric_name('test.perc99_9', 'grp1'), 99.9)]))

    sensor2 = metrics.sensor('test.sensor2')
    sensor2.add(metrics.metric_name('s2.total', 'grp1'), Total())
    sensor2.record(5.0)

    sum_val = 0
    count = 10
    for value in range(count):
        sensor.record(value)
        sum_val += value

    # prior to any time passing
    elapsed_secs = (config.time_window_ms * (config.samples - 1)) / 1000.0
    assert abs(count / elapsed_secs - metric_value('test.occurences')) < EPS, \
        'Occurrences(0...%d) = %f' % (count, count / elapsed_secs)

    # pretend 2 seconds passed...
    sleep_time_seconds = 2.0
    time_keeper.sleep(sleep_time_seconds)
    elapsed_secs += sleep_time_seconds

    assert abs(5.0 - metric_value('s2.total')) < EPS, \
        's2 reflects the constant value'
    assert abs(4.5 - metric_value('test.avg')) < EPS, 'Avg(0...9) = 4.5'
    assert abs((count - 1) - metric_value('test.max')) < EPS, 'Max(0...9) = 9'
    assert abs(0.0 - metric_value('test.min')) < EPS, 'Min(0...9) = 0'
    assert abs((sum_val / elapsed_secs) - metric_value('test.rate')) < EPS, \
        'Rate(0...9) = 1.40625'
    assert abs((count / elapsed_secs) - metric_value('test.occurences')) < EPS, \
        'Occurrences(0...%d) = %f' % (count, count / elapsed_secs)
    assert abs(count - metric_value('test.count')) < EPS, 'Count(0...9) = 10'
Example #5
0
def test_old_data_has_no_effect(mocker, time_keeper):
    """Samples older than the configured window are purged, so each stat
    measures as its initial (empty) value."""
    mocker.patch('time.time', side_effect=time_keeper.time)

    max_stat = Max()
    min_stat = Min()
    avg_stat = Avg()
    count_stat = Count()

    window_ms = 100
    samples = 2
    config = MetricConfig(time_window_ms=window_ms, samples=samples)

    # Record one value into every stat at the current (mocked) time.
    for stat in (max_stat, min_stat, avg_stat, count_stat):
        stat.record(config, 50, time_keeper.ms())

    # Advance past all sample windows so the recorded value expires.
    time_keeper.sleep(samples * window_ms / 1000.0)

    assert float('-inf') == max_stat.measure(config, time_keeper.ms())
    assert float(sys.maxsize) == min_stat.measure(config, time_keeper.ms())
    assert 0.0 == avg_stat.measure(config, time_keeper.ms())
    assert 0 == count_stat.measure(config, time_keeper.ms())
Example #6
0
    def record_topic_fetch_metrics(self, topic, num_bytes, num_records):
        """Record per-topic fetch bytes/records, creating the topic's
        sensors on first use.

        Arguments:
            topic (str): topic name
            num_bytes (int): bytes fetched for this topic
            num_records (int): records fetched for this topic
        """
        # Dots in topic names are replaced for tag compatibility.
        tag_dict = {'topic': topic.replace('.', '_')}

        # record bytes fetched
        name = '.'.join(['topic', topic, 'bytes-fetched'])
        bytes_fetched = self.metrics.get_sensor(name)
        if not bytes_fetched:
            bytes_fetched = self.metrics.sensor(name)
            for metric, description, stat in (
                    ('fetch-size-avg',
                     'The average number of bytes fetched per request for topic %s',
                     Avg()),
                    ('fetch-size-max',
                     'The maximum number of bytes fetched per request for topic %s',
                     Max()),
                    ('bytes-consumed-rate',
                     'The average number of bytes consumed per second for topic %s',
                     Rate())):
                bytes_fetched.add(
                    self.metrics.metric_name(
                        metric, self.group_name,
                        description % (topic, ), tag_dict),
                    stat)
        bytes_fetched.record(num_bytes)

        # record records fetched
        name = '.'.join(['topic', topic, 'records-fetched'])
        records_fetched = self.metrics.get_sensor(name)
        if not records_fetched:
            records_fetched = self.metrics.sensor(name)
            for metric, description, stat in (
                    ('records-per-request-avg',
                     'The average number of records in each request for topic %s',
                     Avg()),
                    ('records-consumed-rate',
                     'The average number of records consumed per second for topic %s',
                     Rate())):
                records_fetched.add(
                    self.metrics.metric_name(
                        metric, self.group_name,
                        description % (topic, ), tag_dict),
                    stat)
        records_fetched.record(num_records)
Example #7
0
    def __init__(self, metrics, metric_group_prefix, node_id):
        """Register network I/O sensors for one broker connection.

        First ensures the aggregate (all-connection) sensors exist, then
        creates the per-node sensors parented to the aggregates so that
        per-node records roll up, and finally binds the per-node sensors
        onto this instance.

        Arguments:
            metrics: metrics registry providing sensor() / get_sensor() /
                metric_name()
            metric_group_prefix (str): prefix for metric group names
            node_id: broker node id; namespaces the per-node sensor names
        """
        self.metrics = metrics

        # Any broker may have registered summary metrics already
        # but if not, we need to create them so we can set as parents below
        all_conns_transferred = metrics.get_sensor('bytes-sent-received')
        if not all_conns_transferred:
            metric_group_name = metric_group_prefix + '-metrics'

            bytes_transferred = metrics.sensor('bytes-sent-received')
            bytes_transferred.add(metrics.metric_name(
                'network-io-rate', metric_group_name,
                'The average number of network operations (reads or writes) on all'
                ' connections per second.'), Rate(sampled_stat=Count()))

            # Aggregate outgoing traffic; parented to bytes-sent-received so
            # every record here also updates the combined sensor.
            bytes_sent = metrics.sensor('bytes-sent',
                                        parents=[bytes_transferred])
            bytes_sent.add(metrics.metric_name(
                'outgoing-byte-rate', metric_group_name,
                'The average number of outgoing bytes sent per second to all'
                ' servers.'), Rate())
            bytes_sent.add(metrics.metric_name(
                'request-rate', metric_group_name,
                'The average number of requests sent per second.'),
                Rate(sampled_stat=Count()))
            bytes_sent.add(metrics.metric_name(
                'request-size-avg', metric_group_name,
                'The average size of all requests in the window.'), Avg())
            bytes_sent.add(metrics.metric_name(
                'request-size-max', metric_group_name,
                'The maximum size of any request sent in the window.'), Max())

            # Aggregate incoming traffic, also rolled up into
            # bytes-sent-received.
            bytes_received = metrics.sensor('bytes-received',
                                            parents=[bytes_transferred])
            bytes_received.add(metrics.metric_name(
                'incoming-byte-rate', metric_group_name,
                'Bytes/second read off all sockets'), Rate())
            bytes_received.add(metrics.metric_name(
                'response-rate', metric_group_name,
                'Responses received sent per second.'),
                Rate(sampled_stat=Count()))

            # Aggregate request latency; registered without a parent.
            request_latency = metrics.sensor('request-latency')
            request_latency.add(metrics.metric_name(
                'request-latency-avg', metric_group_name,
                'The average request latency in ms.'),
                Avg())
            request_latency.add(metrics.metric_name(
                'request-latency-max', metric_group_name,
                'The maximum request latency in ms.'),
                Max())

        # if one sensor of the metrics has been registered for the connection,
        # then all other sensors should have been registered; and vice versa
        node_str = 'node-{0}'.format(node_id)
        node_sensor = metrics.get_sensor(node_str + '.bytes-sent')
        if not node_sensor:
            metric_group_name = metric_group_prefix + '-node-metrics.' + node_str

            # Per-node outgoing traffic, parented to the aggregate
            # 'bytes-sent' sensor created above.
            bytes_sent = metrics.sensor(
                node_str + '.bytes-sent',
                parents=[metrics.get_sensor('bytes-sent')])
            bytes_sent.add(metrics.metric_name(
                'outgoing-byte-rate', metric_group_name,
                'The average number of outgoing bytes sent per second.'),
                Rate())
            bytes_sent.add(metrics.metric_name(
                'request-rate', metric_group_name,
                'The average number of requests sent per second.'),
                Rate(sampled_stat=Count()))
            bytes_sent.add(metrics.metric_name(
                'request-size-avg', metric_group_name,
                'The average size of all requests in the window.'),
                Avg())
            bytes_sent.add(metrics.metric_name(
                'request-size-max', metric_group_name,
                'The maximum size of any request sent in the window.'),
                Max())

            # Per-node incoming traffic, parented to the aggregate
            # 'bytes-received' sensor.
            bytes_received = metrics.sensor(
                node_str + '.bytes-received',
                parents=[metrics.get_sensor('bytes-received')])
            bytes_received.add(metrics.metric_name(
                'incoming-byte-rate', metric_group_name,
                'Bytes/second read off node-connection socket'),
                Rate())
            bytes_received.add(metrics.metric_name(
                'response-rate', metric_group_name,
                'The average number of responses received per second.'),
                Rate(sampled_stat=Count()))

            # Per-node request latency, parented to the aggregate
            # 'request-latency' sensor.
            request_time = metrics.sensor(
                node_str + '.latency',
                parents=[metrics.get_sensor('request-latency')])
            request_time.add(metrics.metric_name(
                'request-latency-avg', metric_group_name,
                'The average request latency in ms.'),
                Avg())
            request_time.add(metrics.metric_name(
                'request-latency-max', metric_group_name,
                'The maximum request latency in ms.'),
                Max())

        # NOTE(review): these calls appear to rely on metrics.sensor() having
        # get-or-create semantics, returning the per-node sensors registered
        # above (or by an earlier connection to this node) — confirm against
        # the registry implementation.
        self.bytes_sent = metrics.sensor(node_str + '.bytes-sent')
        self.bytes_received = metrics.sensor(node_str + '.bytes-received')
        self.request_time = metrics.sensor(node_str + '.latency')
Example #8
0
    def __init__(self, metrics, client, metadata):
        """Register producer sender sensors and gauges via self.add_metric.

        Arguments:
            metrics: metrics registry
            client: network client; in_flight_request_count() is read
                lazily by the requests-in-flight gauge
            metadata: cluster metadata; _last_successful_refresh_ms is
                read lazily by the metadata-age gauge
        """
        self.metrics = metrics
        self._client = client
        self._metadata = metadata

        # (attribute name, sensor name, [(metric name, stat, description)])
        sensor_table = [
            ('batch_size_sensor', 'batch-size', [
                ('batch-size-avg', Avg(),
                 'The average number of bytes sent per partition per-request.'),
                ('batch-size-max', Max(),
                 'The max number of bytes sent per partition per-request.'),
            ]),
            ('compression_rate_sensor', 'compression-rate', [
                ('compression-rate-avg', Avg(),
                 'The average compression rate of record batches.'),
            ]),
            ('queue_time_sensor', 'queue-time', [
                ('record-queue-time-avg', Avg(),
                 'The average time in ms record batches spent in the record accumulator.'),
                ('record-queue-time-max', Max(),
                 'The maximum time in ms record batches spent in the record accumulator.'),
            ]),
            ('produce_throttle_time_sensor', 'produce-throttle-time', [
                ('produce-throttle-time-avg', Avg(),
                 'The average throttle time in ms'),
                ('produce-throttle-time-max', Max(),
                 'The maximum throttle time in ms'),
            ]),
            ('records_per_request_sensor', 'records-per-request', [
                ('record-send-rate', Rate(),
                 'The average number of records sent per second.'),
                ('records-per-request-avg', Avg(),
                 'The average number of records per request.'),
            ]),
            ('byte_rate_sensor', 'bytes', [
                ('byte-rate', Rate(),
                 'The average number of bytes sent per second.'),
            ]),
            ('retry_sensor', 'record-retries', [
                ('record-retry-rate', Rate(),
                 'The average per-second number of retried record sends'),
            ]),
            ('error_sensor', 'errors', [
                ('record-error-rate', Rate(),
                 'The average per-second number of record sends that resulted in errors'),
            ]),
            ('max_record_size_sensor', 'record-size-max', [
                ('record-size-max', Max(),
                 'The maximum record size across all batches'),
                ('record-size-avg', Avg(),
                 'The average maximum record size per batch'),
            ]),
        ]
        for attr, sensor_name, metric_specs in sensor_table:
            setattr(self, attr, self.metrics.sensor(sensor_name))
            for metric, stat, description in metric_specs:
                self.add_metric(metric, stat,
                                sensor_name=sensor_name,
                                description=description)

        # Gauges registered without a backing sensor; values are read
        # lazily from the client and metadata at measurement time.
        self.add_metric(
            'requests-in-flight',
            AnonMeasurable(lambda *_: self._client.in_flight_request_count()),
            description=
            'The current number of in-flight requests awaiting a response.')
        self.add_metric(
            'metadata-age',
            AnonMeasurable(lambda _, now: (
                now - self._metadata._last_successful_refresh_ms) / 1000),
            description=
            'The age in seconds of the current producer metadata being used.')