Example #1
def print_managed_consumer_groups(client, args):
    """Get Kafka-managed consumer groups for a topic.

    :param client: KafkaClient connected to the cluster.
    :type client:  :class:`pykafka.KafkaClient`
    :param topic:  Name of the topic.
    :type topic:  :class:`str`
    """
    if args.topic not in client.topics:
        raise ValueError('Topic {} does not exist.'.format(args.topic))
    consumer_groups = {}
    brokers = client.brokers
    for broker_id, broker in iteritems(brokers):
        groups = broker.list_groups().groups.keys()
        groups_metadata = broker.describe_groups(group_ids=groups).groups
        for group_id, describe_group_response in iteritems(groups_metadata):
            members = describe_group_response.members
            for member_id, member in iteritems(members):
                topics = member.member_metadata.topic_names
                if args.topic in topics:
                    consumer_groups[group_id] = describe_group_response

    print('Topic: {}'.format(args.topic))
    print(tabulate.tabulate(
        [(group_id, x.state, x.protocol, x.protocol_type)
         for group_id, x in iteritems(consumer_groups)],
        headers=['GroupId', 'State', 'Protocol', 'ProtocolType']
    ))
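A minimal sketch of how these CLI helpers might be driven, assuming the function above is importable; the broker address and topic name are placeholders:

import argparse

from pykafka import KafkaClient

client = KafkaClient(hosts='127.0.0.1:9092')   # hypothetical broker address
args = argparse.Namespace(topic=b'my.topic')   # placeholder topic name
print_managed_consumer_groups(client, args)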
Example #2
def print_offsets(client, args):
    """Print offsets for a topic/consumer group.

    NOTE: Time-based offset lookups are not precise, but are based on segment
          boundaries. If there is only one segment, as when Kafka has just
          started, the only offsets found will be [0, <latest_offset>].

    :param client: KafkaClient connected to the cluster.
    :type client:  :class:`pykafka.KafkaClient`
    :param topic:  Name of the topic.
    :type topic:  :class:`str`
    :param offset: Offset to reset to. Can be earliest, latest or a datetime.
        Using a datetime will reset the offset to the latest message published
        *before* the datetime.
    :type offset: :class:`pykafka.common.OffsetType` or
        :class:`datetime.datetime`
    """
    # Don't auto-create topics.
    if args.topic not in client.topics:
        raise ValueError('Topic {} does not exist.'.format(args.topic))
    topic = client.topics[args.topic]

    offsets = fetch_offsets(client, topic, args.offset)
    print(tabulate.tabulate(
        [(k, v.offset[0]) for k, v in iteritems(offsets)],
        headers=['Partition', 'Offset'],
        numalign='center',
    ))
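The offset argument accepts either an OffsetType member or a datetime. A hedged usage sketch (broker, topic, and date are placeholders):

import argparse
import datetime as dt

from pykafka import KafkaClient
from pykafka.common import OffsetType

client = KafkaClient(hosts='127.0.0.1:9092')   # hypothetical broker address
# Offsets at the head of the log:
print_offsets(client, argparse.Namespace(topic=b'my.topic',
                                         offset=OffsetType.LATEST))
# Time-based lookup: resolves to the last message before the given datetime,
# at segment granularity (see the NOTE in the docstring above).
print_offsets(client, argparse.Namespace(topic=b'my.topic',
                                         offset=dt.datetime(2020, 1, 1)))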
Example #3
    def test_rebalance_callbacks(self):
        def on_rebalance(cns, old_partition_offsets, new_partition_offsets):
            self.assertTrue(len(new_partition_offsets) > 0)
            self.assigned_called = True
            for id_ in iterkeys(new_partition_offsets):
                new_partition_offsets[id_] = self.offset_reset
            return new_partition_offsets

        self.assigned_called = False
        self.offset_reset = 50
        try:
            consumer_group = b'test_rebalance_callbacks'
            consumer_a = self.get_balanced_consumer(
                consumer_group,
                zookeeper_connect=self.kafka.zookeeper,
                auto_offset_reset=OffsetType.EARLIEST,
                post_rebalance_callback=on_rebalance,
                use_rdkafka=self.USE_RDKAFKA)
            consumer_b = self.get_balanced_consumer(
                consumer_group,
                zookeeper_connect=self.kafka.zookeeper,
                auto_offset_reset=OffsetType.EARLIEST,
                use_rdkafka=self.USE_RDKAFKA)
            self.wait_for_rebalancing(consumer_a, consumer_b)
            self.assertTrue(self.assigned_called)
            for _, offset in iteritems(consumer_a.held_offsets):
                self.assertEqual(offset, self.offset_reset)
        finally:
            try:
                consumer_a.stop()
                consumer_b.stop()
            except:
                pass
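The contract exercised above: post_rebalance_callback receives the consumer, the old partition-to-offset mapping, and the proposed new mapping, and whatever dict it returns becomes the consumer's starting offsets. A standalone sketch (broker, ZooKeeper, topic, and group names are placeholders):

from pykafka import KafkaClient

def start_from_offset_100(consumer, old_offsets, new_offsets):
    # The returned mapping is adopted as the post-rebalance offsets.
    return {partition_id: 100 for partition_id in new_offsets}

client = KafkaClient(hosts='127.0.0.1:9092')
consumer = client.topics[b'my.topic'].get_balanced_consumer(
    consumer_group=b'my-group',
    zookeeper_connect='127.0.0.1:2181',
    post_rebalance_callback=start_from_offset_100)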
Example #4
 def get_consumer(self, topic):
     if topic not in self.client.topics:
         return {'status': 'Error, topic {} does not exist'.format(topic)}
     brokers = self.client.brokers
     consumers = []
     for broker_id, broker in iteritems(brokers):
         groups = broker.list_groups().groups.keys()
         groups_metadata = broker.describe_groups(group_ids=groups).groups
         for group_id, describe_group_response in iteritems(
                 groups_metadata):
             members = describe_group_response.members
             for member_id, member in iteritems(members):
                 topics = member.member_metadata.topic_names
                 if topic in topics:
                     consumers.append(describe_group_response.group_id)
     return consumers
Example #5
def print_consumer_lag(client, args):
    """Print lag for a topic/consumer group.

    :param client: KafkaClient connected to the cluster.
    :type client:  :class:`pykafka.KafkaClient`
    :param topic:  Name of the topic.
    :type topic:  :class:`str`
    :param consumer_group: Name of the consumer group to fetch offsets for.
    :type consumer_group: :class:`str`
    """
    # Don't auto-create topics.
    if args.topic not in client.topics:
        raise ValueError('Topic {} does not exist.'.format(args.topic))
    topic = client.topics[args.topic]

    lag_info = fetch_consumer_lag(client, topic, args.consumer_group)
    lag_info = [(k, '{:,}'.format(v[0] - v[1]), v[0], v[1])
                for k, v in iteritems(lag_info)]
    print(tabulate.tabulate(
        lag_info,
        headers=['Partition', 'Lag', 'Latest Offset', 'Current Offset'],
        numalign='center',
    ))

    total = sum(int(i[1].replace(',', '')) for i in lag_info)
    print('\n Total lag: {:,} messages.'.format(total))
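fetch_consumer_lag returns {partition_id: (latest_offset, current_offset)}, so per-partition lag is just the difference. A sketch of computing total lag directly, assuming fetch_consumer_lag comes from pykafka's kafka_tools CLI module, where these helpers originate (broker, topic, and group names are placeholders):

from pykafka import KafkaClient
from pykafka.cli.kafka_tools import fetch_consumer_lag

client = KafkaClient(hosts='127.0.0.1:9092')   # hypothetical broker address
topic = client.topics[b'my.topic']             # placeholder topic
lag = fetch_consumer_lag(client, topic, b'my-group')
total = sum(latest - current for latest, current in lag.values())
print('Total lag: {:,} messages.'.format(total))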
Example #6
    def get_consumer_lag(self, topic, consumer_group):
        """Get raw lag data for a topic/consumer group.
		:param topic:  Name of the topic.
		:type topic:  :class:`pykafka.topic.Topic`
		:param consumer_group: Name of the consumer group to fetch lag for.
		:type consumer_groups: :class:`str`
		:returns: dict of {partition_id: (latest_offset, consumer_offset)}
		"""
        if topic not in self.client.topics:
            return {'status': 'Error, topic {} does not exist'.format(topic)}
        top = self.client.topics[topic]
        latest_offsets = self.fetch_offsets(top, 'latest')
        consumer = top.get_simple_consumer(consumer_group=consumer_group,
                                           auto_start=False,
                                           reset_offset_on_fetch=False)
        current_offsets = consumer.fetch_offsets()
        pid_dict = {}
        for p_id, stat in current_offsets:
            pid_dict[p_id] = (latest_offsets[p_id].offset[0], stat.offset)
        lag_list = []
        consumer_details = {}
        consumer_details['name'] = consumer_group.decode()
        for k, v in iteritems(pid_dict):
            d = {
                'Partition': k,
                'Lag': v[0] - v[1],
                'Latest Offset': v[0],
                'Current Offset': v[1],
                #'Consumer_ID': v[2],
                #'Client_ID': v[3]
            }
            lag_list.append(d)
        consumer_details['partitions'] = lag_list
        return consumer_details
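For a hypothetical two-partition topic, the returned structure looks like this (all values are made up for illustration):

{'name': 'my-group',
 'partitions': [
     {'Partition': 0, 'Lag': 12, 'Latest Offset': 112, 'Current Offset': 100},
     {'Partition': 1, 'Lag': 0, 'Latest Offset': 98, 'Current Offset': 98}]}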
Example #7
def get_consumer_lag(client, topic, consumer_group):
    try:
        lag_info = fetch_consumer_lag(client, topic, consumer_group)
        for k, v in iteritems(lag_info):
            graphite_update(
                'servers.' + stringsub(hostname) + '.kafka.' +
                stringsub(topic.name) + '.' + str(k), v[0] - v[1])
            print(
                'server.' + stringsub(hostname) + '.kafka.' +
                stringsub(topic.name) + '.' + str(k), v[0] - v[1],
                consumer_group)
    except AttributeError:
        pass
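stringsub is not defined in this snippet; since Graphite treats '.' as a path separator, it presumably sanitizes metric-path components. A hedged sketch of such a helper:

import re

def stringsub(name):
    # Hypothetical sanitizer: dots would create extra Graphite path
    # segments, so replace them (and anything else unsafe) with underscores.
    if isinstance(name, bytes):
        name = name.decode('utf-8')
    return re.sub(r'[^A-Za-z0-9_-]', '_', name)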
Example #8
 def test_consumer_lag(self):
     """Ensure that after consuming the entire topic, lag is 0"""
     with self._get_simple_consumer(consumer_group=b"test_lag_group",
                                    consumer_timeout_ms=1000) as consumer:
         while True:
             message = consumer.consume()
             if message is None:
                 break
         consumer.commit_offsets()
         latest_offsets = {p_id: res.offset[0]
                           for p_id, res
                           in iteritems(consumer.topic.latest_available_offsets())}
         current_offsets = {p_id: res.offset for p_id, res in consumer.fetch_offsets()}
         self.assertEqual(current_offsets, latest_offsets)
Example #9
    def _convert_offsets(offset_responses):
        """Helper function to translate Offset(Fetch)PartitionResponse

        Calls like consumer.fetch_offsets() and earliest_available_offsets()
        return lists of OffsetPartitionResponses.  These hold the next offset
        to be consumed, whereas consumer.held_offsets returns the latest
        consumed offset.  This translates them to facilitate comparisons.
        """
        if isinstance(offset_responses, dict):
            offset_responses = iteritems(offset_responses)
        f1 = lambda off: OffsetType.EARLIEST if off == 0 else off - 1
        f2 = lambda off: off[0] if isinstance(off, list) else off
        return {partition_id: f1(f2(offset_response.offset))
                for partition_id, offset_response in offset_responses}
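A sketch of the comparison this helper enables, assuming _convert_offsets is available as a plain function and the group's committed offsets are in sync with what the consumer has read (broker, topic, and group are placeholders):

from pykafka import KafkaClient

client = KafkaClient(hosts='127.0.0.1:9092')
consumer = client.topics[b'my.topic'].get_simple_consumer(
    consumer_group=b'my-group')
# fetch_offsets() reports the next offset to consume; held_offsets tracks
# the last offset consumed, so the off-by-one shift makes them comparable.
assert _convert_offsets(consumer.fetch_offsets()) == consumer.held_offsets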
Example #10
def reset_offsets(client, args):
    """Reset offset for a topic/consumer group.

    NOTE: Time-based offset lookups are not precise, but are based on segment
          boundaries. If there is only one segment, as when Kafka has just
          started, the only offsets found will be [0, <latest_offset>].

    :param client: KafkaClient connected to the cluster.
    :type client:  :class:`pykafka.KafkaClient`
    :param topic:  Name of the topic.
    :type topic:  :class:`str`
    :param consumer_group: Name of the consumer group to reset offsets for.
    :type consumer_group: :class:`str`
    :param offset: Offset to reset to. Can be earliest, latest or a datetime.
        Using a datetime will reset the offset to the latest message published
        *before* the datetime.
    :type offset: :class:`pykafka.common.OffsetType` or
        :class:`datetime.datetime`
    """
    # Don't auto-create topics.
    if args.topic not in client.topics:
        raise ValueError('Topic {} does not exist.'.format(args.topic))
    topic = client.topics[args.topic]

    # Build offset commit requests.
    offsets = fetch_offsets(client, topic, args.offset)
    tmsp = int(time.time() * 1000)
    reqs = [PartitionOffsetCommitRequest(topic.name,
                                         partition_id,
                                         res.offset[0],
                                         tmsp,
                                         'kafka-tools')
            for partition_id, res in iteritems(offsets)]

    # Send them to the appropriate broker.
    broker = client.cluster.get_group_coordinator(args.consumer_group)
    broker.commit_consumer_group_offsets(
        args.consumer_group, 1, 'kafka-tools', reqs
    )
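A usage sketch for the function above; resetting to EARLIEST rewinds the group to the start of every partition (broker, topic, and group names are placeholders):

import argparse

from pykafka import KafkaClient
from pykafka.common import OffsetType

client = KafkaClient(hosts='127.0.0.1:9092')   # hypothetical broker address
reset_offsets(client, argparse.Namespace(topic=b'my.topic',
                                         consumer_group=b'my-group',
                                         offset=OffsetType.EARLIEST))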
Example #11
    def collect(self, _):
        try:
            # Disable verbose logging for Kafka client lib
            logger = logging.getLogger("pykafka")
            logger.setLevel(logging.ERROR)

            host = self.get('ip', 'localhost')
            port = self.get('port', 9092)
            consumer_group_regex = re.compile(
                self.get('consumer_group_regex', '.*'))
            consumer_group_names = self.get('consumer_groups', '')

            server = f'{host}:{port}'
            client = pykafka.KafkaClient(hosts=server)

            consumer_groups = set(
                map(
                    lambda g: g.encode('UTF-8'),
                    filter(lambda g: len(g) > 0,
                           consumer_group_names.split(','))))

            brokers = client.brokers

            group_topics = {}
            member_assignment = {}
            for _, broker in iteritems(brokers):
                broker_groups = broker.list_groups().groups.keys()
                if consumer_groups:
                    groups = list(
                        filter(lambda g: g in consumer_groups, broker_groups))
                else:
                    groups = list(
                        filter(
                            lambda g: consumer_group_regex.match(
                                g.decode('UTF-8')), broker_groups))

                groups_metadata = broker.describe_groups(
                    group_ids=groups).groups
                for group_id, describe_group_response in iteritems(
                        groups_metadata):
                    members = describe_group_response.members
                    topics = set()
                    for member_id, member in iteritems(members):
                        for topic, assignments in member.member_assignment.partition_assignment:
                            topics.add(topic)
                            for assignment in assignments:
                                member_assignment[(topic, group_id,
                                                   assignment)] = member_id
                    group_topics[group_id] = topics

            lags = []
            for group_id, topics in iteritems(group_topics):
                for topic_name in topics:
                    topic = client.topics[topic_name]
                    lag = kafka_tools.fetch_consumer_lag(
                        client, topic, group_id)
                    for partition, offsets in iteritems(lag):
                        member = member_assignment[(topic_name, group_id,
                                                    partition)]
                        lags.append(
                            MemberLag(topic_name, group_id, member, partition,
                                      offsets[0], offsets[1]))

            for lag in lags:
                labels = {
                    'topic': lag.topic,
                    'consumer_group': lag.group,
                    'consumer_client_id': lag.member,
                    'topic_partition': lag.partition
                }
                self.gauge('kafka_consumer_lag', labels).set(lag.lag())

            return Status.OK
        except Exception:
            self.logger.error('Unable to scrape metrics from Kafka')
            return Status.CRITICAL
Example #12
import argparse
import logging

from pykafka import KafkaClient
from pykafka.cli import kafka_tools  # pykafka's CLI helpers (fetch_consumer_lag)
from six import iteritems

log = logging.getLogger(__name__)

parser = argparse.ArgumentParser()
parser.add_argument('-kafkabroker',  # flag name inferred from the help text
                    required=True,
                    help='-kafkabroker 127.0.0.1:9092')
args = parser.parse_args()

kafka_brokers = args.kafkabroker

client = KafkaClient(hosts=kafka_brokers)

# get all topics
topics = client.topics

# get all brokers
brokers = client.brokers

consumer_groups = []

# get all consumer groups
for broker_id, broker in iteritems(brokers):
    consumer_groups += list(broker.list_groups().groups.keys())
# get all consumer groups lag according each topic
for topic in client.topics:
    c_topic = topics[topic]
    for group in consumer_groups:
        try:
            lags = kafka_tools.fetch_consumer_lag(client, c_topic, group)
            lag_info = [(k, '{}'.format(v[0] - v[1]), v[0], v[1])
                        for k, v in iteritems(lags)]
            print(topic, group, lag_info)
        except Exception as e:
            log.critical(e)