def __init__(
    self,
    brokers,
    group,
    zk_hosts,
    deserializer=None,
    partition_allocator=None,
    autocommit=True,
    max_wait_time=1000,  # in milliseconds
    min_bytes=1,
    max_bytes=(1024 * 1024),
):
    """
    Sets up group-coordination state on top of the base consumer.

    The base class handles ``brokers``, ``deserializer`` and the fetch
    tuning knobs; this constructor adds the consumer-group name, a (lazily
    determined) coordinator broker ID and a ``PartitionAllocator`` that
    decides which topics/partitions this member consumes.
    """
    super(GroupedConsumer, self).__init__(
        brokers, deserializer, max_wait_time, min_bytes, max_bytes
    )
    self.group_name = group
    self.autocommit = autocommit
    # Topics with consumed-but-uncommitted offsets.
    self.topics_to_commit = set()
    # Broker ID of the group coordinator; filled in later by
    # determine_coordinator().
    self.coordinator_id = None
    # Clearing synced_offsets on rebalance forces offsets to be
    # re-determined after the allocation changes.
    chosen_allocator = partition_allocator or naive_allocator
    self.allocator = PartitionAllocator(
        zk_hosts,
        self.group_name,
        self.name,
        allocator_fn=chosen_allocator,
        on_rebalance=self.synced_offsets.clear,
    )
class GroupedConsumer(BaseConsumer):
    """
    Consumer class with coordinated resource allocation among like members.

    Uses an instance of a ``PartitionAllocator`` to determine which topics
    and partitions to consume.  Whenever the allocation is rebalanced, each
    consumed topic will have its partition offsets re-determined.

    Constructed similarly to the ``SingleConsumer`` class except for extra
    parameters ``group``, ``zk_hosts``, ``partition_allocator`` and
    ``autocommit``.
    """

    # Upper bound on offset-fetch attempts: a freshly created consumer
    # group can keep answering "retry" while its offsets are loading, so
    # the fetch loop must be capped.
    OFFSET_FETCH_MAX_RETRIES = 5

    def __init__(
        self,
        brokers,
        group,
        zk_hosts,
        deserializer=None,
        partition_allocator=None,
        autocommit=True,
        max_wait_time=1000,  # in milliseconds
        min_bytes=1,
        max_bytes=(1024 * 1024),
    ):
        super(GroupedConsumer, self).__init__(
            brokers, deserializer, max_wait_time, min_bytes, max_bytes
        )
        self.group_name = group
        # Broker ID of the group coordinator, determined lazily via
        # determine_coordinator().
        self.coordinator_id = None
        # Clearing synced_offsets on rebalance forces offsets to be
        # re-determined for each topic after the allocation changes.
        self.allocator = PartitionAllocator(
            zk_hosts, self.group_name, self.name,
            allocator_fn=partition_allocator or naive_allocator,
            on_rebalance=self.synced_offsets.clear
        )
        # Topics with consumed-but-uncommitted offsets.
        self.topics_to_commit = set()
        self.autocommit = autocommit

    @property
    def allocation(self):
        """
        Proxy property for the topics/partitions determined by the allocator.
        """
        return self.allocator.allocation

    @gen.coroutine
    def connect(self):
        """
        Overriding ``connect()`` that handles the allocator and coordinator.

        Simple augmentation of the base class method that starts the
        allocator and calls `determine_coordinator()`.
        """
        yield super(GroupedConsumer, self).connect()
        yield self.allocator.start(self.cluster.topics)
        yield self.determine_coordinator()

    @gen.coroutine
    def check_topic(self, topic):
        """
        Ensures ``topic`` has allocated partitions and a known coordinator.

        If nothing is currently allocated for the topic, the allocator is
        restarted (re-fetching group leadership and partition assignments)
        and the coordinator is re-determined if needed.

        Raises ``exc.NoOffsetsError`` if the topic still has no allocated
        partitions afterwards.
        """
        if not self.allocation or not self.allocation[topic]:
            self.coordinator_id = None
            # Restart the allocator to pick up the group leader and the
            # current partition assignment.
            yield self.allocator.start(self.cluster.topics)
        if not self.coordinator_id and self.allocation[topic]:
            yield self.determine_coordinator()
        if not self.allocation[topic]:
            # Was a bare ``NoOffsetsError``; qualified via ``exc`` so the
            # name resolves the same way as everywhere else in this class.
            raise exc.NoOffsetsError

    @gen.coroutine
    def consume(self, topic, start=None):
        """
        Overriding ``consume()`` that handles committing offsets.

        This is where the ``autocommit`` flag comes into play.  If the flag
        is set we call `commit_offsets()` here right off the bat.

        .. note::
          The ``start`` argument is not forwarded to the base class; it
          exists to keep the signature consistent with other consumers.
        """
        result = yield super(GroupedConsumer, self).consume(topic)
        if topic not in self.synced_offsets:
            # A rebalance invalidated this topic's offsets mid-consume;
            # report nothing rather than possibly-stale messages.
            raise gen.Return([])

        self.topics_to_commit.add(topic)
        if self.autocommit:
            yield self.commit_offsets()

        raise gen.Return(result)

    @gen.coroutine
    def determine_coordinator(self):
        """
        Determines the ID of the broker that coordinates the group.

        Uses the "consumer metadata" api to do its thing.  All brokers
        contain coordinator metadata so each broker in the cluster is tried
        until one works.
        """
        request = coordinator.GroupCoordinatorRequest(group=self.group_name)

        determined = False
        while not determined:
            broker_ids = list(self.cluster)
            if not broker_ids:
                raise exc.NoBrokersError
            for broker_id in broker_ids:
                results = yield self.send({broker_id: request})
                determined = results[broker_id]
                if determined:
                    break

    def handle_group_coordinator_response(self, response):
        """
        Handler for consumer metadata api responses.

        These responses are relatively simple and successful ones merely
        list the ID, host and port of the coordinator.

        Returns ``True`` if the coordinator was determined, ``False`` if not.
        """
        determined = False
        if response.error_code == errors.no_error:
            log.info("Found coordinator: broker %s", response.coordinator_id)
            self.coordinator_id = response.coordinator_id
            determined = True
        elif response.error_code in errors.retriable:
            self.heal_cluster = True
            determined = False
        else:
            # The original call was missing the argument for the ``%s``
            # placeholder, which itself raises a logging error.
            log.error(
                "Got error %s when determining coordinator",
                response.error_code
            )
            determined = True

        return determined

    @gen.coroutine
    def determine_offsets(self, topic, start=None):
        """
        Fetches offsets for a given topic via the "offset fetch" api.

        Simple matter of sending an OffsetFetchRequest to the coordinator
        broker.

        .. note::
          The ``start`` argument is actually ignored, it exists so that the
          signature remains consistent with the other consumer classes.
        """
        log.info("Fetching offsets for consumer group '%s'", self.group_name)
        yield self.check_topic(topic)
        request = offset_fetch.OffsetFetchRequest(
            group_name=self.group_name,
            topics=[
                offset_fetch.TopicRequest(
                    name=topic, partitions=list(self.allocation[topic])
                )
            ]
        )
        # NOTE: the group coordinator was introduced in Kafka 0.9.x.  The
        # broad idea is that one broker is elected coordinator for a subset
        # of consumer groups; it watches ZooKeeper to detect partition or
        # consumer changes, issues rebalance commands, and checks that the
        # rebalance succeeded on all affected consumers, retrying on
        # failure (a process very similar to the replication controller).
        attempts = 0
        retry = True
        # Cap the attempts: a newly created consumer group may keep
        # reporting "retry" while its offsets are still loading.
        while retry and attempts < self.OFFSET_FETCH_MAX_RETRIES:
            result = yield self.send({self.coordinator_id: request})
            retry = result[self.coordinator_id]
            attempts += 1

    def handle_offset_fetch_response(self, response):
        """
        Handler for offset fetch api responses.

        Sets the corresponding entry in the ``self.offsets`` structure for
        successful partition responses.

        Raises a ``NoOffsetsError`` exception if a fatal, non-retriable
        error is encountered.

        Returns ``True`` if the operation should be retried, ``False`` if
        not.
        """
        retry = False
        # An empty response carries nothing to record and nothing to retry.
        if not response.topics:
            return False
        topic = response.topics[0].name
        for partition in response.topics[0].partitions:
            code = partition.error_code
            if code == errors.no_error:
                log.debug(
                    "Got offset %d for group %s topic %s partition %d",
                    partition.offset, self.group_name, topic,
                    partition.partition_id
                )
                self.offsets[topic][partition.partition_id] = partition.offset
            elif code == errors.offsets_load_in_progress:
                log.info(
                    "Offsets load in progress for topic %s partition %s" +
                    " retrying offset fetch.",
                    topic, partition.partition_id
                )
                retry = True
            elif code in errors.retriable:
                self.heal_cluster = True
                retry = True
            else:
                log.error(
                    "Got error %s for topic %s partition %s",
                    constants.ERROR_CODES[code], topic, partition.partition_id
                )
                raise exc.NoOffsetsError

        return retry

    @gen.coroutine
    def commit_offsets(self, metadata=None):
        """
        Notifies Kafka that the consumer's messages have been processed.

        Uses the "v0" version of the offset commit request to maintain
        compatibility with clusters running 0.8.1.
        """
        if metadata is None:
            metadata = "committed by %s" % self.name

        log.debug("Committing offsets for consumer group %s", self.group_name)
        request = offset_commit.OffsetCommitV0Request(
            group=self.group_name,
            topics=[
                offset_commit.TopicRequest(
                    name=topic,
                    partitions=[
                        offset_commit.PartitionRequest(
                            partition_id=partition_id,
                            offset=self.offsets[topic][partition_id],
                            metadata=metadata
                        )
                        for partition_id in partition_ids
                    ]
                )
                for topic, partition_ids in six.iteritems(self.allocation)
                if topic in self.topics_to_commit
            ]
        )
        results = yield self.send({self.coordinator_id: request})
        retry, adjust_metadata = results[self.coordinator_id]
        if adjust_metadata:
            # log.warn() is a deprecated alias of log.warning().
            log.warning("Offset commit metadata '%s' was too long.", metadata)
            metadata = ""
        if retry:
            yield self.commit_offsets(metadata=metadata)

    def handle_offset_commit_response(self, response):
        """
        Handles responses from the "offset commit" api.

        For successful responses the affected topics are dropped from the
        set of topics that need commits.

        In the special case of an ``offset_metadata_too_large`` error code
        the commit is retried with a blank metadata string.
        """
        retry = False
        adjust_metadata = False
        for topic in response.topics:
            for partition in topic.partitions:
                code = partition.error_code
                if code == errors.no_error:
                    self.topics_to_commit.discard(topic.name)
                elif code in errors.retriable:
                    retry = True
                    self.heal_cluster = True
                elif code == errors.offset_metadata_too_large:
                    retry = True
                    adjust_metadata = True
                else:
                    # Log the topic's name, not the response object's repr.
                    log.error(
                        "Got error %s for topic %s partition %s",
                        constants.ERROR_CODES[code], topic.name,
                        partition.partition_id
                    )

        return (retry, adjust_metadata)

    @gen.coroutine
    def wind_down(self):
        """
        Winding down calls ``stop()`` on the allocator.
        """
        yield self.allocator.stop()
class GroupedConsumer(BaseConsumer):
    """
    Consumer class with coordinated resource allocation among like members.

    Uses an instance of a ``PartitionAllocator`` to determine which topics
    and partitions to consume.  Whenever the allocation is rebalanced, each
    consumed topic will have its partition offsets re-determined.

    Constructed similarly to the ``SingleConsumer`` class except for extra
    parameters ``group``, ``zk_hosts``, ``partition_allocator`` and
    ``autocommit``.
    """

    def __init__(
        self,
        brokers,
        group,
        zk_hosts,
        deserializer=None,
        partition_allocator=None,
        autocommit=True,
        max_wait_time=1000,  # in milliseconds
        min_bytes=1,
        max_bytes=(1024 * 1024),
    ):
        super(GroupedConsumer, self).__init__(
            brokers, deserializer, max_wait_time, min_bytes, max_bytes
        )
        self.group_name = group
        # Broker ID of the group coordinator, determined lazily via
        # determine_coordinator().
        self.coordinator_id = None
        # Clearing synced_offsets on rebalance forces offsets to be
        # re-determined for each topic after the allocation changes.
        self.allocator = PartitionAllocator(
            zk_hosts, self.group_name, self.name,
            allocator_fn=partition_allocator or naive_allocator,
            on_rebalance=self.synced_offsets.clear
        )
        # Topics with consumed-but-uncommitted offsets.
        self.topics_to_commit = set()
        self.autocommit = autocommit

    @property
    def allocation(self):
        """
        Proxy property for the topics/partitions determined by the allocator.
        """
        return self.allocator.allocation

    @gen.coroutine
    def connect(self):
        """
        Overriding ``connect()`` that handles the allocator and coordinator.

        Simple augmentation of the base class method that starts the
        allocator and calls `determine_coordinator()`.
        """
        yield super(GroupedConsumer, self).connect()
        yield self.allocator.start(self.cluster.topics)
        yield self.determine_coordinator()

    @gen.coroutine
    def consume(self, topic, start=None):
        """
        Overriding ``consume()`` that handles committing offsets.

        This is where the ``autocommit`` flag comes into play.  If the flag
        is set we call `commit_offsets()` here right off the bat.

        .. note::
          The ``start`` argument is not forwarded to the base class; it
          exists to keep the signature consistent with other consumers.
        """
        result = yield super(GroupedConsumer, self).consume(topic)
        if topic not in self.synced_offsets:
            # A rebalance invalidated this topic's offsets mid-consume;
            # report nothing rather than possibly-stale messages.
            raise gen.Return([])

        self.topics_to_commit.add(topic)
        if self.autocommit:
            yield self.commit_offsets()

        raise gen.Return(result)

    @gen.coroutine
    def determine_coordinator(self):
        """
        Determines the ID of the broker that coordinates the group.

        Uses the "consumer metadata" api to do its thing.  All brokers
        contain coordinator metadata so each broker in the cluster is tried
        until one works.
        """
        request = coordinator.GroupCoordinatorRequest(group=self.group_name)

        determined = False
        while not determined:
            broker_ids = list(self.cluster)
            if not broker_ids:
                raise exc.NoBrokersError
            for broker_id in broker_ids:
                results = yield self.send({broker_id: request})
                determined = results[broker_id]
                if determined:
                    break

    def handle_group_coordinator_response(self, response):
        """
        Handler for consumer metadata api responses.

        These responses are relatively simple and successful ones merely
        list the ID, host and port of the coordinator.

        Returns ``True`` if the coordinator was determined, ``False`` if not.
        """
        determined = False
        if response.error_code == errors.no_error:
            log.info("Found coordinator: broker %s", response.coordinator_id)
            self.coordinator_id = response.coordinator_id
            determined = True
        elif response.error_code in errors.retriable:
            self.heal_cluster = True
            determined = False
        else:
            # The original call was missing the argument for the ``%s``
            # placeholder, which itself raises a logging error.
            log.error(
                "Got error %s when determining coordinator",
                response.error_code
            )
            determined = True

        return determined

    @gen.coroutine
    def determine_offsets(self, topic, start=None):
        """
        Fetches offsets for a given topic via the "offset fetch" api.

        Simple matter of sending an OffsetFetchRequest to the coordinator
        broker.

        .. note::
          The ``start`` argument is actually ignored, it exists so that the
          signature remains consistent with the other consumer classes.
        """
        log.info("Fetching offsets for consumer group '%s'", self.group_name)
        request = offset_fetch.OffsetFetchRequest(
            group_name=self.group_name,
            topics=[
                offset_fetch.TopicRequest(
                    name=topic, partitions=list(self.allocation[topic])
                )
            ]
        )
        # Keep fetching until the response handler reports no more retries
        # are needed (e.g. once an offsets load finishes).
        retry = True
        while retry:
            result = yield self.send({self.coordinator_id: request})
            retry = result[self.coordinator_id]

    def handle_offset_fetch_response(self, response):
        """
        Handler for offset fetch api responses.

        Sets the corresponding entry in the ``self.offsets`` structure for
        successful partition responses.

        Raises a ``NoOffsetsError`` exception if a fatal, non-retriable
        error is encountered.

        Returns ``True`` if the operation should be retried, ``False`` if
        not.
        """
        retry = False
        # Guard against an empty topic list; indexing [0] unconditionally
        # would raise IndexError instead of simply not retrying.
        if not response.topics:
            return False
        topic = response.topics[0].name
        for partition in response.topics[0].partitions:
            code = partition.error_code
            if code == errors.no_error:
                log.debug(
                    "Got offset %d for group %s topic %s partition %d",
                    partition.offset, self.group_name, topic,
                    partition.partition_id
                )
                self.offsets[topic][partition.partition_id] = partition.offset
            elif code == errors.offsets_load_in_progress:
                log.info(
                    "Offsets load in progress for topic %s partition %s" +
                    " retrying offset fetch.",
                    topic, partition.partition_id
                )
                retry = True
            elif code in errors.retriable:
                self.heal_cluster = True
                retry = True
            else:
                log.error(
                    "Got error %s for topic %s partition %s",
                    constants.ERROR_CODES[code], topic, partition.partition_id
                )
                raise exc.NoOffsetsError

        return retry

    @gen.coroutine
    def commit_offsets(self, metadata=None):
        """
        Notifies Kafka that the consumer's messages have been processed.

        Uses the "v0" version of the offset commit request to maintain
        compatibility with clusters running 0.8.1.
        """
        if metadata is None:
            metadata = "committed by %s" % self.name

        log.debug("Committing offsets for consumer group %s", self.group_name)
        request = offset_commit.OffsetCommitV0Request(
            group=self.group_name,
            topics=[
                offset_commit.TopicRequest(
                    name=topic,
                    partitions=[
                        offset_commit.PartitionRequest(
                            partition_id=partition_id,
                            offset=self.offsets[topic][partition_id],
                            metadata=metadata
                        )
                        for partition_id in partition_ids
                    ]
                )
                for topic, partition_ids in six.iteritems(self.allocation)
                if topic in self.topics_to_commit
            ]
        )
        results = yield self.send({self.coordinator_id: request})
        retry, adjust_metadata = results[self.coordinator_id]
        if adjust_metadata:
            # log.warn() is a deprecated alias of log.warning().
            log.warning("Offset commit metadata '%s' was too long.", metadata)
            metadata = ""
        if retry:
            yield self.commit_offsets(metadata=metadata)

    def handle_offset_commit_response(self, response):
        """
        Handles responses from the "offset commit" api.

        For successful responses the affected topics are dropped from the
        set of topics that need commits.

        In the special case of an ``offset_metadata_too_large`` error code
        the commit is retried with a blank metadata string.
        """
        retry = False
        adjust_metadata = False
        for topic in response.topics:
            for partition in topic.partitions:
                code = partition.error_code
                if code == errors.no_error:
                    self.topics_to_commit.discard(topic.name)
                elif code in errors.retriable:
                    retry = True
                    self.heal_cluster = True
                elif code == errors.offset_metadata_too_large:
                    retry = True
                    adjust_metadata = True
                else:
                    # Log the topic's name, not the response object's repr.
                    log.error(
                        "Got error %s for topic %s partition %s",
                        constants.ERROR_CODES[code], topic.name,
                        partition.partition_id
                    )

        return (retry, adjust_metadata)

    @gen.coroutine
    def wind_down(self):
        """
        Winding down calls ``stop()`` on the allocator.
        """
        yield self.allocator.stop()