def test_encode_offset_fetch_request(self):
    """Verify the wire encoding of an offset fetch request for two topics.

    The expected message is assembled by hand with ``struct.pack`` and
    compared with the output of
    ``KafkaProtocol.encode_offset_fetch_request``. Either ordering of the
    two topic sections is accepted, so the test does not depend on the
    order in which the encoder emits topics.
    """
    # Request header, consumer group and topic count.
    request_header = "".join((
        struct.pack('>i', 69),                # Total message length
        struct.pack('>h', 9),                 # Message type = offset fetch
        struct.pack('>h', 0),                 # API version
        struct.pack('>i', 42),                # Correlation ID
        struct.pack('>h9s', 9, "client_id"),  # The client ID
        struct.pack('>h8s', 8, "group_id"),   # The consumer group
        struct.pack('>i', 2),                 # Num topics
    ))

    # "topic1" section: two partitions (0 and 1).
    first_topic = "".join((
        struct.pack(">h6s", 6, "topic1"),     # Topic for the request
        struct.pack(">i", 2),                 # Two partitions
        struct.pack(">i", 0),                 # Partition 0
        struct.pack(">i", 1),                 # Partition 1
    ))

    # "topic2" section: a single partition (2).
    second_topic = "".join((
        struct.pack(">h6s", 6, "topic2"),     # Topic for the request
        struct.pack(">i", 1),                 # One partition
        struct.pack(">i", 2),                 # Partition 2
    ))

    acceptable = [
        request_header + first_topic + second_topic,
        request_header + second_topic + first_topic,
    ]

    payloads = [
        OffsetFetchRequest("topic1", 0),
        OffsetFetchRequest("topic1", 1),
        OffsetFetchRequest("topic2", 2),
    ]
    actual = KafkaProtocol.encode_offset_fetch_request(
        "client_id", 42, "group_id", payloads)

    self.assertIn(actual, acceptable)
def fetch_last_known_offsets(self, partitions=None):
    """Load the group's stored offsets into ``self.offsets``.

    :param partitions: iterable of partition ids to look up. When ``None``,
        the ids come from
        ``self.client.get_partition_ids_for_topic(self.topic)``.
    :raises ValueError: if ``self.group`` is ``None``.

    A single offset fetch request is sent for all partitions with
    ``fail_on_error=False``. For each response, an
    ``UnknownTopicOrPartitionError`` from ``check_error`` is ignored;
    any other error it raises propagates.
    """
    if self.group is None:
        raise ValueError('KafkaClient.group must not be None')

    if partitions is None:
        partitions = self.client.get_partition_ids_for_topic(self.topic)

    payloads = [OffsetFetchRequest(self.topic, p) for p in partitions]
    responses = self.client.send_offset_fetch_request(
        self.group, payloads, fail_on_error=False)

    for resp in responses:
        try:
            check_error(resp)
        except UnknownTopicOrPartitionError:
            # API spec says the server won't set an error here,
            # but 0.8.1.1 does actually...
            pass

        # An offset of -1 signals that no commit is currently stored, so
        # start from 0; otherwise use the stored offset unchanged.
        self.offsets[resp.partition] = 0 if resp.offset == -1 else resp.offset
def test_commit_fetch_offsets(self):
    """Commit offset 42 for partition 0, then fetch it back.

    Both requests must yield exactly one response with error code 0, and
    the fetched offset must equal the committed one.
    """
    # Commit phase.
    commit_payload = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata")
    [commit_resp] = self.client.send_offset_commit_request(
        b"group", [commit_payload])
    self.assertEqual(commit_resp.error, 0)

    # Fetch phase.
    fetch_payload = OffsetFetchRequest(self.bytes_topic, 0)
    [fetch_resp] = self.client.send_offset_fetch_request(
        b"group", [fetch_payload])
    self.assertEqual(fetch_resp.error, 0)
    self.assertEqual(fetch_resp.offset, 42)
    # Metadata isn't stored for now, so an empty value is expected back.
    self.assertEqual(fetch_resp.metadata, b"")
def __init__(self, client, group, topic, partitions=None, auto_commit=True,
             auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
             auto_commit_every_t=AUTO_COMMIT_INTERVAL):
    """Set up consumer state and the starting offset of every partition.

    :param client: client used to load topic metadata and fetch offsets.
    :param group: consumer group passed to the offset fetch requests.
    :param topic: topic whose metadata is loaded and whose partitions are
        tracked.
    :param partitions: optional collection of integral partition ids;
        when falsy, ``client.topic_partitions[topic]`` is used instead.
    :param auto_commit: when truthy, starting offsets are fetched for the
        group; otherwise every partition starts at offset 0.
    :param auto_commit_every_n: stored on the instance as
        ``auto_commit_every_n``.
    :param auto_commit_every_t: interval handed to ``ReentrantTimer``; the
        timer is only created when ``auto_commit is True`` and this value
        is not ``None``.
    """
    self.client = client
    self.topic = topic
    self.group = group
    self.client.load_metadata_for_topics(topic)
    self.offsets = {}

    if partitions:
        assert all(isinstance(x, numbers.Integral) for x in partitions)
    else:
        partitions = self.client.topic_partitions[topic]

    # Variables for handling offset commits
    self.commit_lock = Lock()
    self.commit_timer = None
    self.count_since_commit = 0
    self.auto_commit = auto_commit
    self.auto_commit_every_n = auto_commit_every_n
    self.auto_commit_every_t = auto_commit_every_t

    # Set up the auto-commit timer
    timer_wanted = auto_commit is True and auto_commit_every_t is not None
    if timer_wanted:
        self.commit_timer = ReentrantTimer(auto_commit_every_t, self.commit)
        self.commit_timer.start()

    def _offset_or_zero(resp):
        # An unknown topic/partition is treated as "start at 0"; any other
        # error raised by check_error propagates.
        try:
            kafka.common.check_error(resp)
            return resp.offset
        except kafka.common.UnknownTopicOrPartitionError:
            return 0

    if not auto_commit:
        # No offsets are fetched: every partition starts at 0.
        for partition in partitions:
            self.offsets[partition] = 0
    else:
        # One fetch request per partition; the callback maps each response
        # to the offset to start from.
        for partition in partitions:
            payload = OffsetFetchRequest(topic, partition)
            [stored] = self.client.send_offset_fetch_request(
                group, [payload],
                callback=_offset_or_zero,
                fail_on_error=False)
            self.offsets[partition] = stored
def __init__(self, client, group, topic, partitions=None, auto_commit=True,
             auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
             auto_commit_every_t=AUTO_COMMIT_INTERVAL):
    """Set up consumer state and fetch the stored offset of each partition.

    :param client: client used to load topic metadata and fetch offsets.
    :param group: consumer group passed to the offset fetch requests.
    :param topic: topic whose metadata is loaded and whose partitions are
        tracked.
    :param partitions: optional collection of partition ids; when falsy,
        ``client.topic_partitions[topic]`` is used instead.
    :param auto_commit: stored on the instance; together with
        ``auto_commit_every_t`` it decides whether the commit timer starts.
    :param auto_commit_every_n: stored on the instance as
        ``auto_commit_every_n``.
    :param auto_commit_every_t: interval handed to ``ReentrantTimer``; the
        timer is only created when ``auto_commit is True`` and this value
        is not ``None``.
    :raises Exception: if an offset fetch response carries an error code
        other than ``NO_ERROR`` or ``UNKNOWN_TOPIC_OR_PARTITON``.
    """
    self.client = client
    self.topic = topic
    self.group = group
    self.client.load_metadata_for_topics(topic)
    self.offsets = {}

    if not partitions:
        partitions = self.client.topic_partitions[topic]

    # Variables for handling offset commits
    self.commit_lock = Lock()
    self.commit_timer = None
    self.count_since_commit = 0
    self.auto_commit = auto_commit
    self.auto_commit_every_n = auto_commit_every_n
    self.auto_commit_every_t = auto_commit_every_t

    # Set up the auto-commit timer
    timer_wanted = auto_commit is True and auto_commit_every_t is not None
    if timer_wanted:
        self.commit_timer = ReentrantTimer(auto_commit_every_t, self.commit)
        self.commit_timer.start()

    def _offset_or_zero(resp):
        # Success yields the stored offset; an unknown topic/partition is
        # treated as "start at 0"; anything else is fatal.
        if resp.error == ErrorMapping.NO_ERROR:
            return resp.offset
        if resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
            return 0
        raise Exception("OffsetFetchRequest for topic=%s, "
                        "partition=%d failed with errorcode=%s" % (
                            resp.topic, resp.partition, resp.error))

    # NOTE(review): the original comment here tied this fetch to Kafka
    # 0.8.1 -- confirm the minimum broker version that supports it.
    for partition in partitions:
        payload = OffsetFetchRequest(topic, partition)
        [stored] = self.client.send_offset_fetch_request(
            group, [payload],
            callback=_offset_or_zero,
            fail_on_error=False)
        self.offsets[partition] = stored
def _update_group_offsets(self):
    """Refresh ``self._offsets.commit`` with the group's stored offsets.

    One offset fetch request is sent per partition of ``self._topic`` with
    ``fail_on_error=False``. An ``UnknownTopicOrPartitionError`` from
    ``check_error`` is ignored; any other error it raises propagates.
    """
    logger.info("Consumer fetching stored offsets")

    partition_ids = self._client.get_partition_ids_for_topic(self._topic)
    for partition in partition_ids:
        payload = OffsetFetchRequest(self._topic, partition)
        [resp] = self._client.send_offset_fetch_request(
            self._group_id, [payload], fail_on_error=False)

        try:
            check_error(resp)
        except UnknownTopicOrPartitionError:
            pass

        # An offset of -1 is recorded as None; anything else is stored
        # unchanged.
        self._offsets.commit[partition] = (
            None if resp.offset == -1 else resp.offset)
def fetch_last_known_offsets(self, partitions=None):
    """Load the group's stored offsets into ``self.offsets``.

    :param partitions: collection of partition ids to look up. When falsy,
        the ids come from
        ``self.client.get_partition_ids_for_topic(self.topic)``.

    One offset fetch request is sent per partition with
    ``fail_on_error=False``. A partition whose response makes
    ``check_error`` raise ``UnknownTopicOrPartitionError`` is recorded
    with offset 0. When all partitions are processed, ``self.fetch_offsets``
    is set to a copy of ``self.offsets``.
    """
    if not partitions:
        partitions = self.client.get_partition_ids_for_topic(self.topic)

    for partition in partitions:
        payload = OffsetFetchRequest(self.topic, partition)
        [resp] = self.client.send_offset_fetch_request(
            self.group, [payload], fail_on_error=False)

        # Use the stored offset, or 0 when the topic/partition is unknown.
        try:
            kafka.common.check_error(resp)
            stored = resp.offset
        except UnknownTopicOrPartitionError:
            stored = 0

        self.offsets[partition] = stored

    self.fetch_offsets = self.offsets.copy()
def fetch_last_known_offsets(self, partitions=None):
    """Load the group's stored offsets into ``self._offsets`` (coroutine).

    Topic metadata is refreshed first, then one offset fetch request is
    sent per partition. A partition for which the request raises
    ``UnknownTopicOrPartitionError`` is recorded with offset 0. When all
    partitions are processed, ``self._fetch_offsets`` is set to a copy of
    ``self._offsets``.

    :param partitions: collection of partition ids to look up. When falsy
        (``None`` or empty), every partition id reported by
        ``self._client.get_partition_ids_for_topic(self._topic)`` is used.
    """
    yield from self._client.load_metadata_for_topics(self._topic)

    # Honor an explicit partition list. Previously this guard was commented
    # out, so the ``partitions`` argument was always overwritten and
    # silently ignored.
    if not partitions:
        partitions = self._client.get_partition_ids_for_topic(self._topic)

    for partition in partitions:
        req = OffsetFetchRequest(self._topic, partition)
        try:
            (resp, ) = yield from self._client.send_offset_fetch_request(
                self._group, [req])
        except UnknownTopicOrPartitionError:
            # Unknown topic/partition: start from offset 0.
            partition_offset = 0
        else:
            partition_offset = resp.offset
        self._offsets[partition] = partition_offset

    self._fetch_offsets = self._offsets.copy()
def _get_commit_offsets(self):
    """Refresh ``self._offsets.commit`` for every entry in ``self._topics``.

    Each entry of ``self._topics`` is indexed as ``(topic, partition)``.
    One offset fetch request is sent per entry with ``fail_on_error=False``.
    An ``UnknownTopicOrPartitionError`` from ``check_error`` is ignored;
    any other error it raises propagates.
    """
    logger.info("Consumer fetching stored offsets")

    for topic_partition in self._topics:
        group_id = kafka_bytestring(self._config['group_id'])
        payload = OffsetFetchRequest(topic_partition[0], topic_partition[1])
        [resp] = self._client.send_offset_fetch_request(
            group_id, [payload], fail_on_error=False)

        try:
            check_error(resp)
        except UnknownTopicOrPartitionError:
            # API spec says the server won't set an error here,
            # but 0.8.1.1 does actually...
            pass

        # An offset of -1 signals that no commit is currently stored and is
        # recorded as None; otherwise the stored offset is kept unchanged.
        self._offsets.commit[topic_partition] = (
            None if resp.offset == -1 else resp.offset)
def get_current_consumer_offsets(
    kafka_client,
    group,
    topics,
    raise_on_error=True,
    offset_storage='zookeeper',
):
    """ Get current consumer offsets.

    NOTE: This method does not refresh client metadata. It is up to the caller
    to avoid using stale metadata.

    If any partition leader is not available, the request fails for all the
    other topics. This is the tradeoff of sending all topic requests in batch
    and save both in performance and Kafka load.

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id
    :param topics: topic list or dict {<topic>: [partitions]}
    :param raise_on_error: if False the method ignores missing topics and
      missing partitions. It still may fail on the request send.
    :param offset_storage: String, one of {zookeeper, kafka}.
    :returns: a dict topic: partition: offset
    :raises:
      :py:class:`kafka_utils.util.error.UnknownTopic`: upon missing
      topics and raise_on_error=True

      :py:class:`kafka_utils.util.error.UnknownPartition`: upon missing
      partitions and raise_on_error=True

      :py:class:`kafka_utils.util.error.InvalidOffsetStorageError`: upon
      unknown offset_storage choice.

      FailedPayloadsError: upon send request error.
    """
    topics = _verify_topics_and_partitions(kafka_client, topics, raise_on_error)

    # ``items()`` works on both Python 2 and Python 3; ``iteritems()`` does
    # not exist on Python 3 dicts.
    group_offset_reqs = [
        OffsetFetchRequest(kafka_bytestring(topic), partition)
        for topic, partitions in topics.items()
        for partition in partitions
    ]

    group_offsets = {}

    # Pick the client API that matches where the group stores its offsets.
    if offset_storage == 'zookeeper':
        send_api = kafka_client.send_offset_fetch_request
    elif offset_storage == 'kafka':
        send_api = kafka_client.send_offset_fetch_request_kafka
    else:
        raise InvalidOffsetStorageError(offset_storage)

    if group_offset_reqs:
        # fail_on_error = False does not prevent network errors
        group_resps = send_api(
            group=kafka_bytestring(group),
            payloads=group_offset_reqs,
            fail_on_error=False,
            callback=pluck_topic_offset_or_zero_on_unknown,
        )
        # Regroup the flat response list as {topic: {partition: offset}}.
        for resp in group_resps:
            group_offsets.setdefault(
                resp.topic,
                {},
            )[resp.partition] = resp.offset

    return group_offsets