def test_consume_none(self):
    """Fetch from offset 0 of partition 0 and expect an error-free, empty reply.

    Exactly one response is expected; the single-element unpacking below
    raises ValueError if the client returns any other number of responses.
    """
    payload = FetchRequestPayload(self.topic, 0, 0, 1024)
    (response,) = self.client.send_fetch_request([payload])

    # The reply must describe the topic/partition that was requested.
    self.assertEqual(response.error, 0)
    self.assertEqual(response.topic, self.topic)
    self.assertEqual(response.partition, 0)

    # Materialize the messages so their count can be checked.
    received = list(response.messages)
    self.assertEqual(len(received), 0)
def assert_fetch_offset(self, partition, start_offset, expected_messages):
    """Fetch from `start_offset` on `partition` and verify the reply.

    Checks that the response has no error, refers to the requested
    partition, carries exactly `expected_messages` (compared by message
    value, in order), and reports a high-water mark equal to
    `start_offset + len(expected_messages)`.
    """
    payload = FetchRequestPayload(self.topic, partition, start_offset, 1024)

    # Exactly one response is expected from the server; unpacking into a
    # one-element tuple raises if there are zero or several.
    (response,) = self.client.send_fetch_request([payload])

    self.assertEqual(response.error, 0)
    self.assertEqual(response.partition, partition)

    values = [entry.message.value for entry in response.messages]
    self.assertEqual(values, expected_messages)

    expected_highwater = start_offset + len(expected_messages)
    self.assertEqual(response.highwaterMark, expected_highwater)
def _fetch(self):
    """Fetch messages for every tracked partition and enqueue them.

    One fetch request is sent per round for all partitions still pending.
    A partition is fetched again in the next round when its offset was out
    of range (after resetting it), when its payload failed, or when the
    response ended in a partial message (with a doubled buffer size, capped
    at `max_buffer_size` when that is set). The loop ends once no partition
    needs another round.

    Each accepted message is put on `self.queue` as `(partition, message)`
    and `self.fetch_offsets[partition]` is advanced past it.

    Raises:
        UnknownTopicOrPartitionError: re-raised after resetting the topic
            metadata on the client.
        ConsumerFetchSizeTooSmall: when the buffer is already at
            `max_buffer_size` and the response holds only a partial message.
    """
    # Every tracked partition starts out with the default buffer size.
    pending = {part: self.buffer_size for part in self.fetch_offsets}

    while pending:
        payloads = [
            FetchRequestPayload(self.topic, part,
                                self.fetch_offsets[part], size)
            for part, size in six.iteritems(pending)
        ]

        # Errors come back as part of the responses instead of being raised
        # by the client, so each one can be handled per partition below.
        responses = self.client.send_fetch_request(
            payloads,
            max_wait_time=int(self.fetch_max_wait_time),
            min_bytes=self.fetch_min_bytes,
            fail_on_error=False
        )

        next_round = {}
        for resp in responses:
            try:
                check_error(resp)
            except UnknownTopicOrPartitionError:
                log.error('UnknownTopicOrPartitionError for %s:%d',
                          resp.topic, resp.partition)
                self.client.reset_topic_metadata(resp.topic)
                raise
            except NotLeaderForPartitionError:
                # Metadata is reset but the partition is not re-queued for
                # this call.
                log.error('NotLeaderForPartitionError for %s:%d',
                          resp.topic, resp.partition)
                self.client.reset_topic_metadata(resp.topic)
                continue
            except OffsetOutOfRangeError:
                log.warning('OffsetOutOfRangeError for %s:%d. '
                            'Resetting partition offset...',
                            resp.topic, resp.partition)
                self.reset_partition_offset(resp.partition)
                # Fetch this partition again with the same buffer size.
                next_round[resp.partition] = pending[resp.partition]
                continue
            except FailedPayloadsError as failure:
                log.warning('FailedPayloadsError for %s:%d',
                            failure.payload.topic,
                            failure.payload.partition)
                # Fetch this partition again with the same buffer size.
                failed_part = failure.payload.partition
                next_round[failed_part] = pending[failed_part]
                continue

            part = resp.partition
            size = pending[part]

            # A trailing PartialMessage means the buffer was too small to
            # hold the last message in full.
            if resp.messages and isinstance(resp.messages[-1].message,
                                            PartialMessage):
                if self.max_buffer_size is None:
                    size *= 2
                else:
                    # Already at the cap and nothing but a partial message
                    # came back: growing further is not possible.
                    if (size == self.max_buffer_size and
                            len(resp.messages) == 1):
                        log.error('Max fetch size %d too small',
                                  self.max_buffer_size)
                        raise ConsumerFetchSizeTooSmall()
                    size = min(size * 2, self.max_buffer_size)

                log.warning('Fetch size too small, increase to %d (2x) '
                            'and retry', size)
                next_round[part] = size
                # Drop the incomplete message; the complete ones before it
                # are still delivered below.
                resp.messages.pop()

            for message in resp.messages:
                if message.offset < self.fetch_offsets[part]:
                    log.debug('Skipping message %s because its offset is '
                              'less than the consumer offset', message)
                else:
                    # Hand the message over and move the offset past it.
                    self.queue.put((part, message))
                    self.fetch_offsets[part] = message.offset + 1

        pending = next_round
def fetch_messages(self):
    """Send FetchRequests for all topic/partitions set for consumption

    Returns:
        Generator that yields KafkaMessage structs whose value has been
        passed through the configured `deserializer_class`

    Raises:
        KafkaConfigurationError: when no topics/partitions are configured
            or no fetch offsets are available (raised once iteration of
            the generator starts)

    Note:
        Calls `_refresh_metadata_on_error()` on FailedPayloadsError and
        NotLeaderForPartitionError. On OffsetOutOfRangeError the fetch
        offset of that partition is replaced with the value returned by
        `_reset_partition_offset()`. Responses that hit one of these
        errors, or RequestTimedOutError, yield no messages.

    See Also:
        Configuration keys read by this method:
        * `fetch_message_max_bytes`
        * `fetch_wait_max_ms`
        * `fetch_min_bytes`
        * `deserializer_class`
    """
    fetch_size = self._config['fetch_message_max_bytes']
    wait_ms = self._config['fetch_wait_max_ms']
    least_bytes = self._config['fetch_min_bytes']

    if not self._topics:
        raise KafkaConfigurationError('No topics or partitions configured')

    if not self._offsets.fetch:
        raise KafkaConfigurationError(
            'No fetch offsets found when calling fetch_messages')

    payloads = []
    for (topic, partition) in self._topics:
        start = self._offsets.fetch[(topic, partition)]
        payloads.append(
            FetchRequestPayload(topic, partition, start, fetch_size))

    # send_fetch_request will batch topic/partition requests by leader
    responses = self._client.send_fetch_request(
        payloads,
        max_wait_time=wait_ms,
        min_bytes=least_bytes,
        fail_on_error=False)

    for resp in responses:
        # With fail_on_error=False a failed payload comes back as an
        # exception instance in place of a response.
        if isinstance(resp, FailedPayloadsError):
            self.metrics.record('failed-payloads', 1)
            logger.warning('FailedPayloadsError attempting to fetch data')
            self._refresh_metadata_on_error()
            continue

        topic = resp.topic
        partition = resp.partition
        key = (topic, partition)

        try:
            check_error(resp)
        except OffsetOutOfRangeError:
            self.metrics.record('offset-out-of-range', 1)
            logger.warning(
                'OffsetOutOfRange: topic %s, partition %d, '
                'offset %d (Highwatermark: %d)',
                topic, partition,
                self._offsets.fetch[key], resp.highwaterMark)
            # Replace the stale offset before moving on
            self._offsets.fetch[key] = self._reset_partition_offset(key)
            continue
        except NotLeaderForPartitionError:
            self.metrics.record('not-leader-for-partition', 1)
            logger.warning(
                "NotLeaderForPartitionError for %s - %d. "
                "Metadata may be out of date",
                topic, partition)
            self._refresh_metadata_on_error()
            continue
        except RequestTimedOutError:
            self.metrics.record('request-timed-out', 1)
            logger.warning("RequestTimedOutError for %s - %d",
                           topic, partition)
            continue

        # Remember the highwater mark reported by the server
        self._offsets.highwater[key] = resp.highwaterMark

        # A trailing partial message is incomplete; drop it
        if resp.messages and isinstance(resp.messages[-1].message,
                                        PartialMessage):
            resp.messages.pop()

        # Exceptions raised while iterating or deserializing are not
        # caught here -- they propagate to whoever consumes the generator
        for (offset, message) in resp.messages:
            # Deserialization happens before the offset check, so it also
            # runs for messages that end up being skipped
            value = self._config['deserializer_class'](message.value)
            record = KafkaMessage(topic, partition, offset,
                                  message.key, value)

            # The server may return messages earlier than the requested
            # offset; those are skipped
            if offset < self._offsets.fetch[key]:
                logger.debug(
                    'message offset less than fetched offset '
                    'skipping: %s', record)
                continue

            # Advance the fetch offset only once the message has been
            # received and deserialized, then hand it to the caller
            self._offsets.fetch[key] = offset + 1
            yield record