def _decode_message_set_iter(cls, data):
    """
    Iteratively decode a MessageSet

    Reads repeated elements of (offset, message), calling decode_message
    to decode a single message. Since compressed messages contain further
    MessageSets, these two methods have been decoupled so that they may
    recurse easily.

    Arguments:
        data: encoded MessageSet buffer (presumably bytes -- confirm against
            relative_unpack / read_int_string, which consume it)

    Yields:
        OffsetAndMessage: one per message produced by
            KafkaProtocol._decode_message

    Raises:
        ConsumerFetchSizeTooSmall: if the buffer underflows before a single
            message has been yielded
    """
    cur = 0
    read_message = False
    while cur < len(data):
        try:
            # Each element is an 8-byte offset followed by a
            # length-prefixed message blob.
            ((offset, ), cur) = relative_unpack('>q', data, cur)
            (msg, cur) = read_int_string(data, cur)
            for (offset, message) in KafkaProtocol._decode_message(msg, offset):
                read_message = True
                yield OffsetAndMessage(offset, message)
        except BufferUnderflowError:
            # NOTE: Not sure this is correct error handling:
            # Is it possible to get a BUE if the message set is somewhere
            # in the middle of the fetch response? If so, we probably have
            # an issue that's not fetch size too small.
            # Aren't we ignoring errors if we fail to unpack data by
            # ending the iteration here?
            # If _decode_message() raises a ChecksumError, couldn't that
            # also be due to the fetch size being too small?
            if not read_message:
                # If we get a partial read of a message, but haven't
                # yielded anything there's a problem
                raise ConsumerFetchSizeTooSmall()
            # A truncated trailing message after at least one complete
            # message simply ends the iteration. This must be a plain
            # return: raising StopIteration inside a generator is turned
            # into RuntimeError by PEP 479 (Python 3.7+).
            return
def _decode_message_set_iter(cls, data):
    """
    Iteratively decode a MessageSet

    Reads repeated elements of (offset, message), calling decode_message
    to decode a single message. Since compressed messages contain further
    MessageSets, these two methods have been decoupled so that they may
    recurse easily.

    Arguments:
        data: encoded MessageSet buffer (presumably bytes -- confirm against
            relative_unpack / read_int_string, which consume it)

    Yields:
        OffsetAndMessage: one per message produced by
            KafkaProtocol._decode_message

    Raises:
        ConsumerFetchSizeTooSmall: if the buffer underflows before a single
            message has been yielded
    """
    cur = 0
    read_message = False
    while cur < len(data):
        try:
            ((offset, ), cur) = relative_unpack('>q', data, cur)
            (msg, cur) = read_int_string(data, cur)
            for (offset, message) in KafkaProtocol._decode_message(msg, offset):
                read_message = True
                yield OffsetAndMessage(offset, message)
        except BufferUnderflowError:
            if not read_message:
                # If we get a partial read of a message, but haven't yielded
                # anything there's a problem
                raise ConsumerFetchSizeTooSmall()
            # Otherwise the buffer ended in a truncated message: stop
            # iterating. Use return rather than raising StopIteration,
            # which PEP 479 converts into RuntimeError inside a generator
            # on Python 3.7+.
            return
def _fetch(self):
    """
    Fetch messages for every partition in self.fetch_offsets and enqueue them.

    Fetch requests are re-sent for a shrinking set of partitions until none
    is left to retry. A partition is retried when its offset was out of range
    (after resetting the offset), when its payload failed, or when the
    response ended in a PartialMessage (with a doubled buffer size, capped at
    self.max_buffer_size when that is set).

    Each accepted message is put on self.queue as (partition, message) and
    self.fetch_offsets[partition] is advanced to message.offset + 1.

    Raises:
        UnknownTopicOrPartitionError: re-raised after resetting the topic
            metadata
        ConsumerFetchSizeTooSmall: if the buffer is already at
            self.max_buffer_size and the response held only a partial message
    """
    # Every partition starts out with the default buffer size
    pending = {part: self.buffer_size for part in self.fetch_offsets.keys()}

    while pending:
        payloads = [
            FetchRequestPayload(self.topic, part,
                                self.fetch_offsets[part], size)
            for part, size in six.iteritems(pending)
        ]

        # Errors are returned in the response list rather than raised, so
        # that each partition can be handled on its own below
        responses = self.client.send_fetch_request(
            payloads,
            max_wait_time=int(self.fetch_max_wait_time),
            min_bytes=self.fetch_min_bytes,
            fail_on_error=False)

        to_retry = {}
        for resp in responses:
            try:
                check_error(resp)
            except UnknownTopicOrPartitionError:
                log.error('UnknownTopicOrPartitionError for %s:%d',
                          resp.topic, resp.partition)
                self.client.reset_topic_metadata(resp.topic)
                raise
            except NotLeaderForPartitionError:
                # Metadata is refreshed, but the partition is not retried
                # within this call
                log.error('NotLeaderForPartitionError for %s:%d',
                          resp.topic, resp.partition)
                self.client.reset_topic_metadata(resp.topic)
                continue
            except OffsetOutOfRangeError:
                log.warning('OffsetOutOfRangeError for %s:%d. '
                            'Resetting partition offset...',
                            resp.topic, resp.partition)
                self.reset_partition_offset(resp.partition)
                # Ask again for this partition with the same buffer size
                to_retry[resp.partition] = pending[resp.partition]
                continue
            except FailedPayloadsError as e:
                log.warning('FailedPayloadsError for %s:%d',
                            e.payload.topic, e.payload.partition)
                # Ask again for this partition with the same buffer size
                to_retry[e.payload.partition] = pending[e.payload.partition]
                continue

            part = resp.partition
            size = pending[part]

            # A trailing PartialMessage means the buffer was too small to
            # hold the whole last message
            ends_with_partial = (
                resp.messages and
                isinstance(resp.messages[-1].message, PartialMessage))
            if ends_with_partial:
                # Nothing but a partial message although the buffer is
                # already at its maximum: growing further is impossible
                if (self.max_buffer_size is not None and
                        size == self.max_buffer_size and
                        len(resp.messages) == 1):
                    log.error('Max fetch size %d too small',
                              self.max_buffer_size)
                    raise ConsumerFetchSizeTooSmall()

                size = (size * 2 if self.max_buffer_size is None
                        else min(size * 2, self.max_buffer_size))
                log.warning('Fetch size too small, increase to %d (2x) '
                            'and retry', size)
                to_retry[part] = size
                # Drop the partial message; the complete ones are still used
                resp.messages.pop()

            for fetched in resp.messages:
                if fetched.offset < self.fetch_offsets[part]:
                    log.debug('Skipping message %s because its offset is '
                              'less than the consumer offset', fetched)
                else:
                    # Hand the message over and move the fetch position
                    # past it
                    self.queue.put((part, fetched))
                    self.fetch_offsets[part] = fetched.offset + 1

        pending = to_retry