示例#1
0
    def test_consume_none(self):
        # Request partition 0 of the test topic starting at offset 0 and
        # check that the reply is error-free and carries no messages.
        payload = FetchRequestPayload(self.topic, 0, 0, 1024)

        # Single-target unpacking: raises unless the client hands back
        # exactly one response.
        [response] = self.client.send_fetch_request([payload])

        self.assertEqual(response.error, 0)
        self.assertEqual(response.topic, self.topic)
        self.assertEqual(response.partition, 0)

        # Materialise the messages so they can be counted.
        received = list(response.messages)
        self.assertEqual(len(received), 0)
示例#2
0
    def assert_fetch_offset(self, partition, start_offset, expected_messages):
        """Fetch from ``partition`` at ``start_offset`` and verify the reply.

        Asserts that the single response has error code 0, is for the
        requested partition, carries message values equal to
        ``expected_messages``, and reports a high-water mark of
        ``start_offset + len(expected_messages)``.
        """
        payload = FetchRequestPayload(self.topic, partition, start_offset, 1024)

        # Exactly one response is expected from the server; the
        # single-target unpacking raises if a different number comes back.
        [response] = self.client.send_fetch_request([payload])

        self.assertEqual(response.error, 0)
        self.assertEqual(response.partition, partition)

        # Collect just the payload value of every returned message.
        values = []
        for entry in response.messages:
            values.append(entry.message.value)

        self.assertEqual(values, expected_messages)
        self.assertEqual(response.highwaterMark,
                         start_offset + len(expected_messages))
示例#3
0
    def _fetch(self):
        """Fetch messages for every tracked partition and enqueue them.

        One fetch payload is built per partition in ``self.fetch_offsets``,
        starting at that partition's stored offset and using
        ``self.buffer_size`` as the initial buffer size. Requests are
        repeated until no partition is left to retry:

        * offset out of range: the partition offset is reset through
          ``self.reset_partition_offset`` and the partition is retried with
          the same buffer size;
        * failed payload: the partition is retried with the same buffer
          size;
        * trailing partial message: the partial message is dropped and the
          partition is retried with a doubled buffer size (capped at
          ``self.max_buffer_size`` when that is not ``None``);
        * not leader for partition: topic metadata is reset and the
          partition is skipped without a retry.

        Each message whose offset is not behind the stored fetch offset is
        put on ``self.queue`` as ``(partition, message)`` and the stored
        offset is advanced to ``message.offset + 1``.

        Raises:
            UnknownTopicOrPartitionError: re-raised after resetting the
                topic metadata.
            ConsumerFetchSizeTooSmall: when the buffer is already at
                ``self.max_buffer_size`` and the response holds nothing but
                a partial message.
        """
        # partition -> buffer size to request for it in the next round
        pending = dict((part, self.buffer_size)
                       for part in self.fetch_offsets.keys())

        while pending:
            payloads = [
                FetchRequestPayload(self.topic, part,
                                    self.fetch_offsets[part], size)
                for part, size in six.iteritems(pending)
            ]

            # fail_on_error=False: errors are inspected per response below
            # instead of being raised by the client.
            responses = self.client.send_fetch_request(
                payloads,
                max_wait_time=int(self.fetch_max_wait_time),
                min_bytes=self.fetch_min_bytes,
                fail_on_error=False
            )

            to_retry = {}
            for resp in responses:

                try:
                    check_error(resp)
                except UnknownTopicOrPartitionError:
                    log.error('UnknownTopicOrPartitionError for %s:%d',
                              resp.topic, resp.partition)
                    self.client.reset_topic_metadata(resp.topic)
                    raise
                except NotLeaderForPartitionError:
                    log.error('NotLeaderForPartitionError for %s:%d',
                              resp.topic, resp.partition)
                    self.client.reset_topic_metadata(resp.topic)
                    continue
                except OffsetOutOfRangeError:
                    log.warning('OffsetOutOfRangeError for %s:%d. '
                                'Resetting partition offset...',
                                resp.topic, resp.partition)
                    self.reset_partition_offset(resp.partition)
                    # Same buffer size, fresh offset.
                    to_retry[resp.partition] = pending[resp.partition]
                    continue
                except FailedPayloadsError as e:
                    failed = e.payload
                    log.warning('FailedPayloadsError for %s:%d',
                                failed.topic, failed.partition)
                    # Same buffer size, try again.
                    to_retry[failed.partition] = pending[failed.partition]
                    continue

                part = resp.partition
                size = pending[part]

                # A trailing PartialMessage means the buffer was too small
                # to hold the last message in full.
                if resp.messages and isinstance(resp.messages[-1].message,
                                                PartialMessage):
                    cap = self.max_buffer_size
                    if cap is None:
                        size *= 2
                    else:
                        # Already at the cap and nothing but the partial
                        # message came back: growing further is impossible.
                        if size == cap and len(resp.messages) == 1:
                            log.error('Max fetch size %d too small', cap)
                            raise ConsumerFetchSizeTooSmall()
                        size = min(size * 2, cap)

                    log.warning('Fetch size too small, increase to %d (2x) '
                                'and retry', size)
                    to_retry[part] = size
                    # Drop the partial message; the complete ones before it
                    # are still delivered below.
                    resp.messages.pop()

                for msg in resp.messages:
                    if msg.offset < self.fetch_offsets[part]:
                        log.debug('Skipping message %s because its offset is less than the consumer offset',
                                  msg)
                    else:
                        # Deliver the message and move the fetch offset
                        # past it.
                        self.queue.put((part, msg))
                        self.fetch_offsets[part] = msg.offset + 1

            pending = to_retry
示例#4
0
    def fetch_messages(self):
        """Send fetch requests for all configured topic/partitions.

        This is a generator: nothing is sent (and no configuration error is
        raised) until iteration starts.

        Yields:
            KafkaMessage structs built from ``(topic, partition, offset,
            key, value)``, where ``value`` is the message value passed
            through the configured ``deserializer_class``.

        Raises:
            KafkaConfigurationError: if ``self._topics`` is empty or no
                fetch offsets are present.
            Errors raised by ``check_error`` other than the ones handled
            below, and any exception from ``deserializer_class``, propagate
            to the caller.

        Note:
            * A FailedPayloadsError response or NotLeaderForPartitionError
              triggers ``self._refresh_metadata_on_error()`` and the
              response is skipped.
            * OffsetOutOfRangeError replaces the stored fetch offset with
              the value returned by ``self._reset_partition_offset``.
            * RequestTimedOutError is logged and the response skipped.
            * A trailing partial message is discarded.
            * Messages with an offset below the stored fetch offset are
              skipped; otherwise the fetch offset advances to
              ``offset + 1`` before the message is yielded.

        See Also:
            Configuration keys read here:
            * `fetch_message_max_bytes`
            * `fetch_wait_max_ms`
            * `fetch_min_bytes`
            * `deserializer_class`
            * `auto_offset_reset` (not read directly in this method;
              presumably consulted by ``_reset_partition_offset`` - confirm)

        """

        max_bytes = self._config['fetch_message_max_bytes']
        max_wait_time = self._config['fetch_wait_max_ms']
        min_bytes = self._config['fetch_min_bytes']

        if not self._topics:
            raise KafkaConfigurationError('No topics or partitions configured')

        if not self._offsets.fetch:
            raise KafkaConfigurationError(
                'No fetch offsets found when calling fetch_messages')

        # One payload per configured (topic, partition), starting at the
        # currently stored fetch offset.
        fetches = []
        for (topic, partition) in self._topics:
            start_offset = self._offsets.fetch[(topic, partition)]
            fetches.append(
                FetchRequestPayload(topic, partition, start_offset, max_bytes))

        # send_fetch_request will batch topic/partition requests by leader
        responses = self._client.send_fetch_request(
            fetches,
            max_wait_time=max_wait_time,
            min_bytes=min_bytes,
            fail_on_error=False)

        for response in responses:

            # With fail_on_error=False a failed payload shows up as an
            # exception instance in place of a response.
            if isinstance(response, FailedPayloadsError):
                self.metrics.record('failed-payloads', 1)

                logger.warning('FailedPayloadsError attempting to fetch data')
                self._refresh_metadata_on_error()
                continue

            topic = response.topic
            partition = response.partition
            key = (topic, partition)

            try:
                check_error(response)
            except OffsetOutOfRangeError:
                self.metrics.record('offset-out-of-range', 1)

                logger.warning(
                    'OffsetOutOfRange: topic %s, partition %d, '
                    'offset %d (Highwatermark: %d)', topic, partition,
                    self._offsets.fetch[key],
                    response.highwaterMark)
                # Replace the stored offset with the reset value.
                new_offset = self._reset_partition_offset(key)
                self._offsets.fetch[key] = new_offset
                continue

            except NotLeaderForPartitionError:
                self.metrics.record('not-leader-for-partition', 1)

                logger.warning(
                    "NotLeaderForPartitionError for %s - %d. "
                    "Metadata may be out of date", topic, partition)
                self._refresh_metadata_on_error()
                continue

            except RequestTimedOutError:
                self.metrics.record('request-timed-out', 1)

                logger.warning("RequestTimedOutError for %s - %d", topic,
                               partition)
                continue

            # Remember the server-reported high-water mark.
            self._offsets.highwater[key] = response.highwaterMark

            # A trailing partial message cannot be delivered; drop it.
            if response.messages and isinstance(response.messages[-1].message,
                                                PartialMessage):
                response.messages.pop()

            # Exceptions raised while iterating or deserializing are
            # deliberately left for the caller to handle.
            for (offset, message) in response.messages:
                deserialize = self._config['deserializer_class']
                value = deserialize(message.value)
                msg = KafkaMessage(topic, partition, offset, message.key,
                                   value)

                # The server may return messages earlier than the
                # requested offset; those are skipped.
                if offset < self._offsets.fetch[key]:
                    logger.debug(
                        'message offset less than fetched offset '
                        'skipping: %s', msg)
                    continue

                # Advance the fetch offset only once the message has been
                # deserialized successfully, then hand it to the caller.
                self._offsets.fetch[key] = offset + 1

                yield msg