Example 1
    def get_partition_offsets(self, topic, partition, request_time_ms, max_num_offsets):
        """Request available fetch offsets for a single topic/partition

        Keyword Arguments:
            topic (str): topic for offset request
            partition (int): partition for offset request
            request_time_ms (int): Used to ask for all messages before a
                certain time (ms). There are two special values.
                Specify -1 to receive the latest offset (i.e. the offset of the
                next coming message) and -2 to receive the earliest available
                offset. Note that because offsets are pulled in descending
                order, asking for the earliest offset will always return a
                single element.
            max_num_offsets (int): Maximum offsets to include in the OffsetResponse

        Returns:
            a list of offsets in the OffsetResponse submitted for the provided
            topic / partition. See:
            https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI
        """
        reqs = [OffsetRequest(topic, partition, request_time_ms, max_num_offsets)]

        (resp,) = self._client.send_offset_request(reqs)

        check_error(resp)

        # Just for sanity..
        # probably unnecessary
        assert resp.topic == topic
        assert resp.partition == partition

        return resp.offsets
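
Every snippet on this page funnels responses through check_error from kafka.common. As a mental model, it raises the exception class mapped to a non-zero error code on the response (or re-raises a response that is itself an exception object). A minimal self-contained sketch -- the class and the one-entry code table are illustrative, not the library's full mapping:

    class KafkaError(Exception):
        pass

    class UnknownTopicOrPartitionError(KafkaError):
        errno = 3

    # Illustrative subset of the error-code -> exception mapping
    _ERRORS = {3: UnknownTopicOrPartitionError}

    def check_error(response):
        # Responses can already be exception objects (e.g. failed payloads)
        if isinstance(response, Exception):
            raise response
        # A non-zero error code maps to an exception class and is raised
        if response.error != 0:
            raise _ERRORS.get(response.error, KafkaError)(response)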
Example 2
    def fetch_last_known_offsets(self, partitions=None):
        if self.group is None:
            raise ValueError('KafkaClient.group must not be None')

        if partitions is None:
            partitions = self.client.get_partition_ids_for_topic(self.topic)

        responses = self.client.send_offset_fetch_request(
            self.group,
            [OffsetFetchRequest(self.topic, p) for p in partitions],
            fail_on_error=False)

        for resp in responses:
            try:
                check_error(resp)
            # API spec says the server won't set an error here,
            # but 0.8.1.1 actually does...
            except UnknownTopicOrPartitionError:
                pass

            # -1 offset signals no commit is currently stored
            if resp.offset == -1:
                self.offsets[resp.partition] = 0

            # Otherwise we committed the stored offset
            # and need to fetch the next one
            else:
                self.offsets[resp.partition] = resp.offset
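
The -1 sentinel handling above can be verified with plain data. A toy run, assuming a response namedtuple shaped like the library's OffsetFetchResponse payload:

    from collections import namedtuple

    # Field layout assumed for illustration; the real payload is defined
    # in kafka.common
    OffsetFetchResponse = namedtuple(
        'OffsetFetchResponse',
        ['topic', 'partition', 'offset', 'metadata', 'error'])

    offsets = {}
    for resp in [OffsetFetchResponse(b'topic', 0, -1, b'', 0),
                 OffsetFetchResponse(b'topic', 1, 42, b'', 0)]:
        # -1 means no commit is stored, so start the partition at 0
        offsets[resp.partition] = 0 if resp.offset == -1 else resp.offset

    assert offsets == {0: 0, 1: 42}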
Example 3
    def _raise_on_response_error(self, resp):

        # Response can be an unraised exception object (FailedPayloadsError)
        if isinstance(resp, Exception):
            raise resp

        # Or a server API error response
        try:
            kafka_common.check_error(resp)
        except (UnknownTopicOrPartitionError, NotLeaderForPartitionError):
            self.reset_topic_metadata(resp.topic)
            raise

        # Return False if no error to enable list comprehensions
        return False
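
The explicit False return is what the closing comment refers to: it lets callers keep only the successful responses in a single comprehension. A self-contained illustration of the pattern (not the client's actual call site):

    def raise_on_response_error(resp):
        if isinstance(resp, Exception):
            raise resp
        return False  # falsy, so `not` keeps the response below

    responses = ['resp-a', 'resp-b']
    good = [r for r in responses if not raise_on_response_error(r)]
    assert good == ['resp-a', 'resp-b']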
Example 4
    def commit_partition_offsets(self, partition_offsets):
        """
        Commit explicit partition/offset pairs.
        """
        self.logger.debug("Committing partition offsets: %s", partition_offsets)

        commit_requests = [
            OffsetCommitRequest(self.consumer.topic, partition, offset, None)
            for partition, offset in partition_offsets.items()
        ]
        commit_responses = self.consumer.client.send_offset_commit_request(
            self.consumer.group,
            commit_requests,
        )
        for commit_response in commit_responses:
            check_error(commit_response)
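
For reference, the commit requests built above are plain namedtuples. A standalone sketch constructing the same payloads by hand -- the topic name and offsets are illustrative, and the field layout is assumed from the call above:

    from collections import namedtuple

    OffsetCommitRequest = namedtuple(
        'OffsetCommitRequest',
        ['topic', 'partition', 'offset', 'metadata'])

    partition_offsets = {0: 42, 1: 17}
    commit_requests = [
        OffsetCommitRequest(b'my-topic', partition, offset, None)
        for partition, offset in partition_offsets.items()
    ]
    assert commit_requests[0] == OffsetCommitRequest(b'my-topic', 0, 42, None)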
Example 5
    def _get_coordinator_for_group(self, group):
        """
        Returns the coordinator broker for a consumer group.

        ConsumerCoordinatorNotAvailableCode will be raised if the coordinator
        does not currently exist for the group.

        OffsetsLoadInProgressCode is raised if the coordinator is available
        but is still loading offsets from the internal topic.
        """

        resp = self.send_consumer_metadata_request(group)

        # If there's a problem with finding the coordinator, raise the
        # provided error
        kafka_common.check_error(resp)

        # Otherwise return the BrokerMetadata
        return BrokerMetadata(resp.nodeId, resp.host, resp.port)
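
BrokerMetadata itself is just a namedtuple over the three fields read from the response above; a reference sketch with illustrative values:

    from collections import namedtuple

    BrokerMetadata = namedtuple('BrokerMetadata', ['nodeId', 'host', 'port'])

    coordinator = BrokerMetadata(0, b'localhost', 9092)
    assert (coordinator.host, coordinator.port) == (b'localhost', 9092)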
Example 6
    def _get_commit_offsets(self):
        logger.info("Consumer fetching stored offsets")
        for topic_partition in self._topics:
            (resp,) = self._client.send_offset_fetch_request(
                kafka_bytestring(self._config['group_id']),
                [OffsetFetchRequest(topic_partition[0], topic_partition[1])],
                fail_on_error=False)
            try:
                check_error(resp)
            # API spec says the server won't set an error here,
            # but 0.8.1.1 actually does...
            except UnknownTopicOrPartitionError:
                pass

            # -1 offset signals no commit is currently stored
            if resp.offset == -1:
                self._offsets.commit[topic_partition] = None

            # Otherwise we committed the stored offset
            # and need to fetch the next one
            else:
                self._offsets.commit[topic_partition] = resp.offset
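
This variant keys offsets by (topic, partition) tuples and stores None rather than 0 for uncommitted partitions. The kafka_bytestring helper it calls normalizes the group id to bytes; a minimal sketch of its behavior (the real helper lives in kafka.util):

    def kafka_bytestring(s):
        # Pass bytes through, encode text as UTF-8, reject anything else
        if isinstance(s, bytes):
            return s
        if isinstance(s, str):
            return s.encode('utf-8')
        raise TypeError(s)

    assert kafka_bytestring('my-group') == b'my-group'
    assert kafka_bytestring(b'my-group') == b'my-group'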
Example 7
    def commit(self):
        """Store consumed message offsets (marked via task_done())
        to kafka cluster for this consumer_group.

        Returns:
            True on success, or False if no offsets were found for commit

        Note:
            this functionality requires server version >=0.8.1.1
            https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
        """
        if not self._config['group_id']:
            logger.warning('Cannot commit without a group_id!')
            raise KafkaConfigurationError(
                'Attempted to commit offsets '
                'without a configured consumer group (group_id)'
            )

        # API supports storing metadata with each commit
        # but for now it is unused
        metadata = b''

        offsets = self._offsets.task_done
        commits = []
        for topic_partition, task_done_offset in six.iteritems(offsets):

            # Skip if None
            if task_done_offset is None:
                continue

            # Commit offsets as the next offset to fetch
            # which is consistent with the Java Client
            # task_done is marked by messages consumed,
            # so add one to mark the next message for fetching
            commit_offset = (task_done_offset + 1)

            # Skip if no change from previous committed
            if commit_offset == self._offsets.commit[topic_partition]:
                continue

            commits.append(
                OffsetCommitRequest(topic_partition[0], topic_partition[1],
                                    commit_offset, metadata)
            )

        if commits:
            logger.info('committing consumer offsets to group %s', self._config['group_id'])
            resps = self._client.send_offset_commit_request(
                kafka_bytestring(self._config['group_id']), commits,
                fail_on_error=False
            )

            for r in resps:
                check_error(r)
                topic_partition = (r.topic, r.partition)
                task_done = self._offsets.task_done[topic_partition]
                self._offsets.commit[topic_partition] = (task_done + 1)

            if self._config['auto_commit_enable']:
                self._reset_auto_commit()

            return True

        else:
            logger.info('No new offsets found to commit in group %s', self._config['group_id'])
            return False
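
The off-by-one convention in the loop above is the detail worth internalizing: task_done() records the offset of a consumed message, while the commit stores the next offset to fetch, matching the Java client. A worked example with illustrative numbers:

    task_done_offset = 41                  # last message marked via task_done()
    commit_offset = task_done_offset + 1   # next offset to fetch
    assert commit_offset == 42             # this is the value committed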
Example 8
    def fetch_messages(self):
        """Sends FetchRequests for all topic/partitions set for consumption

        Returns:
            Generator that yields KafkaMessage structs
            after deserializing with the configured `deserializer_class`

        Note:
            Refreshes metadata on errors, and resets fetch offset on
            OffsetOutOfRange, per the configured `auto_offset_reset` policy

        See Also:
            Key KafkaConsumer configuration parameters:
            * `fetch_message_max_bytes`
            * `fetch_wait_max_ms`
            * `fetch_min_bytes`
            * `deserializer_class`
            * `auto_offset_reset`

        """

        max_bytes = self._config['fetch_message_max_bytes']
        max_wait_time = self._config['fetch_wait_max_ms']
        min_bytes = self._config['fetch_min_bytes']

        if not self._topics:
            raise KafkaConfigurationError('No topics or partitions configured')

        if not self._offsets.fetch:
            raise KafkaConfigurationError(
                'No fetch offsets found when calling fetch_messages'
            )

        fetches = [FetchRequest(topic, partition,
                                self._offsets.fetch[(topic, partition)],
                                max_bytes)
                   for (topic, partition) in self._topics]

        # send_fetch_request will batch topic/partition requests by leader
        responses = self._client.send_fetch_request(
            fetches,
            max_wait_time=max_wait_time,
            min_bytes=min_bytes,
            fail_on_error=False
        )

        for resp in responses:

            if isinstance(resp, FailedPayloadsError):
                logger.warning('FailedPayloadsError attempting to fetch data')
                self._refresh_metadata_on_error()
                continue

            topic = kafka_bytestring(resp.topic)
            partition = resp.partition
            try:
                check_error(resp)
            except OffsetOutOfRangeError:
                logger.warning('OffsetOutOfRange: topic %s, partition %d, '
                               'offset %d (Highwatermark: %d)',
                               topic, partition,
                               self._offsets.fetch[(topic, partition)],
                               resp.highwaterMark)
                # Reset offset
                self._offsets.fetch[(topic, partition)] = (
                    self._reset_partition_offset((topic, partition))
                )
                continue

            except NotLeaderForPartitionError:
                logger.warning("NotLeaderForPartitionError for %s - %d. "
                               "Metadata may be out of date",
                               topic, partition)
                self._refresh_metadata_on_error()
                continue

            except RequestTimedOutError:
                logger.warning("RequestTimedOutError for %s - %d",
                               topic, partition)
                continue

            # Track server highwater mark
            self._offsets.highwater[(topic, partition)] = resp.highwaterMark

            # Yield each message.
            # kafka-python could raise an exception during iteration
            # that we are not catching -- the user will need to handle it
            for (offset, message) in resp.messages:
                # deserializer_class could raise an exception here
                val = self._config['deserializer_class'](message.value)
                msg = KafkaMessage(topic, partition, offset, message.key, val)

                # In some cases the server will return earlier messages
                # than we requested. Skip them per the Kafka spec.
                if offset < self._offsets.fetch[(topic, partition)]:
                    logger.debug('message offset less than fetched offset '
                                 'skipping: %s', msg)
                    continue
                # Only increment fetch offset
                # if we safely got the message and deserialized
                self._offsets.fetch[(topic, partition)] = offset + 1

                # Then yield to user
                yield msg
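
A hedged usage sketch for the generator; the constructor arguments are assumptions about a typical legacy kafka-python setup, not something this snippet specifies:

    from kafka import KafkaConsumer

    # Configuration keys below are assumed for the legacy consumer API
    consumer = KafkaConsumer('my-topic',
                             group_id='my-group',
                             bootstrap_servers=['localhost:9092'])

    # fetch_messages yields KafkaMessage structs until the fetched
    # batches are exhausted
    for msg in consumer.fetch_messages():
        print(msg.topic, msg.partition, msg.offset, msg.key, msg.value)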
Example 9
    def load_metadata_for_topics(self, *topics):
        """
        Fetch broker and topic-partition metadata from the server,
        and update internal data:
        broker list, topic/partition list, and topic/partition -> broker map.

        This method should be called after receiving any error

        Arguments:
            *topics (optional): If a list of topics is provided,
                the metadata refresh will be limited to the specified topics only.

        Exceptions:
        If the broker is configured to not auto-create topics,
        expect UnknownTopicOrPartitionError for topics that don't exist

        If the broker is configured to auto-create topics,
        expect LeaderNotAvailableError for new topics
        until partitions have been initialized.

        Exceptions *will not* be raised in a full refresh (i.e. no topic list).
        In this case, error codes will be logged as errors.

        Partition-level errors will also not be raised here
        (a single partition w/o a leader, for example)
        """
        topics = [kafka_bytestring(t) for t in topics]

        if topics:
            for topic in topics:
                self.reset_topic_metadata(topic)
        else:
            self.reset_all_metadata()

        resp = self.send_metadata_request(topics)

        log.debug('Updating broker metadata: %s', resp.brokers)
        log.debug('Updating topic metadata: %s', resp.topics)

        self.brokers = dict([(broker.nodeId, broker)
                             for broker in resp.brokers])

        for topic_metadata in resp.topics:
            topic = topic_metadata.topic
            partitions = topic_metadata.partitions

            # Errors expected for new topics
            try:
                kafka_common.check_error(topic_metadata)
            except (UnknownTopicOrPartitionError,
                    LeaderNotAvailableError) as e:

                # Raise if the topic was passed in explicitly
                if topic in topics:
                    raise

                # Otherwise, just log an error and continue
                log.error('Error loading topic metadata for %s: %s', topic,
                          type(e))
                continue

            self.topic_partitions[topic] = {}
            for partition_metadata in partitions:
                partition = partition_metadata.partition
                leader = partition_metadata.leader

                self.topic_partitions[topic][partition] = partition_metadata

                # Populate topics_to_brokers dict
                topic_part = TopicAndPartition(topic, partition)

                # Check for partition errors
                try:
                    kafka_common.check_error(partition_metadata)

                # If no leader, map topic_partition -> None in topics_to_brokers
                except LeaderNotAvailableError:
                    log.error('No leader for topic %s partition %d', topic,
                              partition)
                    self.topics_to_brokers[topic_part] = None
                    continue
                # If one of the replicas is unavailable -- ignore
                # this error code is provided for admin purposes only
                # we never talk to replicas, only the leader
                except ReplicaNotAvailableError:
                    log.debug(
                        'Some (non-leader) replicas not available for topic %s partition %d',
                        topic, partition)

                # If the leader is a known broker, topic_partition -> BrokerMetadata
                if leader in self.brokers:
                    self.topics_to_brokers[topic_part] = self.brokers[leader]

                # If the broker is unknown, fake BrokerMetadata so we don't
                # lose the id (not sure how this could happen -- the server
                # could be in a bad state)
                else:
                    self.topics_to_brokers[topic_part] = BrokerMetadata(
                        leader, None, None)
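
The structures this populates are small namedtuples; a sketch of the resulting topics_to_brokers shape after a refresh (broker values illustrative, BrokerMetadata as in the earlier sketch):

    from collections import namedtuple

    TopicAndPartition = namedtuple('TopicAndPartition', ['topic', 'partition'])
    BrokerMetadata = namedtuple('BrokerMetadata', ['nodeId', 'host', 'port'])

    # Each partition maps to its leader broker, or to None when no
    # leader is currently available
    topics_to_brokers = {
        TopicAndPartition(b'my-topic', 0): BrokerMetadata(0, b'localhost', 9092),
        TopicAndPartition(b'my-topic', 1): None,
    }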