Example #1
    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        Keyword Arguments:
            partitions (list): list of partitions to commit; defaults to
                all of them
        """

        # Short-circuit if nothing happened. This check is kept outside the
        # lock to avoid acquiring it unnecessarily just to inspect the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Check again, in case the state changed while we were
            # waiting to acquire the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s",
                          offset, self.group, self.topic, partition)

                reqs.append(
                    OffsetCommitRequest(self.topic, partition, offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                kafka.common.check_error(resp)

            self.count_since_commit = 0
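
For context, a minimal sketch of how this commit() might be driven from application code, assuming the pre-1.0 kafka-python API this example targets (KafkaClient and SimpleConsumer); the broker address, group, topic, and process() handler are placeholders:

from kafka import KafkaClient, SimpleConsumer

client = KafkaClient('localhost:9092')        # placeholder broker address
consumer = SimpleConsumer(client, b'my-group', b'my-topic',
                          auto_commit=False)  # commit explicitly instead

for message in consumer.get_messages(count=100, block=False):
    process(message)  # hypothetical application handler

# Flush all pending offsets; passing a list narrows the commit to those
# partitions, e.g. consumer.commit(partitions=[0, 1])
consumer.commit()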
Example #2
    def commit(self):
        """Store consumed message offsets (marked via task_done())
        in the Kafka cluster for this consumer_group.

        Returns:
            True on success, or False if no offsets were found to commit

        Note:
            This functionality requires server version >= 0.8.1.1
            https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
        """
        if not self._config['group_id']:
            logger.warning('Cannot commit without a group_id!')
            raise KafkaConfigurationError(
                'Attempted to commit offsets '
                'without a configured consumer group (group_id)')

        # API supports storing metadata with each commit
        # but for now it is unused
        metadata = b''

        offsets = self._offsets.task_done
        commits = []
        for topic_partition, task_done_offset in six.iteritems(offsets):

            # Skip if None
            if task_done_offset is None:
                continue

            # Commit the next offset to fetch, which is consistent with the
            # Java client: task_done marks offsets already consumed, so add
            # one to point at the next message to be fetched
            commit_offset = (task_done_offset + 1)

            # Skip if no change from previous committed
            if commit_offset == self._offsets.commit[topic_partition]:
                continue

            commits.append(
                OffsetCommitRequest(topic_partition[0], topic_partition[1],
                                    commit_offset, metadata))

        if commits:
            logger.info('committing consumer offsets to group %s',
                        self._config['group_id'])
            resps = self._client.send_offset_commit_request(
                kafka_bytestring(self._config['group_id']),
                commits,
                fail_on_error=False)

            for r in resps:
                check_error(r)
                topic_partition = (r.topic, r.partition)
                task_done = self._offsets.task_done[topic_partition]
                self._offsets.commit[topic_partition] = (task_done + 1)

            if self._config['auto_commit_enable']:
                self._reset_auto_commit()

            return True

        else:
            logger.info('No new offsets found to commit in group %s',
                        self._config['group_id'])
            return False
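
A minimal usage sketch for the task_done()/commit() flow described above, assuming the same era's kafka-python KafkaConsumer API that this method belongs to; the topic, group, servers, and process() handler are placeholders:

from kafka.consumer import KafkaConsumer

consumer = KafkaConsumer('my-topic',                   # placeholder topic
                         group_id='my-group',          # required by commit()
                         bootstrap_servers=['localhost:9092'],
                         auto_commit_enable=False)     # commit manually

for message in consumer:
    process(message)             # hypothetical application handler
    consumer.task_done(message)  # mark this message's offset as consumed

consumer.commit()  # stores each task_done offset + 1 on the brokers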
Example #3
def set_consumer_offsets(
    kafka_client,
    group,
    new_offsets,
    raise_on_error=True,
    offset_storage='zookeeper',
):
    """Set consumer offsets to the specified offsets.

    This method does not validate the specified offsets, it is up to
    the caller to specify valid offsets within a topic partition.

    If any partition leader is not available, the request fails for all the
    other topics as well. This is the tradeoff of sending all topic requests
    in a single batch, which saves on both round trips and Kafka load.

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id
    :param new_offsets: dict {<topic>: {<partition>: <offset>}}
    :param raise_on_error: if False, the method does not raise exceptions
      for errors encountered; sending the request itself may still fail.
    :param offset_storage: String, one of {zookeeper, kafka}.
    :returns: a list of errors for each partition offset update that failed.
    :rtype: list [OffsetCommitError]
    :raises:
      :py:class:`kafka_utils.util.error.UnknownTopic`: upon missing
      topics and raise_on_error=True

      :py:class:`kafka_utils.util.error.UnknownPartition`: upon missing
      partitions and raise_on_error=True

      :py:class:`exceptions.TypeError`: upon badly formatted input
      new_offsets

      :py:class:`kafka_utils.util.error.InvalidOffsetStorageError`: upon
      unknown offset_storage choice.

      FailedPayloadsError: upon send request error.
    """
    valid_new_offsets = _verify_commit_offsets_requests(
        kafka_client, new_offsets, raise_on_error)

    group_offset_reqs = [
        OffsetCommitRequest(kafka_bytestring(topic), partition, offset, None)
        for topic, new_partition_offsets in six.iteritems(valid_new_offsets)
        for partition, offset in six.iteritems(new_partition_offsets)
    ]

    # Default to Zookeeper-backed offset storage when offset_storage is unset
    if offset_storage == 'zookeeper' or not offset_storage:
        send_api = kafka_client.send_offset_commit_request
    elif offset_storage == 'kafka':
        send_api = kafka_client.send_offset_commit_request_kafka
    else:
        raise InvalidOffsetStorageError(offset_storage)

    status = []
    if group_offset_reqs:
        status = send_api(kafka_bytestring(group),
                          group_offset_reqs,
                          raise_on_error,
                          callback=_check_commit_response_error)

    # Filter out successful (None) results, returning only the errors
    return list(filter(None, status))
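
Finally, a hedged sketch of calling this helper, assuming kafka-utils' KafkaToolClient (kafka_utils.util.client); the broker address, group, and offset values are placeholders, and per the docstring the offsets are not validated:

from kafka_utils.util.client import KafkaToolClient

client = KafkaToolClient('localhost:9092')  # placeholder broker address

# {topic: {partition: offset}} -- the caller must supply valid offsets
new_offsets = {'my-topic': {0: 150, 1: 300}}

errors = set_consumer_offsets(
    client,
    'my-group',
    new_offsets,
    raise_on_error=False,    # collect OffsetCommitError results instead of raising
    offset_storage='kafka',  # or 'zookeeper' (the default)
)
for error in errors:
    print(error)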