Exemplo n.º 1
0
    def test_encode_offset_fetch_request(self):
        """Check the wire encoding of an offset fetch request for two topics.

        The expected message is assembled by hand with ``struct.pack`` and
        compared against ``KafkaProtocol.encode_offset_fetch_request``.
        Either ordering of the two topic sections is accepted.

        All string fields are bytes literals: ``struct.pack`` returns bytes,
        so joining with a text separator or packing a text value with the
        ``s`` format code fails on Python 3. On Python 2 the ``b`` prefix is
        a no-op, so the test is unchanged there.
        """
        header = b"".join([
            struct.pack('>i', 69),  # Total message length
            struct.pack('>h', 9),  # Message type = offset fetch
            struct.pack('>h', 0),  # API version
            struct.pack('>i', 42),  # Correlation ID
            struct.pack('>h9s', 9, b"client_id"),  # The client ID
            struct.pack('>h8s', 8, b"group_id"),  # The group to fetch for
            struct.pack('>i', 2),  # Num topics
        ])

        topic1 = b"".join([
            struct.pack(">h6s", 6, b"topic1"),  # Topic for the request
            struct.pack(">i", 2),  # Two partitions
            struct.pack(">i", 0),  # Partition 0
            struct.pack(">i", 1),  # Partition 1
        ])

        topic2 = b"".join([
            struct.pack(">h6s", 6, b"topic2"),  # Topic for the request
            struct.pack(">i", 1),  # One partition
            struct.pack(">i", 2),  # Partition 2
        ])

        # The topic sections may appear in either order in the encoded
        # message, so both layouts are valid expectations.
        expected1 = b"".join([header, topic1, topic2])
        expected2 = b"".join([header, topic2, topic1])

        encoded = KafkaProtocol.encode_offset_fetch_request(
            b"client_id", 42, b"group_id", [
                OffsetFetchRequest(b"topic1", 0),
                OffsetFetchRequest(b"topic1", 1),
                OffsetFetchRequest(b"topic2", 2),
            ])

        self.assertIn(encoded, [expected1, expected2])
Exemplo n.º 2
0
    def fetch_last_known_offsets(self, partitions=None):
        """Load the group's stored offsets into ``self.offsets``.

        :param partitions: iterable of partition ids to query. When ``None``,
            the ids are taken from
            ``self.client.get_partition_ids_for_topic(self.topic)``.
        :raises ValueError: if ``self.group`` is ``None``.
        """
        if self.group is None:
            raise ValueError('KafkaClient.group must not be None')

        if partitions is None:
            partitions = self.client.get_partition_ids_for_topic(self.topic)

        responses = self.client.send_offset_fetch_request(
            self.group,
            [OffsetFetchRequest(self.topic, p) for p in partitions],
            fail_on_error=False)

        for resp in responses:
            # The API spec says the server won't set an error here, but
            # 0.8.1.1 actually does, so an unknown topic/partition error
            # is deliberately tolerated.
            try:
                check_error(resp)
            except UnknownTopicOrPartitionError:
                pass

            # An offset of -1 signals that no commit is stored; start from 0
            # in that case, otherwise use the stored offset as-is.
            self.offsets[resp.partition] = (
                0 if resp.offset == -1 else resp.offset)
Exemplo n.º 3
0
    def test_commit_fetch_offsets(self):
        """Commit offset 42 for partition 0, then fetch it back."""
        group = b"group"

        # Commit step: exactly one response is expected, with no error.
        commit_req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata")
        commit_resps = self.client.send_offset_commit_request(
            group, [commit_req])
        (commit_resp, ) = commit_resps
        self.assertEqual(commit_resp.error, 0)

        # Fetch step: the committed offset must be returned.
        fetch_req = OffsetFetchRequest(self.bytes_topic, 0)
        fetch_resps = self.client.send_offset_fetch_request(
            group, [fetch_req])
        (fetch_resp, ) = fetch_resps
        self.assertEqual(fetch_resp.error, 0)
        self.assertEqual(fetch_resp.offset, 42)
        # Metadata isn't stored for now
        self.assertEqual(fetch_resp.metadata, b"")
Exemplo n.º 4
0
    def __init__(self,
                 client,
                 group,
                 topic,
                 partitions=None,
                 auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):
        """Set up consumer state and the starting offset of each partition.

        :param client: client used to load topic metadata and to send
            offset fetch requests.
        :param group: consumer group passed to the offset fetch requests.
        :param topic: topic whose metadata is loaded and whose partitions
            are consumed.
        :param partitions: partition ids to consume. When falsy, the value
            of ``client.topic_partitions[topic]`` is used instead; otherwise
            every element is asserted to be an integral number.
        :param auto_commit: when truthy, starting offsets are fetched for
            the group; otherwise every partition starts at 0.
        :param auto_commit_every_n: stored on the instance as-is.
        :param auto_commit_every_t: interval handed to the commit timer;
            the timer is only created when ``auto_commit is True`` and this
            value is not ``None``.
        """
        self.client = client
        self.topic = topic
        self.group = group
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

        if partitions:
            assert all(isinstance(x, numbers.Integral) for x in partitions)
        else:
            partitions = self.client.topic_partitions[topic]

        # State used by the offset-commit machinery
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Periodic commits need both the strict ``True`` flag and an interval
        timer_wanted = auto_commit is True and auto_commit_every_t is not None
        if timer_wanted:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        if not auto_commit:
            # Without auto-commit nothing is fetched; start from offset 0
            self.offsets.update(dict.fromkeys(partitions, 0))
            return

        def stored_offset_or_zero(resp):
            # An unknown topic/partition is mapped to offset 0; any other
            # error raised by check_error propagates to the caller.
            try:
                kafka.common.check_error(resp)
            except kafka.common.UnknownTopicOrPartitionError:
                return 0
            else:
                return resp.offset

        # One request per partition; each call yields exactly one value
        for partition in partitions:
            fetch_req = OffsetFetchRequest(topic, partition)
            (stored, ) = self.client.send_offset_fetch_request(
                group, [fetch_req],
                callback=stored_offset_or_zero,
                fail_on_error=False)
            self.offsets[partition] = stored
Exemplo n.º 5
0
    def __init__(self,
                 client,
                 group,
                 topic,
                 partitions=None,
                 auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):
        """Set up consumer state and fetch the group's stored offsets.

        :param client: client used to load topic metadata and to send
            offset fetch requests.
        :param group: consumer group passed to the offset fetch requests.
        :param topic: topic whose metadata is loaded and whose partitions
            are consumed.
        :param partitions: partition ids to consume. When falsy, the value
            of ``client.topic_partitions[topic]`` is used instead.
        :param auto_commit: stored on the instance; the commit timer is only
            created when this is exactly ``True``.
        :param auto_commit_every_n: stored on the instance as-is.
        :param auto_commit_every_t: interval handed to the commit timer;
            ``None`` disables the timer.
        :raises Exception: if an offset fetch response carries an error code
            other than "no error" or "unknown topic or partition".
        """
        self.client = client
        self.topic = topic
        self.group = group
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

        if not partitions:
            partitions = self.client.topic_partitions[topic]

        # State used by the offset-commit machinery
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Periodic commits need both the strict ``True`` flag and an interval
        timer_wanted = auto_commit is True and auto_commit_every_t is not None
        if timer_wanted:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        def offset_from_response(resp):
            # Success yields the stored offset; an unknown topic/partition
            # yields 0; every other error code is fatal.
            if resp.error == ErrorMapping.NO_ERROR:
                return resp.offset
            if resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
                return 0
            raise Exception("OffsetFetchRequest for topic=%s, "
                            "partition=%d failed with errorcode=%s" %
                            (resp.topic, resp.partition, resp.error))

        # NOTE(review): the original comment here read "Uncomment for 0.8.1",
        # presumably meaning offset fetch needs a 0.8.1+ broker - confirm.
        # One request per partition; each call yields exactly one value.
        for partition in partitions:
            fetch_req = OffsetFetchRequest(topic, partition)
            (stored, ) = self.client.send_offset_fetch_request(
                group, [fetch_req],
                callback=offset_from_response,
                fail_on_error=False)
            self.offsets[partition] = stored
    def _update_group_offsets(self):
        """Refresh ``self._offsets.commit`` from the group's stored offsets.

        One offset fetch request is sent per partition of ``self._topic``.
        A stored offset of -1 is recorded as ``None``; any other value is
        recorded unchanged.
        """
        logger.info("Consumer fetching stored offsets")
        for partition in self._client.get_partition_ids_for_topic(self._topic):
            request = OffsetFetchRequest(self._topic, partition)
            (resp, ) = self._client.send_offset_fetch_request(
                self._group_id, [request], fail_on_error=False)

            # An unknown topic/partition error is tolerated; other errors
            # raised by check_error propagate.
            try:
                check_error(resp)
            except UnknownTopicOrPartitionError:
                pass

            committed = resp.offset
            self._offsets.commit[partition] = (
                None if committed == -1 else committed)
Exemplo n.º 7
0
    def fetch_last_known_offsets(self, partitions=None):
        """Load the group's stored offsets into ``self.offsets``.

        :param partitions: partition ids to query. When falsy, the ids are
            taken from
            ``self.client.get_partition_ids_for_topic(self.topic)``.

        After all partitions are processed, ``self.fetch_offsets`` is set to
        a shallow copy of ``self.offsets``.
        """
        if not partitions:
            partitions = self.client.get_partition_ids_for_topic(self.topic)

        for partition in partitions:
            request = OffsetFetchRequest(self.topic, partition)
            (resp, ) = self.client.send_offset_fetch_request(
                self.group, [request], fail_on_error=False)

            # An unknown topic/partition means nothing is stored yet, so
            # start from 0; other errors from check_error propagate.
            try:
                kafka.common.check_error(resp)
                stored = resp.offset
            except UnknownTopicOrPartitionError:
                stored = 0

            self.offsets[partition] = stored

        self.fetch_offsets = self.offsets.copy()
Exemplo n.º 8
0
    def fetch_last_known_offsets(self, partitions=None):
        """Load the group's stored offsets into ``self._offsets``.

        This is a generator-based coroutine: it delegates to the client
        with ``yield from``.

        :param partitions: currently ignored. The guard that would honour a
            caller-supplied value is commented out, so the partition ids
            always come from
            ``self._client.get_partition_ids_for_topic(self._topic)``.

        After all partitions are processed, ``self._fetch_offsets`` is set
        to a shallow copy of ``self._offsets``.
        """
        yield from self._client.load_metadata_for_topics(self._topic)

        # if not partitions:
        partitions = self._client.get_partition_ids_for_topic(self._topic)
        for partition in partitions:
            request = OffsetFetchRequest(self._topic, partition)
            # An unknown topic/partition raised by the request is mapped to
            # offset 0; any other exception propagates.
            try:
                responses = yield from self._client.send_offset_fetch_request(
                    self._group, [request])
                (response, ) = responses
                stored = response.offset
            except UnknownTopicOrPartitionError:
                stored = 0

            self._offsets[partition] = stored

        self._fetch_offsets = self._offsets.copy()
Exemplo n.º 9
0
    def _get_commit_offsets(self):
        """Refresh ``self._offsets.commit`` from the group's stored offsets.

        One offset fetch request is sent for every entry of ``self._topics``;
        element 0 of an entry is used as the topic and element 1 as the
        partition. A stored offset of -1 is recorded as ``None``; any other
        value is recorded unchanged.
        """
        logger.info("Consumer fetching stored offsets")
        for topic_partition in self._topics:
            group_id = kafka_bytestring(self._config['group_id'])
            request = OffsetFetchRequest(topic_partition[0],
                                         topic_partition[1])
            (resp, ) = self._client.send_offset_fetch_request(
                group_id, [request], fail_on_error=False)

            # The API spec says the server won't set an error here, but
            # 0.8.1.1 actually does, so an unknown topic/partition error
            # is deliberately tolerated.
            try:
                check_error(resp)
            except UnknownTopicOrPartitionError:
                pass

            # -1 signals that no commit is currently stored
            committed = resp.offset
            self._offsets.commit[topic_partition] = (
                None if committed == -1 else committed)
Exemplo n.º 10
0
def get_current_consumer_offsets(
    kafka_client,
    group,
    topics,
    raise_on_error=True,
    offset_storage='zookeeper',
):
    """ Get current consumer offsets.

    NOTE: This method does not refresh client metadata. It is up to the caller
    to avoid using stale metadata.

    If any partition leader is not available, the request fails for all the
    other topics. This is the tradeoff of sending all topic requests in batch
    and save both in performance and Kafka load.

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id
    :param topics: topic list or dict {<topic>: [partitions]}
    :param raise_on_error: if False the method ignores missing topics and
      missing partitions. It still may fail on the request send.
    :param offset_storage: String, one of {zookeeper, kafka}.
    :returns: a dict topic: partition: offset
    :raises:
      :py:class:`kafka_utils.util.error.UnknownTopic`: upon missing
      topics and raise_on_error=True

      :py:class:`kafka_utils.util.error.UnknownPartition`: upon missing
      partitions and raise_on_error=True

      :py:class:`kafka_utils.util.error.InvalidOffsetStorageError`: upon
      unknown offset_storage choice.

      FailedPayloadsError: upon send request error.
    """

    topics = _verify_topics_and_partitions(kafka_client, topics,
                                           raise_on_error)

    # dict.items() is available on both Python 2 and Python 3, whereas
    # dict.iteritems() only exists on Python 2.
    group_offset_reqs = [
        OffsetFetchRequest(kafka_bytestring(topic), partition)
        for topic, partitions in topics.items() for partition in partitions
    ]

    group_offsets = {}

    # Pick the client call that matches where the group stores its offsets.
    if offset_storage == 'zookeeper':
        send_api = kafka_client.send_offset_fetch_request
    elif offset_storage == 'kafka':
        send_api = kafka_client.send_offset_fetch_request_kafka
    else:
        raise InvalidOffsetStorageError(offset_storage)

    # Nothing is sent when there are no topic/partition pairs to query.
    if group_offset_reqs:
        # fail_on_error = False does not prevent network errors
        group_resps = send_api(
            group=kafka_bytestring(group),
            payloads=group_offset_reqs,
            fail_on_error=False,
            callback=pluck_topic_offset_or_zero_on_unknown,
        )
        # Fold the flat response list into {topic: {partition: offset}}.
        for resp in group_resps:
            group_offsets.setdefault(
                resp.topic,
                {},
            )[resp.partition] = resp.offset

    return group_offsets