示例#1
0
def test_synchronized_consumer_handles_end_of_partition(
    broker: Broker[KafkaPayload], ) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer", enable_end_of_partition=True)
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[0].next_offset), ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[1].next_offset), ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
示例#2
0
def test_synchronized_consumer_handles_end_of_partition() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer", enable_end_of_partition=True)
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[1].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
示例#3
0
文件: kafka.py 项目: ruezetle/snuba
    def decode(self, value: KafkaPayload) -> Commit:
        key = value.key
        if not isinstance(key, bytes):
            raise TypeError("payload key must be a bytes object")

        val = value.value
        if not isinstance(val, bytes):
            raise TypeError("payload value must be a bytes object")

        topic_name, partition_index, group = key.decode("utf-8").split(":", 3)
        offset = int(val.decode("utf-8"))
        return Commit(group, Partition(Topic(topic_name),
                                       int(partition_index)), offset)
示例#4
0
文件: kafka.py 项目: ruezetle/snuba
    def commit_offsets(self) -> Mapping[Partition, int]:
        offsets = super().commit_offsets()

        codec = CommitCodec()
        for partition, offset in offsets.items():
            payload = codec.encode(Commit(self.__group_id, partition, offset))
            self.__producer.produce(
                self.__commit_log_topic.name,
                key=payload.key,
                value=payload.value,
                on_delivery=self.__commit_message_delivery_callback,
            )

        return offsets
示例#5
0
    def test_commit_log_consumer(self) -> None:
        # XXX: This would be better as an integration test (or at least a test
        # against an abstract Producer interface) instead of against a test against
        # a mock.
        commit_log_producer = FakeConfluentKafkaProducer()

        consumer: KafkaConsumer[int] = KafkaConsumerWithCommitLog(
            {
                **self.configuration,
                "auto.offset.reset": "earliest",
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "true",
                "group.id": "test",
                "session.timeout.ms": 10000,
            },
            codec=self.codec,
            producer=commit_log_producer,
            commit_log_topic=Topic("commit-log"),
        )

        with self.get_topic() as topic, closing(consumer) as consumer:
            consumer.subscribe([topic])

            with closing(self.get_producer()) as producer:
                producer.produce(topic, 0).result(5.0)

            message = consumer.poll(
                10.0)  # XXX: getting the subscription is slow
            assert isinstance(message, Message)

            consumer.stage_offsets(
                {message.partition: message.get_next_offset()})

            assert consumer.commit_offsets() == {
                Partition(topic, 0): message.get_next_offset()
            }

            assert len(commit_log_producer.messages) == 1
            commit_message = commit_log_producer.messages[0]
            assert commit_message.topic() == "commit-log"

            assert CommitCodec().decode(
                KafkaPayload(commit_message.key(),
                             commit_message.value())) == Commit(
                                 "test", Partition(topic, 0),
                                 message.get_next_offset())
示例#6
0
def test_synchronized_consumer(broker: Broker[KafkaPayload]) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(6)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader-a", "leader-b"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # The consumer should not consume any messages until it receives a
        # commit from both groups that are being followed.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.tell, {},
                                {Partition(topic, 0): messages[0].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        # The consumer should remain paused, since it needs both groups to
        # advance before it may continue.
        with assert_does_not_change(
                consumer.paused,
            [Partition(topic, 0)]), assert_does_not_change(
                consumer.tell, {Partition(topic, 0): messages[0].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[0].offset},
                                {Partition(topic, 0): messages[0].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[0]

        # After consuming the one available message, the consumer should be
        # paused again until the remote offsets advance.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[1].offset}):
            assert synchronized_consumer.poll(0.0) is None

        # Emulate the unlikely (but possible) scenario of the leader offsets
        # being within a series of compacted (deleted) messages by:
        # 1. moving the remote offsets forward, so that the partition is resumed
        # 2. seeking the consumer beyond the remote offsets

        producer.produce(
            commit_log_topic,
            commit_codec.encode(
                Commit("leader-a", Partition(topic, 0), messages[3].offset)),
        ).result()

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0),
                           messages[5].offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[1].offset},
                                {Partition(topic, 0): messages[1].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[1]

        # At this point, we manually seek the consumer offset, to emulate messages being skipped.
        with assert_changes(
                consumer.tell,
            {Partition(topic, 0): messages[2].offset},
            {Partition(topic, 0): messages[4].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[4].offset})

        # Since the (effective) remote offset is the offset for message #3 (via
        # ``leader-a``), and the local offset is the offset of message #4, when
        # message #4 is consumed, it should be discarded and the offset should
        # be rolled back to wait for the commit log to advance.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[4].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0),
                           messages[5].offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[4].offset},
                                {Partition(topic, 0): messages[4].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[4]
示例#7
0
def test_synchronized_consumer_pause_resume(
        broker: Broker[KafkaPayload]) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):

        def assignment_callback(offsets: Mapping[Partition, int]) -> None:
            synchronized_consumer.pause([Partition(topic, 0)])

        synchronized_consumer.subscribe([topic], on_assign=assignment_callback)

        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.paused, [], [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0.0) is None

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused,
                                    []), assert_changes(
                                        consumer.paused, [],
                                        [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])
示例#8
0
def test_commit_codec() -> None:
    commit = Commit("group", Partition(Topic("topic"), 0), 0)
    assert commit_codec.decode(commit_codec.encode(commit)) == commit
示例#9
0
def test_synchronized_consumer_pause_resume() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer")
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.paused, [], [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused,
                                    []), assert_changes(
                                        consumer.paused, [],
                                        [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])