def test_synchronized_consumer_handles_end_of_partition(
    broker: Broker[KafkaPayload],
) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer", enable_end_of_partition=True)
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"), [])).result(
            1.0
        )
        for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0), messages[0].next_offset),
                ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0), messages[1].next_offset),
                ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]

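# These tests lean on a ``wait_for_consumer`` helper that is not part of this
# excerpt. A minimal sketch of what it could look like, assuming the ``tell``
# interface used above and that the commit log consumer is polled by a
# background worker (the attempt count, sleep interval, and use of
# ``time.sleep`` are illustrative guesses, not from the source; ``import
# time`` and ``from typing import TypeVar`` are assumed at module top):
TPayload = TypeVar("TPayload")


def wait_for_consumer(
    consumer: Consumer[TPayload], message: Message[TPayload], attempts: int = 10
) -> None:
    """Block until ``consumer`` has received ``message``."""
    for _ in range(attempts):
        # ``tell`` reports the next offset to be read for each assigned
        # partition; once it has moved past the message's offset, the message
        # has been received.
        position = consumer.tell().get(message.partition)
        if position is not None and position > message.offset:
            return
        time.sleep(0.1)
    raise AssertionError(
        f"{message} was not received by {consumer} after {attempts} attempts"
    )
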
def test_synchronized_consumer_handles_end_of_partition() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer", enable_end_of_partition=True
    )
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0), messages[0].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0), messages[1].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]

def decode(self, value: KafkaPayload) -> Commit:
    key = value.key
    if not isinstance(key, bytes):
        raise TypeError("payload key must be a bytes object")

    val = value.value
    if not isinstance(val, bytes):
        raise TypeError("payload value must be a bytes object")

    # Split off the topic name and partition index; the remainder is the
    # group, which may itself contain the separator character. (A maxsplit of
    # 2, not 3, is what three target variables require.)
    topic_name, partition_index, group = key.decode("utf-8").split(":", 2)
    offset = int(val.decode("utf-8"))
    return Commit(group, Partition(Topic(topic_name), int(partition_index)), offset)

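# ``decode`` fixes the wire format: the payload key packs the topic name,
# partition index, and group (colon separated), and the payload value is the
# offset as a decimal string. A minimal sketch of the matching ``encode``,
# assuming ``Commit`` and ``Partition`` expose attributes matching their
# constructor arguments (``group``/``partition``/``offset`` and
# ``topic``/``index``), with the empty header list used with ``KafkaPayload``
# elsewhere in this file:
def encode(self, value: Commit) -> KafkaPayload:
    return KafkaPayload(
        f"{value.partition.topic.name}:{value.partition.index}:{value.group}".encode(
            "utf-8"
        ),
        f"{value.offset}".encode("utf-8"),
        [],
    )
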
def commit_offsets(self) -> Mapping[Partition, int]:
    offsets = super().commit_offsets()

    codec = CommitCodec()
    for partition, offset in offsets.items():
        payload = codec.encode(Commit(self.__group_id, partition, offset))
        self.__producer.produce(
            self.__commit_log_topic.name,
            key=payload.key,
            value=payload.value,
            on_delivery=self.__commit_message_delivery_callback,
        )

    return offsets

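# The produce calls above are asynchronous; failures surface through the
# delivery callback. A plausible sketch of that callback (the exact error
# handling is an assumption, not shown in this excerpt):
def __commit_message_delivery_callback(self, error, message) -> None:
    # ``error`` is a ``confluent_kafka.KafkaError`` on failure and ``None``
    # on success; raising here makes a lost commit log message loud.
    if error is not None:
        raise Exception(f"error delivering commit log message: {error}")
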
def test_commit_log_consumer(self) -> None:
    # XXX: This would be better as an integration test (or at least a test
    # against an abstract Producer interface) instead of a test against a
    # mock.
    commit_log_producer = FakeConfluentKafkaProducer()

    consumer: KafkaConsumer[int] = KafkaConsumerWithCommitLog(
        {
            **self.configuration,
            "auto.offset.reset": "earliest",
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "false",
            "enable.partition.eof": "true",
            "group.id": "test",
            "session.timeout.ms": 10000,
        },
        codec=self.codec,
        producer=commit_log_producer,
        commit_log_topic=Topic("commit-log"),
    )

    with self.get_topic() as topic, closing(consumer) as consumer:
        consumer.subscribe([topic])

        with closing(self.get_producer()) as producer:
            producer.produce(topic, 0).result(5.0)

        message = consumer.poll(10.0)  # XXX: getting the subscription is slow
        assert isinstance(message, Message)

        consumer.stage_offsets({message.partition: message.get_next_offset()})

        assert consumer.commit_offsets() == {
            Partition(topic, 0): message.get_next_offset()
        }

        assert len(commit_log_producer.messages) == 1
        commit_message = commit_log_producer.messages[0]
        assert commit_message.topic() == "commit-log"

        assert CommitCodec().decode(
            KafkaPayload(commit_message.key(), commit_message.value())
        ) == Commit("test", Partition(topic, 0), message.get_next_offset())

def test_synchronized_consumer(broker: Broker[KafkaPayload]) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"), [])).result(
            1.0
        )
        for i in range(6)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader-a", "leader-b"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # The consumer should not consume any messages until it receives a
        # commit from both groups that are being followed.
        with assert_changes(consumer.paused, [], [Partition(topic, 0)]), assert_changes(
            consumer.tell, {}, {Partition(topic, 0): messages[0].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0), messages[0].next_offset)
                ),
            ).result(),
        )

        # The consumer should remain paused, since it needs both groups to
        # advance before it may continue.
        with assert_does_not_change(
            consumer.paused, [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[0].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0), messages[0].next_offset)
                ),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both groups
        # have processed the first message.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[0].offset},
            {Partition(topic, 0): messages[0].next_offset},
        ):
            assert synchronized_consumer.poll(0.0) == messages[0]

        # After consuming the one available message, the consumer should be
        # paused again until the remote offsets advance.
        with assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[1].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        # Emulate the unlikely (but possible) scenario of the leader offsets
        # being within a series of compacted (deleted) messages by:
        # 1. moving the remote offsets forward, so that the partition is resumed
        # 2. seeking the consumer beyond the remote offsets
        producer.produce(
            commit_log_topic,
            commit_codec.encode(
                Commit("leader-a", Partition(topic, 0), messages[3].offset)
            ),
        ).result()
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0), messages[5].offset)
                ),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both groups
        # have now advanced past the next local message.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[1].offset},
            {Partition(topic, 0): messages[1].next_offset},
        ):
            assert synchronized_consumer.poll(0.0) == messages[1]

        # At this point, we manually seek the consumer offset, to emulate
        # messages being skipped.
        with assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[2].offset},
            {Partition(topic, 0): messages[4].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[4].offset})

        # Since the (effective) remote offset is the offset for message #3 (via
        # ``leader-a``), and the local offset is the offset of message #4, when
        # message #4 is consumed, it should be discarded and the offset should
        # be rolled back to wait for the commit log to advance.
        with assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[4].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0), messages[5].offset)
                ),
            ).result(),
        )

        # The consumer should be able to resume consuming.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[4].offset},
            {Partition(topic, 0): messages[4].next_offset},
        ):
            assert synchronized_consumer.poll(0.0) == messages[4]

def test_synchronized_consumer_pause_resume(broker: Broker[KafkaPayload]) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"), [])).result(
            1.0
        )
        for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):

        def assignment_callback(offsets: Mapping[Partition, int]) -> None:
            synchronized_consumer.pause([Partition(topic, 0)])

        synchronized_consumer.subscribe([topic], on_assign=assignment_callback)

        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0.0) is None

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0), messages[0].next_offset)
                ),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused, []), assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

def test_commit_codec() -> None:
    commit = Commit("group", Partition(Topic("topic"), 0), 0)
    assert commit_codec.decode(commit_codec.encode(commit)) == commit

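# Concretely, with the key/value layout that ``CommitCodec.decode`` reads
# back, the commit above is expected to serialize as follows (a sketch for
# illustration; the exact byte strings are inferred from ``decode``, not
# asserted anywhere in the source):
payload = commit_codec.encode(Commit("group", Partition(Topic("topic"), 0), 0))
assert payload.key == b"topic:0:group"
assert payload.value == b"0"
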
def test_synchronized_consumer_pause_resume() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer"
    )
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0), messages[0].get_next_offset()),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused, []), assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])