Example #1
def test_filter() -> None:
    next_step = Mock()

    def test_function(message: Message[bool]) -> bool:
        return message.payload

    filter_step = FilterStep(test_function, next_step)

    fail_message = Message(Partition(Topic("topic"), 0), 0, False,
                           datetime.now())

    with assert_does_not_change(lambda: next_step.submit.call_count, 0):
        filter_step.submit(fail_message)

    pass_message = Message(Partition(Topic("topic"), 0), 0, True,
                           datetime.now())

    with assert_changes(lambda: next_step.submit.call_count, 0, 1):
        filter_step.submit(pass_message)

    assert next_step.submit.call_args == call(pass_message)

    with assert_changes(lambda: next_step.poll.call_count, 0, 1):
        filter_step.poll()

    with assert_changes(lambda: next_step.close.call_count, 0,
                        1), assert_changes(lambda: next_step.join.call_count,
                                           0, 1):
        filter_step.join()
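
For orientation: the step under test forwards a message to the next step only when the filter function returns True, and otherwise simply delegates poll/close/join. A minimal sketch consistent with those assertions follows; the class name, base interface, and import handling here are assumptions for illustration, not the library's actual implementation.

from typing import Callable, Generic, Optional, TypeVar

TPayload = TypeVar("TPayload")


class SimpleFilterStep(Generic[TPayload]):
    """Hypothetical stand-in for the FilterStep exercised above."""

    def __init__(
        self,
        function: Callable[["Message[TPayload]"], bool],
        next_step: "ProcessingStep[TPayload]",  # assumed downstream step interface
    ) -> None:
        self.__function = function
        self.__next_step = next_step

    def submit(self, message: "Message[TPayload]") -> None:
        # Forward only the messages for which the predicate returns True.
        if self.__function(message):
            self.__next_step.submit(message)

    def poll(self) -> None:
        self.__next_step.poll()

    def close(self) -> None:
        self.__next_step.close()

    def join(self, timeout: Optional[float] = None) -> None:
        self.__next_step.join(timeout)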
Example #2
            def revocation_callback(partitions: Sequence[Partition]) -> None:
                revocation_callback.called = True
                assert partitions == [Partition(topic, 0)]
                assert consumer.tell() == {Partition(topic, 0): messages[1].offset}

                # Not sure why you'd want to do this, but it shouldn't error.
                consumer.seek({Partition(topic, 0): messages[0].offset})
Example #3
def test_stream_processor_termination_on_error() -> None:
    topic = Topic("test")

    consumer = mock.Mock()
    consumer.poll.return_value = Message(Partition(topic, 0), 0, 0,
                                         datetime.now())

    exception = NotImplementedError("error")

    strategy = mock.Mock()
    strategy.submit.side_effect = exception

    factory = mock.Mock()
    factory.create.return_value = strategy

    processor: StreamProcessor[int] = StreamProcessor(consumer, topic, factory,
                                                      TestingMetricsBackend())

    assignment_callback = consumer.subscribe.call_args.kwargs["on_assign"]
    assignment_callback({Partition(topic, 0): 0})

    with pytest.raises(Exception) as e, assert_changes(
            lambda: strategy.terminate.call_count, 0,
            1), assert_changes(lambda: consumer.close.call_count, 0, 1):
        processor.run()

    assert e.value == exception
Example #4
            def assignment_callback(partitions: Mapping[Partition, int]) -> None:
                assignment_callback.called = True
                assert partitions == {Partition(topic, 0): messages[0].offset}

                consumer.seek({Partition(topic, 0): messages[1].offset})

                with pytest.raises(ConsumerError):
                    consumer.seek({Partition(topic, 1): 0})
Example #5
def test_synchronized_consumer_handles_end_of_partition(
    broker: Broker[KafkaPayload], ) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer", enable_end_of_partition=True)
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[0].next_offset), ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[1].next_offset), ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
Example #6
    def test_pause_resume_rebalancing(self) -> None:
        payloads = self.get_payloads()

        with self.get_topic(2) as topic, closing(
            self.get_producer()
        ) as producer, closing(
            self.get_consumer("group", enable_end_of_partition=False)
        ) as consumer_a, closing(
            self.get_consumer("group", enable_end_of_partition=False)
        ) as consumer_b:
            messages = [
                producer.produce(Partition(topic, i),
                                 next(payloads)).result(timeout=5.0)
                for i in range(2)
            ]

            consumer_a.subscribe([topic])

            # It doesn't really matter which message is fetched first -- we
            # just want to know the assignment occurred.
            assert (consumer_a.poll(10.0)
                    in messages)  # XXX: getting the subscription is slow

            assert len(consumer_a.tell()) == 2
            assert len(consumer_b.tell()) == 0

            # Pause all partitions.
            consumer_a.pause([Partition(topic, 0), Partition(topic, 1)])
            assert set(consumer_a.paused()) == set(
                [Partition(topic, 0), Partition(topic, 1)])

            consumer_b.subscribe([topic])
            for i in range(10):
                assert consumer_a.poll(
                    0) is None  # attempt to force session timeout
                if consumer_b.poll(1.0) is not None:
                    break
            else:
                assert False, "rebalance did not occur"

            # The first consumer should have had its offsets rolled back, as
            # well as its partition resumed, during rebalancing.
            assert consumer_a.paused() == []
            assert consumer_a.poll(10.0) is not None

            assert len(consumer_a.tell()) == 1
            assert len(consumer_b.tell()) == 1
Example #7
def test_synchronized_consumer_handles_end_of_partition() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer", enable_end_of_partition=True)
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[1].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
Example #8
    def test_consumer_offset_out_of_range(self) -> None:
        payloads = self.get_payloads()

        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                messages = [
                    producer.produce(topic, next(payloads)).result(5.0)
                ]

            consumer = self.get_consumer()
            consumer.subscribe([topic])

            for i in range(5):
                message = consumer.poll(1.0)
                if message is not None:
                    break
                else:
                    time.sleep(1.0)
            else:
                raise Exception("assignment never received")

            with pytest.raises(EndOfPartition):
                consumer.poll()

            # Somewhat counterintuitively, seeking to an invalid position
            # should be allowed -- we don't know it's invalid until we try and
            # read from it.
            consumer.seek(
                {Partition(topic, 0): messages[-1].next_offset + 1000})

            with pytest.raises(OffsetOutOfRange):
                consumer.poll()
Example #9
    def eventstream(*, dataset: Dataset):
        ensure_table_exists(dataset)
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data),
            datetime.now(),
        )

        type_ = record[1]
        metrics = DummyMetricsBackend()
        if type_ == "insert":
            from snuba.consumer import ConsumerWorker

            worker = ConsumerWorker(dataset, metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(clickhouse_rw, dataset, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example #10
    def test_delete_groups_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.write_raw_events(self.event)

        assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps(
                    (
                        2,
                        "end_delete_groups",
                        {
                            "project_id": project_id,
                            "group_ids": [1],
                            "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                        },
                    )
                ).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == []
Example #11
    def produce(self, destination: Union[Topic, Partition],
                payload: TPayload) -> Future[Message[TPayload]]:
        with self.__lock:
            assert not self.__closed

            partition: Partition
            if isinstance(destination, Topic):
                partition = Partition(
                    destination,
                    random.randint(
                        0,
                        self.__broker.get_topic_partition_count(destination) -
                        1),
                )
            elif isinstance(destination, Partition):
                partition = destination
            else:
                raise TypeError("invalid destination type")

            future: Future[Message[TPayload]] = Future()
            future.set_running_or_notify_cancel()
            try:
                message = self.__broker.produce(partition, payload)
                future.set_result(message)
            except Exception as e:
                future.set_exception(e)
            return future
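
As a usage note for the method above: the destination may be either a Topic (a partition is then chosen at random) or an explicit Partition, and the returned Future resolves to the produced Message. A small illustrative sketch with placeholder names (the producer and payload are whatever your setup provides):

def produce_both_ways(producer, topic: Topic, payload) -> None:
    # Producing to a Topic lets the producer pick a random partition.
    producer.produce(topic, payload).result(timeout=5.0)

    # Producing to an explicit Partition targets that partition directly.
    producer.produce(Partition(topic, 0), payload).result(timeout=5.0)

    # Any other destination type raises TypeError("invalid destination type").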
Example #12
    def subscribe(self, consumer: LocalConsumer[TPayload],
                  topics: Sequence[Topic]) -> Mapping[Partition, int]:
        with self.__lock:
            if self.__subscriptions[consumer.group]:
                # XXX: Consumer group balancing is not currently implemented.
                if consumer not in self.__subscriptions[consumer.group]:
                    raise NotImplementedError

                # XXX: Updating an existing subscription is currently not implemented.
                if self.__subscriptions[consumer.group][consumer] != topics:
                    raise NotImplementedError

            self.__subscriptions[consumer.group][consumer] = topics

            assignment: MutableMapping[Partition, int] = {}

            for topic in set(topics):
                partition_count = self.__message_storage.get_partition_count(
                    topic)

                for index in range(partition_count):
                    partition = Partition(topic, index)
                    # TODO: Handle offset reset more realistically.
                    assignment[partition] = self.__offsets[consumer.group].get(
                        partition, 0)

        return assignment
Example #13
def test_transform() -> None:
    next_step = Mock()

    def transform_function(message: Message[int]) -> int:
        return message.payload * 2

    transform_step = TransformStep(transform_function, next_step)

    original_message = Message(Partition(Topic("topic"), 0), 0, 1,
                               datetime.now())

    with assert_changes(lambda: next_step.submit.call_count, 0, 1):
        transform_step.submit(original_message)

    assert next_step.submit.call_args == call(
        Message(
            original_message.partition,
            original_message.offset,
            transform_function(original_message),
            original_message.timestamp,
        ))

    with assert_changes(lambda: next_step.poll.call_count, 0, 1):
        transform_step.poll()

    with assert_changes(lambda: next_step.close.call_count, 0,
                        1), assert_changes(lambda: next_step.join.call_count,
                                           0, 1):
        transform_step.join()
Example #14
    def _wrap(self, msg: str) -> Message[KafkaPayload]:
        return Message(
            Partition(Topic("replacements"), 0),
            0,
            KafkaPayload(None, json.dumps(msg).encode("utf-8")),
            datetime.now(),
        )
Example #15
    def test_offsets(self):
        event = self.event

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 456),
            123,
            KafkaPayload(
                None, json.dumps((0, "insert", event)).encode("utf-8")
            ),  # event doesn't really matter
            datetime.now(),
        )

        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )
        batch = [test_worker.process_message(message)]
        test_worker.flush_batch(batch)

        assert self.clickhouse.execute(
            "SELECT project_id, event_id, offset, partition FROM %s" % self.table
        ) == [(self.event["project_id"], self.event["event_id"], 123, 456)]
Example #16
    def __delivery_callback(
        self,
        future: Future[Message[KafkaPayload]],
        payload: KafkaPayload,
        error: KafkaError,
        message: ConfluentMessage,
    ) -> None:
        if error is not None:
            future.set_exception(TransportError(error))
        else:
            try:
                timestamp_type, timestamp_value = message.timestamp()
                if timestamp_type is TIMESTAMP_NOT_AVAILABLE:
                    raise ValueError("timestamp not available")

                future.set_result(
                    Message(
                        Partition(Topic(message.topic()), message.partition()),
                        message.offset(),
                        payload,
                        datetime.utcfromtimestamp(timestamp_value / 1000.0),
                    )
                )
            except Exception as error:
                future.set_exception(error)
Example #17
    def test_skip_too_old(self):
        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        event = self.event
        old_timestamp = datetime.utcnow() - timedelta(days=300)
        old_timestamp_str = old_timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        event["datetime"] = old_timestamp_str
        event["data"]["datetime"] = old_timestamp_str
        event["data"]["received"] = int(calendar.timegm(old_timestamp.timetuple()))

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 1),
            42,
            KafkaPayload(None, json.dumps((0, "insert", event)).encode("utf-8")),
            datetime.now(),
        )

        assert test_worker.process_message(message) is None
Example #18
    def test_send_message(
        self,
        value: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        storage = get_storage("groupedmessages")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100,
                                        xmax=200,
                                        xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            storage=storage,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            1,
            KafkaPayload(
                None,
                value.encode("utf-8"),
                [("table", "sentry_groupedmessage".encode())],
            ),
            datetime.now(),
        )

        ret = worker.process_message(message)
        assert ret == expected
Example #19
    def test_commit_log_consumer(self) -> None:
        # XXX: This would be better as an integration test (or at least a test
        # against an abstract Producer interface) instead of a test against a
        # mock.
        commit_log_producer = FakeConfluentKafkaProducer()

        consumer: KafkaConsumer[int] = KafkaConsumerWithCommitLog(
            {
                **self.configuration,
                "auto.offset.reset": "earliest",
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "true",
                "group.id": "test",
                "session.timeout.ms": 10000,
            },
            codec=self.codec,
            producer=commit_log_producer,
            commit_log_topic=Topic("commit-log"),
        )

        with self.get_topic() as topic, closing(consumer) as consumer:
            consumer.subscribe([topic])

            with closing(self.get_producer()) as producer:
                producer.produce(topic, 0).result(5.0)

            message = consumer.poll(
                10.0)  # XXX: getting the subscription is slow
            assert isinstance(message, Message)

            consumer.stage_offsets(
                {message.partition: message.get_next_offset()})

            assert consumer.commit_offsets() == {
                Partition(topic, 0): message.get_next_offset()
            }

            assert len(commit_log_producer.messages) == 1
            commit_message = commit_log_producer.messages[0]
            assert commit_message.topic() == "commit-log"

            assert CommitCodec().decode(
                KafkaPayload(commit_message.key(),
                             commit_message.value())) == Commit(
                                 "test", Partition(topic, 0),
                                 message.get_next_offset())
Example #20
    def __make_msg(self, partition: int, offset: int, payload: str,
                   headers: Headers) -> Message[KafkaPayload]:
        return Message(
            partition=Partition(Topic("topic"), partition),
            offset=offset,
            payload=KafkaPayload(b"key", payload.encode(), headers),
            timestamp=datetime(2019, 6, 19, 6, 46, 28),
        )
Example #21
    def unsubscribe(self,
                    consumer: LocalConsumer[TPayload]) -> Sequence[Partition]:
        with self.__lock:
            partitions: MutableSequence[Partition] = []
            for topic in self.__subscriptions[consumer.group].pop(consumer):
                partitions.extend(
                    Partition(topic, i) for i in range(
                        self.__message_storage.get_partition_count(topic)))
            return partitions
Example #22
    def test_unaligned_offset(self) -> None:
        topic = Topic(uuid.uuid1().hex)
        partition = Partition(topic, 0)
        self.storage.create_topic(topic, 1)

        message = self.storage.produce(partition, 1, datetime.now())

        invalid_offset = message.offset + 4
        assert message.next_offset > invalid_offset > message.offset

        with pytest.raises(InvalidChecksum):
            self.storage.consume(partition, invalid_offset)
Example #23
def test_parallel_transform_worker_apply() -> None:
    messages = [
        Message(
            Partition(Topic("test"), 0),
            i,
            KafkaPayload(None, b"\x00" * size, None),
            datetime.now(),
        ) for i, size in enumerate([1000, 1000, 2000, 4000])
    ]

    with SharedMemoryManager() as smm:
        input_block = smm.SharedMemory(8192)
        assert input_block.size == 8192

        input_batch = MessageBatch(input_block)
        for message in messages:
            input_batch.append(message)

        assert len(input_batch) == 4

        output_block = smm.SharedMemory(4096)
        assert output_block.size == 4096

        index, output_batch = parallel_transform_worker_apply(
            transform_payload_expand,
            input_batch,
            output_block,
        )

        # The first batch should be able to fit 2 messages.
        assert index == 2
        assert len(output_batch) == 2

        index, output_batch = parallel_transform_worker_apply(
            transform_payload_expand,
            input_batch,
            output_block,
            index,
        )

        # The second batch should be able to fit one message.
        assert index == 3
        assert len(output_batch) == 1

        # The last message is too large to fit in the batch.
        with pytest.raises(ValueTooLarge):
            parallel_transform_worker_apply(
                transform_payload_expand,
                input_batch,
                output_block,
                index,
            )
Example #24
    def decode(self, value: KafkaPayload) -> Commit:
        key = value.key
        if not isinstance(key, bytes):
            raise TypeError("payload key must be a bytes object")

        val = value.value
        if not isinstance(val, bytes):
            raise TypeError("payload value must be a bytes object")

        topic_name, partition_index, group = key.decode("utf-8").split(":", 3)
        offset = int(val.decode("utf-8"))
        return Commit(group, Partition(Topic(topic_name),
                                       int(partition_index)), offset)
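
For reference, the key parsed here has the layout "topic:partition_index:group", and the value carries the offset as text. A hypothetical companion encode mirroring that layout is sketched below; the attribute names on Commit and Partition follow their usage elsewhere in these examples, and some versions of KafkaPayload also expect a headers argument.

    def encode(self, value: Commit) -> KafkaPayload:
        # Hypothetical counterpart to decode() above: the key carries
        # "topic:partition_index:group", the value carries the offset as text.
        key = ":".join(
            [value.partition.topic.name, str(value.partition.index), value.group]
        )
        return KafkaPayload(key.encode("utf-8"), str(value.offset).encode("utf-8"))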
Example #25
    def test_delete_tag_promoted_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["data"]["tags"].append(["browser.name", "foo"])
        self.event["data"]["tags"].append(["notbrowser", "foo"])
        self.write_raw_events(self.event)

        project_id = self.project_id

        def _issue_count(total=False):
            return json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps({
                        "project": [project_id],
                        "aggregations": [["count()", "", "count"]],
                        "conditions": [["tags[browser.name]", "=", "foo"]]
                        if not total else [],
                        "groupby": ["group_id"],
                    }),
                ).data)["data"]

        assert _issue_count() == [{"count": 1, "group_id": 1}]
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser.name",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert _issue_count() == []
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
Example #26
def get_messages(events_file) -> Sequence[Message[KafkaPayload]]:
    "Create a fake Kafka message for each JSON event in the file."
    messages: MutableSequence[Message[KafkaPayload]] = []
    raw_events = open(events_file).readlines()
    for raw_event in raw_events:
        messages.append(
            Message(
                Partition(Topic("events"), 1),
                0,
                KafkaPayload(None, raw_event.encode("utf-8")),
                datetime.now(),
            ),
        )
    return messages
Example #27
    def test_auto_offset_reset_latest(self) -> None:
        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            with closing(
                    self.get_consumer(auto_offset_reset="latest")) as consumer:
                consumer.subscribe([topic])

                try:
                    consumer.poll(10.0)  # XXX: getting the subscription is slow
                except EndOfPartition as error:
                    assert error.partition == Partition(topic, 0)
                    assert error.offset == 1
                else:
                    raise AssertionError("expected EndOfPartition error")
Example #28
        def assignment_callback(
            consumer: ConfluentConsumer, partitions: Sequence[ConfluentTopicPartition]
        ) -> None:
            self.__state = KafkaConsumerState.ASSIGNING

            try:
                assignment: MutableSequence[ConfluentTopicPartition] = []

                for partition in self.__consumer.committed(partitions):
                    if partition.offset >= 0:
                        assignment.append(partition)
                    elif partition.offset == OFFSET_INVALID:
                        assignment.append(
                            self.__resolve_partition_starting_offset(partition)
                        )
                    else:
                        raise ValueError("received unexpected offset")

                offsets: MutableMapping[Partition, int] = {
                    Partition(Topic(i.topic), i.partition): i.offset for i in assignment
                }
                self.__seek(offsets)

                # Ensure that all partitions are resumed on assignment to avoid
                # carrying over state from a previous assignment.
                self.__consumer.resume(
                    [
                        ConfluentTopicPartition(
                            partition.topic.name, partition.index, offset
                        )
                        for partition, offset in offsets.items()
                    ]
                )

                for partition in offsets:
                    self.__paused.discard(partition)
            except Exception:
                self.__state = KafkaConsumerState.ERROR
                raise

            try:
                if on_assign is not None:
                    on_assign(offsets)
            finally:
                self.__state = KafkaConsumerState.CONSUMING
Example #29
def test_collect() -> None:
    step_factory = Mock()
    step_factory.return_value = inner_step = Mock()

    commit_function = Mock()
    partition = Partition(Topic("topic"), 0)
    messages = message_generator(partition, 0)

    collect_step = CollectStep(step_factory, commit_function, 2, 60)

    # A batch should be started the first time the step receives a message.
    with assert_changes(lambda: step_factory.call_count, 0, 1):
        collect_step.poll()
        collect_step.submit(next(messages))  # offset 0

    # Subsequent messages should reuse the existing batch, ...
    with assert_does_not_change(lambda: step_factory.call_count, 1):
        collect_step.poll()
        collect_step.submit(next(messages))  # offset 1

    # ...until we hit the batch size limit.
    with assert_changes(lambda: inner_step.close.call_count,
                        0, 1), assert_changes(
                            lambda: inner_step.join.call_count, 0,
                            1), assert_changes(
                                lambda: commit_function.call_count, 0, 1):
        collect_step.poll()
        assert commit_function.call_args == call({partition: 2})

    step_factory.return_value = inner_step = Mock()

    # The next message should create a new batch.
    with assert_changes(lambda: step_factory.call_count, 1, 2):
        collect_step.submit(next(messages))

    with assert_changes(lambda: inner_step.close.call_count, 0, 1):
        collect_step.close()

    with assert_changes(lambda: inner_step.join.call_count, 0,
                        1), assert_changes(lambda: commit_function.call_count,
                                           1, 2):
        collect_step.join()
Example #30
def test_message_batch() -> None:
    partition = Partition(Topic("test"), 0)

    with SharedMemoryManager() as smm:
        block = smm.SharedMemory(4096)
        assert block.size == 4096

        message = Message(partition, 0, KafkaPayload(None, b"\x00" * 4000,
                                                     None), datetime.now())

        batch: MessageBatch[KafkaPayload] = MessageBatch(block)
        with assert_changes(lambda: len(batch), 0, 1):
            batch.append(message)

        assert batch[0] == message
        assert list(batch) == [message]

        with assert_does_not_change(lambda: len(batch),
                                    1), pytest.raises(ValueTooLarge):
            batch.append(message)