Example #1
def test_synchronized_consumer_worker_crash_before_assignment(
    broker: Broker[KafkaPayload], ) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    poll_called = Event()

    class BrokenConsumerException(Exception):
        pass

    class BrokenConsumer(LocalConsumer[KafkaPayload]):
        def poll(
            self,
            timeout: Optional[float] = None
        ) -> Optional[Message[KafkaPayload]]:
            try:
                raise BrokenConsumerException()
            finally:
                poll_called.set()

    consumer = broker.get_consumer("consumer")
    commit_log_consumer: Consumer[KafkaPayload] = BrokenConsumer(
        broker, "commit-log-consumer")

    with pytest.raises(BrokenConsumerException):
        SynchronizedConsumer(
            consumer,
            commit_log_consumer,
            commit_log_topic=commit_log_topic,
            commit_log_groups={"leader"},
        )
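
These tests reference a ``broker`` pytest fixture and a number of imports that are not shown in the excerpts. The following is a minimal sketch of what that setup could look like, assuming the in-memory broker backend used by this codebase's test suite; the module paths below are assumptions and may differ between versions.

# Hypothetical test setup -- the import locations are assumptions and may not
# match the exact version these examples were taken from.
import pytest

from arroyo.backends.kafka import KafkaPayload
from arroyo.backends.local.backend import LocalBroker as Broker
from arroyo.backends.local.storages.memory import MemoryMessageStorage
from arroyo.utils.clock import TestingClock


@pytest.fixture
def broker() -> Broker[KafkaPayload]:
    # An in-memory broker lets the tests create topics, producers, and
    # consumers without a running Kafka cluster.
    return Broker(MemoryMessageStorage(), TestingClock())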
Example #2
def test_synchronized_consumer_worker_crash_after_assignment(
    broker: Broker[KafkaPayload], ) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    poll_called = Event()

    class BrokenConsumerException(Exception):
        pass

    class BrokenConsumer(LocalConsumer[KafkaPayload]):
        def poll(
            self,
            timeout: Optional[float] = None
        ) -> Optional[Message[KafkaPayload]]:
            if not self.tell():
                return super().poll(timeout)
            else:
                try:
                    raise BrokenConsumerException()
                finally:
                    poll_called.set()

    consumer: Consumer[KafkaPayload] = broker.get_consumer("consumer")
    commit_log_consumer: Consumer[KafkaPayload] = BrokenConsumer(
        broker, "commit-log-consumer")

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    assert poll_called.wait(1.0) is True

    # If the worker thread has exited without a close request, calling ``poll``
    # should raise an error that originated from the worker thread.

    with pytest.raises(RuntimeError) as e:
        synchronized_consumer.poll(0.0)

    assert type(e.value.__cause__) is BrokenConsumerException

    # If a close request has been sent, the normal runtime error due to the
    # closed consumer should be raised instead.

    synchronized_consumer.close()

    with pytest.raises(RuntimeError) as e:
        synchronized_consumer.poll(0.0)

    assert type(e.value.__cause__) is not BrokenConsumerException
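
The assertions on ``e.value.__cause__`` rely on the worker thread handing its exception back to the calling thread, where it is re-raised as the cause of a ``RuntimeError``. A simplified, self-contained sketch of that propagation pattern follows; it is not the actual SynchronizedConsumer internals.

# Simplified illustration of cross-thread error propagation; this is not the
# actual SynchronizedConsumer implementation.
from concurrent.futures import Future
from threading import Thread


class WorkerCrash(Exception):
    pass


def worker(result: "Future[None]") -> None:
    try:
        raise WorkerCrash("crashed on the worker thread")
    except Exception as error:
        result.set_exception(error)


result: "Future[None]" = Future()
thread = Thread(target=worker, args=(result,))
thread.start()
thread.join()


def poll() -> None:
    # If the worker has died, surface its error on the calling thread as the
    # ``__cause__`` of a ``RuntimeError``.
    if result.done() and result.exception() is not None:
        raise RuntimeError("worker thread has crashed") from result.exception()


try:
    poll()
except RuntimeError as error:
    assert type(error.__cause__) is WorkerCrash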
Example #3
def test_synchronized_consumer_handles_end_of_partition(
    broker: Broker[KafkaPayload], ) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer", enable_end_of_partition=True)
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[0].next_offset), ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[1].next_offset), ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
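
The crash this test guards against happens when the commit log consumer reaches the end of its partition and raises instead of returning. A minimal sketch of the tolerant polling the test expects, assuming the end-of-partition error is the ``EndOfPartition`` exception implied by the ``enable_end_of_partition`` flag above (the import path is an assumption):

# Minimal sketch of EOF-tolerant polling for the commit log worker.  The
# ``EndOfPartition`` import path is an assumption.
from typing import Optional

from arroyo.errors import EndOfPartition


def poll_commit_log(consumer, timeout: Optional[float] = None):
    try:
        return consumer.poll(timeout)
    except EndOfPartition:
        # Catching up to the end of the commit log just means there is no new
        # commit to apply yet; treat it like an empty poll.
        return None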
Example #4
def test_synchronized_consumer_handles_end_of_partition() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer", enable_end_of_partition=True)
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[1].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
Example #5
def test_synchronized_consumer(broker: Broker[KafkaPayload]) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(6)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader-a", "leader-b"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # The consumer should not consume any messages until it receives a
        # commit from both groups that are being followed.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.tell, {},
                                {Partition(topic, 0): messages[0].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        # The consumer should remain paused, since it needs both groups to
        # advance before it may continue.
        with assert_does_not_change(
                consumer.paused,
            [Partition(topic, 0)]), assert_does_not_change(
                consumer.tell, {Partition(topic, 0): messages[0].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[0].offset},
                                {Partition(topic, 0): messages[0].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[0]

        # After consuming the one available message, the consumer should be
        # paused again until the remote offsets advance.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[1].offset}):
            assert synchronized_consumer.poll(0.0) is None

        # Emulate the unlikely (but possible) scenario of the leader offsets
        # being within a series of compacted (deleted) messages by:
        # 1. moving the remote offsets forward, so that the partition is resumed
        # 2. seeking the consumer beyond the remote offsets

        producer.produce(
            commit_log_topic,
            commit_codec.encode(
                Commit("leader-a", Partition(topic, 0), messages[3].offset)),
        ).result()

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0),
                           messages[5].offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming, since the remote
        # offsets for both groups are now ahead of the local offset.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[1].offset},
                                {Partition(topic, 0): messages[1].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[1]

        # At this point, we manually seek the consumer offset, to emulate messages being skipped.
        with assert_changes(
                consumer.tell,
            {Partition(topic, 0): messages[2].offset},
            {Partition(topic, 0): messages[4].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[4].offset})

        # Since the (effective) remote offset is the offset for message #3 (via
        # ``leader-a``), and the local offset is the offset of message #4, when
        # message #4 is consumed, it should be discarded and the offset should
        # be rolled back to wait for the commit log to advance.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[4].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0),
                           messages[5].offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[4].offset},
                                {Partition(topic, 0): messages[4].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[4]
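
The behaviour exercised above reduces to a single gating rule: a message may only be delivered once every followed group has committed an offset beyond it. A simplified sketch of that rule, not the actual implementation:

# Simplified illustration of the offset fence tested above; the real
# SynchronizedConsumer tracks this state per partition on a worker thread.
from typing import Mapping


def may_consume(local_offset: int, remote_offsets: Mapping[str, int]) -> bool:
    # A message at ``local_offset`` may be delivered only once every followed
    # group has committed an offset strictly greater than it.
    return local_offset < min(remote_offsets.values())


assert may_consume(0, {"leader-a": 1, "leader-b": 1})
assert not may_consume(1, {"leader-a": 2, "leader-b": 1})  # leader-b still lags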
Example #6
def test_synchronized_consumer_pause_resume(
        broker: Broker[KafkaPayload]) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):

        def assignment_callback(offsets: Mapping[Partition, int]) -> None:
            synchronized_consumer.pause([Partition(topic, 0)])

        synchronized_consumer.subscribe([topic], on_assign=assignment_callback)

        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.paused, [], [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0.0) is None

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused,
                                    []), assert_changes(
                                        consumer.paused, [],
                                        [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])
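
This example distinguishes two reasons the underlying partition can be paused: an explicit ``pause()`` from the caller and the internal offset fence, and only the former is reported by ``paused()``. A simplified sketch of bookkeeping that keeps the two apart, not the actual implementation:

# Simplified illustration of the two pause states exercised above.
from typing import Set


class PauseState:
    def __init__(self) -> None:
        self.externally_paused: Set[str] = set()  # paused by the caller
        self.fenced: Set[str] = set()  # paused at the offset fence

    def paused(self) -> Set[str]:
        # Only caller-initiated pauses are reported; fence pauses are an
        # internal detail of the synchronization.
        return set(self.externally_paused)

    def inner_paused(self, partition: str) -> bool:
        # The underlying consumer is paused if either condition holds.
        return partition in self.externally_paused or partition in self.fenced


state = PauseState()
state.fenced.add("topic-0")
assert state.paused() == set()  # the fence pause is invisible to the caller
state.externally_paused.add("topic-0")
assert state.paused() == {"topic-0"}  # the explicit pause is visible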
Example #7
def subscriptions(
    *,
    dataset_name: str,
    topic: Optional[str],
    partitions: Optional[int],
    commit_log_topic: Optional[str],
    commit_log_groups: Sequence[str],
    consumer_group: str,
    auto_offset_reset: str,
    bootstrap_servers: Sequence[str],
    max_batch_size: int,
    max_batch_time_ms: int,
    max_query_workers: Optional[int],
    schedule_ttl: int,
    result_topic: Optional[str],
    log_level: Optional[str],
    delay_seconds: Optional[int],
) -> None:
    """Evaluates subscribed queries for a dataset."""

    assert result_topic is not None

    setup_logging(log_level)
    setup_sentry()

    dataset = get_dataset(dataset_name)

    if not bootstrap_servers:
        storage = dataset.get_default_entity().get_writable_storage()
        assert storage is not None
        storage_key = storage.get_storage_key().value
        bootstrap_servers = settings.DEFAULT_STORAGE_BROKERS.get(
            storage_key, settings.DEFAULT_BROKERS)

    loader = enforce_table_writer(dataset).get_stream_loader()

    metrics = MetricsWrapper(
        environment.metrics,
        "subscriptions",
        tags={
            "group": consumer_group,
            "dataset": dataset_name
        },
    )

    consumer = TickConsumer(
        SynchronizedConsumer(
            KafkaConsumer(
                build_kafka_consumer_configuration(
                    bootstrap_servers,
                    consumer_group,
                    auto_offset_reset=auto_offset_reset,
                ), ),
            KafkaConsumer(
                build_kafka_consumer_configuration(
                    bootstrap_servers,
                    f"subscriptions-commit-log-{uuid.uuid1().hex}",
                    auto_offset_reset="earliest",
                ), ),
            (Topic(commit_log_topic) if commit_log_topic is not None else
             Topic(loader.get_commit_log_topic_spec().topic_name)),
            set(commit_log_groups),
        ),
        time_shift=(timedelta(seconds=delay_seconds *
                              -1) if delay_seconds is not None else None),
    )

    producer = ProducerEncodingWrapper(
        KafkaProducer({
            "bootstrap.servers": ",".join(bootstrap_servers),
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        }),
        SubscriptionTaskResultEncoder(),
    )

    executor = ThreadPoolExecutor(max_workers=max_query_workers)
    logger.debug("Starting %r with %s workers...", executor,
                 executor._max_workers)
    metrics.gauge("executor.workers", executor._max_workers)

    with closing(consumer), executor, closing(producer):
        batching_consumer = StreamProcessor(
            consumer,
            (Topic(topic) if topic is not None else Topic(
                loader.get_default_topic_spec().topic_name)),
            BatchProcessingStrategyFactory(
                SubscriptionWorker(
                    dataset,
                    executor,
                    {
                        index: SubscriptionScheduler(
                            RedisSubscriptionDataStore(redis_client, dataset,
                                                       PartitionId(index)),
                            PartitionId(index),
                            cache_ttl=timedelta(seconds=schedule_ttl),
                            metrics=metrics,
                        )
                        for index in
                        range(partitions if partitions is not None else loader.
                              get_default_topic_spec().partitions_number)
                    },
                    producer,
                    Topic(result_topic),
                    metrics,
                ),
                max_batch_size,
                max_batch_time_ms,
                metrics,
            ),
            metrics=metrics,
        )

        def handler(signum, frame) -> None:
            batching_consumer.signal_shutdown()

        signal.signal(signal.SIGINT, handler)
        signal.signal(signal.SIGTERM, handler)

        batching_consumer.run()
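
In this command, ``delay_seconds`` is converted into a negative ``time_shift`` for the ``TickConsumer``, presumably so that tick timestamps lag real time by the configured delay. A small illustration of the conversion as written above:

# Illustration of the delay_seconds -> time_shift conversion used above.
from datetime import timedelta
from typing import Optional


def to_time_shift(delay_seconds: Optional[int]) -> Optional[timedelta]:
    # ``None`` means no shift; otherwise shift backwards by the delay.
    return timedelta(seconds=delay_seconds * -1) if delay_seconds is not None else None


assert to_time_shift(None) is None
assert to_time_shift(60) == timedelta(seconds=-60)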
Example #8
def subscriptions(
    *,
    dataset_name: str,
    topic: Optional[str],
    partitions: Optional[int],
    commit_log_topic: Optional[str],
    commit_log_groups: Sequence[str],
    consumer_group: str,
    auto_offset_reset: str,
    bootstrap_servers: Sequence[str],
    max_batch_size: int,
    max_batch_time_ms: int,
    schedule_ttl: int,
    result_topic: Optional[str],
    log_level: Optional[str],
) -> None:
    """Evaluates subscribed queries for a dataset."""

    assert result_topic is not None

    setup_logging(log_level)
    setup_sentry()

    dataset = get_dataset(dataset_name)

    if not bootstrap_servers:
        bootstrap_servers = settings.DEFAULT_DATASET_BROKERS.get(
            dataset_name, settings.DEFAULT_BROKERS
        )

    loader = enforce_table_writer(dataset).get_stream_loader()

    consumer = TickConsumer(
        SynchronizedConsumer(
            KafkaConsumer(
                build_kafka_consumer_configuration(
                    bootstrap_servers,
                    consumer_group,
                    auto_offset_reset=auto_offset_reset,
                ),
                PassthroughCodec(),
            ),
            KafkaConsumer(
                build_kafka_consumer_configuration(
                    bootstrap_servers,
                    f"subscriptions-commit-log-{uuid.uuid1().hex}",
                    auto_offset_reset="earliest",
                ),
                CommitCodec(),
            ),
            (
                Topic(commit_log_topic)
                if commit_log_topic is not None
                else Topic(loader.get_commit_log_topic_spec().topic_name)
            ),
            set(commit_log_groups),
        )
    )

    producer = KafkaProducer(
        {
            "bootstrap.servers": ",".join(bootstrap_servers),
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        },
        SubscriptionResultCodec(),
    )

    with closing(consumer), closing(producer):
        batching_consumer = BatchingConsumer(
            consumer,
            (
                Topic(topic)
                if topic is not None
                else Topic(loader.get_default_topic_spec().topic_name)
            ),
            SubscriptionWorker(
                SubscriptionExecutor(
                    dataset,
                    ThreadPoolExecutor(
                        max_workers=settings.SUBSCRIPTIONS_MAX_CONCURRENT_QUERIES
                    ),
                ),
                {
                    index: SubscriptionScheduler(
                        RedisSubscriptionDataStore(
                            redis_client, dataset, PartitionId(index)
                        ),
                        PartitionId(index),
                        cache_ttl=timedelta(seconds=schedule_ttl),
                    )
                    for index in range(
                        partitions
                        if partitions is not None
                        else loader.get_default_topic_spec().partitions_number
                    )
                },
                producer,
                Topic(result_topic),
            ),
            max_batch_size,
            max_batch_time_ms,
            create_metrics(
                "snuba.subscriptions",
                tags={"group": consumer_group, "dataset": dataset_name},
            ),
        )

        def handler(signum, frame) -> None:
            batching_consumer.signal_shutdown()

        signal.signal(signal.SIGINT, handler)
        signal.signal(signal.SIGTERM, handler)

        batching_consumer.run()
Example #9
def test_synchronized_consumer_pause_resume() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer")
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.paused, [], [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused,
                                    []), assert_changes(
                                        consumer.paused, [],
                                        [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])