Example #1
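A unit test for CdcTableNameMessageFilter: messages whose "table" header matches the configured table name pass the filter, while messages with a missing or different "table" header are dropped.

Note: the snippets on this page omit their imports. A minimal sketch of what they assume (module paths vary by snuba version; in current releases the streams primitives live in the standalone arroyo package, while older snuba releases exposed the same names under snuba.utils.streams):

from datetime import datetime

# Recent versions: streams primitives come from arroyo;
# older snuba releases exposed the same names under snuba.utils.streams.
from arroyo import Message, Partition, Topic
from arroyo.backends.kafka import KafkaPayload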
def test_table_name_filter() -> None:
    table_name = "table_name"
    message_filter = CdcTableNameMessageFilter(table_name)

    # Messages that match the table should not be dropped.
    assert not message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", table_name.encode("utf8"))]),
            datetime.now(),
        ))

    # Messages without a table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", []),
            datetime.now(),
        ))

    # Messages from a different table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", b"other_table")]),
            datetime.now(),
        ))
Example #2
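A test helper that wraps a replacement payload into a synthetic Message on the "replacements" topic.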
    def _wrap(self, msg: str) -> Message[KafkaPayload]:
        return Message(
            Partition(Topic("replacements"), 0),
            0,
            KafkaPayload(None, json.dumps(msg).encode("utf-8"), []),
            datetime.now(),
        )
Example #3
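Verifies that ConsumerWorker.process_message returns None for (i.e. silently skips) an event whose timestamp falls far outside retention, 300 days in the past here.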
    def test_skip_too_old(self):
        test_worker = ConsumerWorker(
            self.dataset.get_writable_storage(),
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        event = self.event
        old_timestamp = datetime.utcnow() - timedelta(days=300)
        old_timestamp_str = old_timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        event["datetime"] = old_timestamp_str
        event["data"]["datetime"] = old_timestamp_str
        event["data"]["received"] = int(
            calendar.timegm(old_timestamp.timetuple()))

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 1),
            42,
            KafkaPayload(None, json.dumps((2, "insert", event)).encode("utf-8"), []),
            datetime.now(),
        )

        assert test_worker.process_message(message) is None
Example #4
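Verifies that the offset (123) and partition (456) of the consumed message are written through to the event row in ClickHouse.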
    def test_offsets(self):
        event = self.event

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 456),
            123,
            # The event body itself doesn't really matter for this test.
            KafkaPayload(None, json.dumps((2, "insert", event)).encode("utf-8"), []),
            datetime.now(),
        )

        test_worker = ConsumerWorker(
            self.dataset.get_writable_storage(),
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )
        batch = [test_worker.process_message(message)]
        test_worker.flush_batch(batch)

        clickhouse = (
            get_storage(StorageKey.EVENTS)
            .get_cluster()
            .get_query_connection(ClickhouseClientSettings.QUERY)
        )

        assert clickhouse.execute(
            "SELECT project_id, event_id, offset, partition FROM %s" % self.table
        ) == [(self.event["project_id"], self.event["event_id"], 123, 456)]
Example #5
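A debugging/test HTTP endpoint that wraps the request body in a synthetic Kafka message and routes it to a ConsumerWorker for inserts or a ReplacerWorker for everything else.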
    def eventstream(*, dataset: Dataset):
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data, []),
            datetime.now(),
        )

        type_ = record[1]

        storage = dataset.get_writable_storage()
        assert storage is not None

        if type_ == "insert":
            from snuba.consumer import ConsumerWorker

            worker = ConsumerWorker(storage, metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(storage, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example #6
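A replacer test for end_delete_tag: the | and = characters in the tag keys and values exercise the escaping logic used when recomputing the _tags_flattened column.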
    def test_flattened_tags(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        # | and = are intentional to test the escaping logic when computing the
        # flattened_tags on tag deletions
        self.event["data"]["tags"] = []
        self.event["data"]["tags"].append(["browser|name", "foo=1"])
        self.event["data"]["tags"].append(["browser|to_delete", "foo=2"])
        self.event["data"]["tags"].append(["notbrowser", "foo\\3"])
        self.event["data"]["tags"].append(["notbrowser2", "foo4"])
        self.write_events([self.event])

        project_id = self.project_id

        def _fetch_flattened_tags():
            return json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps({
                        "project": [project_id],
                        "selected_columns": [
                            "_tags_flattened",
                            "tags.key",
                            "tags.value",
                        ],
                    }),
                ).data)["data"]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser|to_delete",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert _fetch_flattened_tags() == [{
            "tags.key": ["browser|name", "notbrowser", "notbrowser2"],
            "tags.value": ["foo=1", "foo\\3", "foo4"],
            "_tags_flattened":
            "|browser\\|name=foo\\=1||notbrowser=foo\\\\3||notbrowser2=foo4|",
        }]
Example #7
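A test helper that builds a Message from explicit keyword arguments with a fixed timestamp, so tests can assert against deterministic values.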
    def __make_msg(
        self, partition: int, offset: int, payload: str, headers: Headers
    ) -> Message[KafkaPayload]:
        return Message(
            partition=Partition(Topic("topic"), partition),
            offset=offset,
            payload=KafkaPayload(b"key", payload.encode(), headers),
            timestamp=datetime(2019, 6, 19, 6, 46, 28),
        )
Example #8
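A replacer test: deleting the promoted tag browser.name removes matching events from tag-filtered queries, while the total event count for the group is unchanged.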
    def test_delete_tag_promoted_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["data"]["tags"].append(["browser.name", "foo"])
        self.event["data"]["tags"].append(["notbrowser", "foo"])
        self.write_unprocessed_events([self.event])

        project_id = self.project_id

        def _issue_count(total=False):
            return json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps(
                        {
                            "project": [project_id],
                            "aggregations": [["count()", "", "count"]],
                            "conditions": [["tags[browser.name]", "=", "foo"]]
                            if not total
                            else [],
                            "groupby": ["group_id"],
                        }
                    ),
                ).data
            )["data"]

        assert _issue_count() == [{"count": 1, "group_id": 1}]
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps(
                    (
                        2,
                        "end_delete_tag",
                        {
                            "project_id": project_id,
                            "tag": "browser.name",
                            "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                        },
                    )
                ).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert _issue_count() == []
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
Example #9
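An end-to-end test of MultistorageConsumerProcessingStrategyFactory: payloads are routed by their "table" header to the matching CDC storage, unknown tables are ignored, and the consumed offsets are committed when the strategy is closed.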
def test_multistorage_strategy() -> None:
    from snuba.datasets.storages import groupassignees, groupedmessages

    from tests.datasets.cdc.test_groupassignee import TestGroupassignee
    from tests.datasets.cdc.test_groupedmessage import TestGroupedMessage

    commit = Mock()

    storages = [groupassignees.storage, groupedmessages.storage]

    strategy = MultistorageConsumerProcessingStrategyFactory(
        storages,
        10,
        10,
        1,
        int(32 * 1e6),
        int(64 * 1e6),
        TestingMetricsBackend(),
    ).create(commit)

    payloads = [
        KafkaPayload(None, b"{}", [("table", b"ignored")]),
        KafkaPayload(
            None,
            json.dumps(TestGroupassignee.INSERT_MSG).encode("utf8"),
            [("table",
              groupassignees.storage.get_postgres_table().encode("utf8"))],
        ),
        KafkaPayload(
            None,
            json.dumps(TestGroupedMessage.INSERT_MSG).encode("utf8"),
            [("table",
              groupedmessages.storage.get_postgres_table().encode("utf8"))],
        ),
    ]

    messages = [
        Message(Partition(Topic("topic"), 0), offset, payload, datetime.now(), offset + 1)
        for offset, payload in enumerate(payloads)
    ]

    with assert_changes(
        lambda: get_row_count(groupassignees.storage), 0, 1
    ), assert_changes(lambda: get_row_count(groupedmessages.storage), 0, 1):
        for message in messages:
            strategy.submit(message)

        with assert_changes(
            lambda: commit.call_args_list, [], [call({Partition(Topic("topic"), 0): 3})]
        ):
            strategy.close()
            strategy.join()
Example #10
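Reads a file containing one JSON event per line and wraps each line in a synthetic Kafka message.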
def get_messages(events_file) -> Sequence[Message[KafkaPayload]]:
    "Create a fake Kafka message for each JSON event in the file."
    messages: MutableSequence[Message[KafkaPayload]] = []
    # Close the file deterministically instead of relying on GC.
    with open(events_file) as f:
        raw_events = f.readlines()
    for raw_event in raw_events:
        messages.append(
            Message(
                Partition(Topic("events"), 1),
                0,
                KafkaPayload(None, raw_event.encode("utf-8"), []),
                datetime.now(),
            )
        )
    return messages
Example #11
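A later variant of the eventstream endpoint from Example #5: inserts now flow through a StreamingConsumerStrategyFactory built from the storage's stream loader, while replacements still go through ReplacerWorker.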
    def eventstream(*, dataset: Dataset) -> RespTuple:
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data, []),
            datetime.now(),
        )

        type_ = record[1]

        storage = dataset.get_default_entity().get_writable_storage()
        assert storage is not None

        if type_ == "insert":
            from snuba.consumers.consumer import StreamingConsumerStrategyFactory

            table_writer = storage.get_table_writer()
            stream_loader = table_writer.get_stream_loader()
            strategy = StreamingConsumerStrategyFactory(
                stream_loader.get_pre_filter(),
                stream_loader.get_processor(),
                table_writer.get_batch_writer(metrics),
                metrics,
                max_batch_size=1,
                max_batch_time=1.0,
                processes=None,
                input_block_size=None,
                output_block_size=None,
            ).create(lambda offsets: None)
            strategy.submit(message)
            strategy.close()
            strategy.join()
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(storage, metrics=metrics)
            processed = worker.process_message(message)
            if processed is not None:
                batch = [processed]
                worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example #12
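A replacer test for end_unmerge: events with the listed hashes move from group 1 to group 2, leaving the total count intact.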
    def test_unmerge_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        self.write_events([self.event])

        assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_unmerge",
                    {
                        "project_id": project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hashes": ["a" * 32],
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 2}]
Example #13
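The submit method of a multistorage strategy: each (storage key, payload) pair in the batch message is re-wrapped as a single-storage message and forwarded to that storage's processing step.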
    def submit(
        self,
        message: Message[
            Sequence[
                Tuple[StorageKey, Union[None, JSONRowInsertBatch, ReplacementBatch]]
            ]
        ],
    ) -> None:
        assert not self.__closed

        for storage_key, payload in message.payload:
            self.__steps[storage_key].submit(
                Message(
                    message.partition,
                    message.offset,
                    payload,
                    message.timestamp,
                    message.next_offset,
                )
            )
Example #14
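A replacer test for end_delete_groups: once the replacement is flushed, the deleted group no longer appears in query results.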
    def test_delete_groups_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_delete_groups",
                    {
                        "project_id": project_id,
                        "group_ids": [1],
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == []
Example #15
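The poll method of a tick consumer: each message is paired with the previously seen message on the same partition to form a Tick spanning their offset and timestamp intervals; a non-monotonic timestamp pair produces an invalid interval, which is logged and skipped.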
    def poll(self, timeout: Optional[float] = None) -> Optional[Message[Tick]]:
        message = self.__consumer.poll(timeout)
        if message is None:
            return None

        previous_message = self.__previous_messages.get(message.partition)

        result: Optional[Message[Tick]]
        if previous_message is not None:
            try:
                time_interval = Interval(previous_message.timestamp, message.timestamp)
            except InvalidRangeError:
                logger.warning(
                    "Could not construct valid time interval between %r and %r!",
                    previous_message,
                    message,
                    exc_info=True,
                )
                return None
            else:
                result = Message(
                    message.partition,
                    previous_message.offset,
                    Tick(
                        Interval(previous_message.offset, message.offset),
                        time_interval,
                    ).time_shift(self.__time_shift),
                    message.timestamp,
                )
        else:
            result = None

        self.__previous_messages[message.partition] = MessageDetails(
            message.offset, message.timestamp
        )

        return result
Example #16
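Tests SubscriptionWorker: a tick covering three scheduling intervals yields three subscription evaluations, and each result published to the results topic carries the expected timestamp, request time range, and query result.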
def test_subscription_worker(broker: Broker[SubscriptionTaskResult]) -> None:
    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 3

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        SubscriptionData(
            project_id=1,
            conditions=[],
            aggregations=[["count()", "", "count"]],
            time_window=timedelta(minutes=60),
            resolution=frequency,
        ),
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = DummyMetricsBackend(strict=True)

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {
            0: SubscriptionScheduler(store, PartitionId(0), timedelta(), metrics)
        },
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    result_futures = worker.process_message(
        Message(Partition(Topic("events"), 0), 0, tick, now))

    assert result_futures is not None and len(result_futures) == evaluations

    # Publish the results.
    worker.flush_batch([result_futures])

    # Check to make sure the results were published.
    # NOTE: This does not cover the ``SubscriptionTaskResultCodec``!
    consumer = broker.get_consumer("group")
    consumer.subscribe([result_topic])

    for i in range(evaluations):
        timestamp = now - frequency * (evaluations - i)

        message = consumer.poll()
        assert message is not None
        assert message.partition.topic == result_topic

        task, future = result_futures[i]
        # Chained assignment: bind the whole (request, result) tuple as
        # future_result while also unpacking its two elements.
        future_result = request, result = future.result()
        assert message.payload.task.timestamp == timestamp
        assert message.payload == SubscriptionTaskResult(task, future_result)

        # NOTE: The time series extension is folded back into the request
        # body, ideally this would reference the timeseries options in
        # isolation.
        assert (
            request.body.items()
            > {
                "from_date": (timestamp - subscription.data.time_window).isoformat(),
                "to_date": timestamp.isoformat(),
            }.items()
        )

        assert result == {
            "meta": [{"name": "count", "type": "UInt64"}],
            "data": [{"count": 0}],
        }
Example #17
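Tests StreamingConsumerStrategyFactory against a mocked message processor: a None result is dropped, an InsertBatch is written, a ReplacementBatch is produced to the replacements topic, and a processor that returns a bare dict raises TypeError.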
def test_streaming_consumer_strategy() -> None:
    messages = (
        Message(
            Partition(Topic("events"), 0),
            i,
            # headers is typed as a sequence of (str, bytes) pairs, so use []
            # rather than None, matching the other examples on this page.
            KafkaPayload(None, b"{}", []),
            datetime.now(),
        )
        for i in itertools.count()
    )

    replacements_producer = FakeConfluentKafkaProducer()

    processor = Mock()
    processor.process_message.side_effect = [
        None,
        InsertBatch([{}]),
        ReplacementBatch("key", [{}]),
    ]

    writer = Mock()

    metrics = TestingMetricsBackend()

    factory = StreamingConsumerStrategyFactory(
        None,
        processor,
        writer,
        metrics,
        max_batch_size=10,
        max_batch_time=60,
        processes=None,
        input_block_size=None,
        output_block_size=None,
        replacements_producer=replacements_producer,
        replacements_topic=Topic("replacements"),
    )

    commit_function = Mock()
    strategy = factory.create(commit_function)

    for i in range(3):
        strategy.poll()
        strategy.submit(next(messages))

    assert metrics.calls == []

    processor.process_message.side_effect = [{}]

    with pytest.raises(TypeError):
        strategy.poll()
        strategy.submit(next(messages))

    def get_number_of_insertion_metrics() -> int:
        count = 0
        for call in metrics.calls:
            if isinstance(call, Timing) and call.name == "insertions.latency_ms":
                count += 1
        return count

    expected_write_count = 1

    with assert_changes(
        get_number_of_insertion_metrics, 0, expected_write_count
    ), assert_changes(
        lambda: writer.write.call_count, 0, expected_write_count
    ), assert_changes(lambda: len(replacements_producer.messages), 0, 1):
        strategy.close()
        strategy.join()
Example #18
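A replacer test for the reprocessing flow: tombstone_events discards the events that will not be reprocessed, exclude_groups hides the old group from queries, and the reprocessed event reappears under a new group id with the same event id.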
    def test_reprocessing_flow_insert(self) -> None:
        # We have a group that contains two events, 1 and 2.
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["event_id"] = event_id = "00e24a150d7f4ee4b142b61b4d893b6d"
        write_unprocessed_events(self.storage, [self.event])
        self.event["event_id"] = event_id2 = "00e24a150d7f4ee4b142b61b4d893b6e"
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{"count": 2, "group_id": 1}]

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "tombstone_events",
                    {"project_id": project_id, "event_ids": [event_id]},
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        # The user chooses to reprocess a subset of the group and throw away
        # the other events. Event 1 gets manually tombstoned by Sentry while
        # Event 2 prevails.
        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        # At this point the count doesn't make any sense but we don't care.
        assert self._issue_count(self.project_id) == [{"count": 2, "group_id": 1}]

        # The reprocessed event is inserted with a guaranteed-new group ID but
        # the *same* event ID (this is why we need to skip tombstoning this
        # event ID)
        self.event["group_id"] = 2
        write_unprocessed_events(self.storage, [self.event])

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "exclude_groups",
                    {"project_id": project_id, "group_ids": [1]},
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        # Group 1 is excluded from queries. At this point we have almost a
        # regular group deletion, except only a subset of events have been
        # tombstoned (the ones that will *not* be reprocessed).
        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        # Group 2 should contain the one event that the user chose to
        # reprocess, and Group 1 should be gone. (Note: In the product Group 2
        # looks identical to Group 1, including short ID).
        assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 2}]
        assert self._get_group_id(project_id, event_id2) == 2
        assert not self._get_group_id(project_id, event_id)
Example #19
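An exhaustive TickConsumer test: a tick is only emitted once a partition has two messages, tell lags the inner consumer's raw offsets (since each tick needs two messages), seek rewinds tick production, and seeking an unassigned partition raises ConsumerError.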
def test_tick_consumer(clock: Clock, broker: Broker[int],
                       time_shift: Optional[timedelta]) -> None:
    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")

    broker.create_topic(topic, partitions=2)

    producer = broker.get_producer()
    for partition, payloads in enumerate([[0, 1, 2], [0]]):
        for payload in payloads:
            producer.produce(Partition(topic, partition), payload).result()

    inner_consumer = broker.get_consumer("group")

    consumer = TickConsumer(inner_consumer, time_shift=time_shift)

    if time_shift is None:
        time_shift = timedelta()

    def assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assignment_callback.called = True

        assert consumer.tell() == {
            Partition(topic, 0): 0,
            Partition(topic, 1): 0,
        }

        assert inner_consumer.tell() == {
            Partition(topic, 0): 0,
            Partition(topic, 1): 0,
        }

    assignment_callback.called = False

    consumer.subscribe([topic], on_assign=assignment_callback)

    with assert_changes(lambda: assignment_callback.called, False, True):
        # consume 0, 0
        assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        0,
        Tick(
            offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)
        ).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(
            offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)
        ).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 0,
    }

    # consume 1, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    # consume no message
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    consumer.seek({Partition(topic, 0): 1})

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 1,
    }

    # consume 0, 1
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 1,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(
            offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)
        ).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
Example #20
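TickConsumer with non-monotonic timestamps: a message whose timestamp moves backwards produces no tick, and the next forward-moving message produces a tick whose intervals span the skipped offsets.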
def test_tick_consumer_non_monotonic(clock: Clock,
                                     broker: Broker[int]) -> None:
    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = TickConsumer(inner_consumer)

    def assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assignment_callback.called = True
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback.called = False

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(partition, 0)

    clock.sleep(1)

    producer.produce(partition, 1)

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert inner_consumer.tell() == {partition: 1}
    assert consumer.tell() == {partition: 0}

    with assert_changes(
        inner_consumer.tell, {partition: 1}, {partition: 2}
    ), assert_changes(consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)

    producer.produce(partition, 2)

    with assert_changes(
        inner_consumer.tell, {partition: 2}, {partition: 3}
    ), assert_does_not_change(consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(partition, 3)

    with assert_changes(
        inner_consumer.tell, {partition: 3}, {partition: 4}
    ), assert_changes(consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )