Example No. 1
def test_table_name_filter() -> None:
    table_name = "table_name"
    message_filter = CdcTableNameMessageFilter(table_name)

    # Messages that match the table should not be dropped.
    assert not message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", table_name.encode("utf8"))]),
            datetime.now(),
        ))

    # Messages without a table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", []),
            datetime.now(),
        ))

    # Messages from a different table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", b"other_table")]),
            datetime.now(),
        ))
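For reference, a minimal sketch of a filter that would satisfy this test, assuming KafkaPayload exposes its headers as a list of (str, bytes) pairs; the actual CdcTableNameMessageFilter in snuba may differ in detail:

class CdcTableNameMessageFilter:
    def __init__(self, postgres_table: str) -> None:
        # Header values arrive as bytes, so encode the table name once.
        self.__postgres_table = postgres_table.encode("utf-8")

    def should_drop(self, message: Message[KafkaPayload]) -> bool:
        table = next(
            (value for key, value in message.payload.headers if key == "table"),
            None,
        )
        # Drop anything that does not carry a matching "table" header.
        return table != self.__postgres_table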
Example No. 2
    def submit(
        self,
        message: Message[Sequence[Tuple[StorageKey,
                                        Union[None, BytesInsertBatch,
                                              ReplacementBatch]]]],
    ) -> None:
        assert not self.__closed

        for storage_key, payload in message.payload:
            writer_message = Message(
                message.partition,
                message.offset,
                payload,
                message.timestamp,
            )
            self.__steps[storage_key].submit(writer_message)

            # We collect the messages in self.__messages on the off chance
            # that we get an error submitting a batch and need to forward
            # these messages to the dead letter topic. The payload doesn't
            # carry storage information, so we need to keep the storage_key.
            other_message = Message(
                message.partition,
                message.offset,
                (storage_key, payload),
                message.timestamp,
            )

            self.__messages[storage_key].append(other_message)
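A hypothetical sketch of the failure path the comment above alludes to; forward_to_dead_letter_topic is an illustrative name, not part of the original code:

    def _on_batch_failure(self, storage_key: StorageKey) -> None:
        # Each buffered payload is a (storage_key, payload) pair, so the
        # storage information survives into the dead letter topic.
        for buffered in self.__messages[storage_key]:
            forward_to_dead_letter_topic(buffered)  # illustrative helper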
Example No. 3
    def test_multiple_partitions(self) -> None:
        """
        Different partitions should have independent offset checks.
        """
        set_config("skip_seen_offsets", True)
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        payload = KafkaPayload(
            None,
            json.dumps((
                2,
                ReplacementType.END_UNMERGE,
                {
                    "project_id": self.project_id,
                    "previous_group_id": 1,
                    "new_group_id": 2,
                    "hashes": ["a" * 32],
                    "datetime": datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                },
            )).encode("utf-8"),
            [],
        )
        offset = 42
        timestamp = datetime.now()

        partition_one: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            offset,
            payload,
            timestamp,
        )
        partition_two: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 2),
            offset,
            payload,
            timestamp,
        )

        processed = self.replacer.process_message(partition_one)
        self.replacer.flush_batch([processed])
        # different partition should be unaffected even if it's the same offset
        assert self.replacer.process_message(partition_two) is not None
Example No. 4
    def join(self, timeout: Optional[float] = None) -> None:
        start = time.time()

        while self.__queue:
            remaining = (
                timeout - (time.time() - start) if timeout is not None else None
            )

            if remaining is not None and remaining <= 0:
                logger.warning(
                    f"Timed out with {len(self.__queue)} futures in queue")
                break

            message, result_future = self.__queue.popleft()

            subscription_task_result = SubscriptionTaskResult(
                result_future.task, result_future.future.result(remaining))

            self.__next_step.submit(
                Message(
                    message.partition,
                    message.offset,
                    subscription_task_result,
                    message.timestamp,
                ))

        remaining = (
            timeout - (time.time() - start) if timeout is not None else None
        )
        self.__executor.shutdown()

        self.__next_step.close()
        self.__next_step.join(remaining)
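The deadline arithmetic that appears twice above is a common pattern; factored into a helper (illustrative, not part of the original class) it reads:

def remaining_time(timeout: Optional[float], start: float) -> Optional[float]:
    # None means "wait indefinitely"; otherwise return the time left until
    # the deadline, which may be negative once the deadline has passed.
    return timeout - (time.time() - start) if timeout is not None else None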
Example No. 5
    def test_delete_tag_promoted_insert(self) -> None:
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["data"]["tags"].append(["browser.name", "foo"])
        self.event["data"]["tags"].append(["notbrowser", "foo"])
        write_unprocessed_events(self.storage, [self.event])

        project_id = self.project_id

        def _issue_count(total: bool = False) -> Sequence[Mapping[str, Any]]:
            clickhouse = self.storage.get_cluster().get_query_connection(
                ClickhouseClientSettings.QUERY)

            total_cond = (
                "AND has(_tags_hash_map, cityHash64('browser.name=foo'))"
                if not total else "")

            data = clickhouse.execute(f"""
                SELECT group_id, count()
                FROM errors_local
                FINAL
                WHERE deleted = 0
                AND project_id = {project_id}
                {total_cond}
                GROUP BY group_id
                """).results

            return [{"group_id": row[0], "count": row[1]} for row in data]

        assert _issue_count() == [{"count": 1, "group_id": 1}]
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_DELETE_TAG,
                    {
                        "project_id": project_id,
                        "tag": "browser.name",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        assert processed is not None
        self.replacer.flush_batch([processed])

        assert _issue_count() == []
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
Example No. 6
    def _wrap(self, msg: Tuple[Any, ...]) -> Message[KafkaPayload]:
        return Message(
            Partition(Topic("replacements"), 0),
            0,
            KafkaPayload(None, json.dumps(msg).encode("utf-8"), []),
            datetime.now(),
        )
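A hypothetical call site inside one of the replacer tests above, using the (version, action, data) tuple shape seen throughout this file:

        message = self._wrap(
            (2, "end_delete_groups", {"project_id": 1, "group_ids": [1]}))
        processed = self.replacer.process_message(message)
        assert processed is not None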
Example No. 7
    def test_offset_already_processed(self) -> None:
        """
        Don't process an offset that already exists in Redis.
        """
        set_config("skip_seen_offsets", True)
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        key = f"replacement:{CONSUMER_GROUP}:errors:1"
        redis_client.set(key, 42)

        old_offset: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            41,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE,
                    {},
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        same_offset: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE,
                    {},
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        assert self.replacer.process_message(old_offset) is None
        assert self.replacer.process_message(same_offset) is None
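A minimal sketch of the offset check these tests exercise, with the Redis key layout taken from the test above; the actual replacer implementation may differ:

def is_offset_already_processed(
    group: str, storage_set: str, message: Message[KafkaPayload]
) -> bool:
    # Offsets at or below the recorded watermark were already handled.
    key = f"replacement:{group}:{storage_set}:{message.partition.index}"
    seen_offset = redis_client.get(key)
    return seen_offset is not None and message.offset <= int(seen_offset)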
Example No. 8
def test_multistorage_strategy(
    processes: Optional[int],
    input_block_size: Optional[int],
    output_block_size: Optional[int],
) -> None:
    from snuba.datasets.storages import groupassignees, groupedmessages
    from tests.datasets.cdc.test_groupassignee import TestGroupassignee
    from tests.datasets.cdc.test_groupedmessage import TestGroupedMessage

    commit = Mock()

    storages = [groupassignees.storage, groupedmessages.storage]

    strategy = MultistorageConsumerProcessingStrategyFactory(
        storages,
        10,
        10,
        processes,
        input_block_size,
        output_block_size,
        TestingMetricsBackend(),
    ).create(commit)

    payloads = [
        KafkaPayload(None, b"{}", [("table", b"ignored")]),
        KafkaPayload(
            None,
            json.dumps(TestGroupassignee.INSERT_MSG).encode("utf8"),
            [("table", groupassignees.storage.get_postgres_table().encode("utf8"))],
        ),
        KafkaPayload(
            None,
            json.dumps(TestGroupedMessage.INSERT_MSG).encode("utf8"),
            [("table", groupedmessages.storage.get_postgres_table().encode("utf8"))],
        ),
    ]

    messages = [
        Message(
            Partition(Topic("topic"), 0), offset, payload, datetime.now(), offset + 1
        )
        for offset, payload in enumerate(payloads)
    ]

    with assert_changes(
        lambda: get_row_count(groupassignees.storage), 0, 1
    ), assert_changes(lambda: get_row_count(groupedmessages.storage), 0, 1):

        for message in messages:
            strategy.submit(message)

        with assert_changes(
            lambda: commit.call_args_list, [], [call({Partition(Topic("topic"), 0): 3})]
        ):
            strategy.close()
            strategy.join()
Example No. 9
    def poll(self, timeout: Optional[float] = None) -> Optional[Message[Tick]]:
        message = self.__consumer.poll(timeout)
        if message is None:
            return None

        try:
            commit = commit_codec.decode(message.payload)
            assert commit.orig_message_ts is not None
        except Exception:
            logger.error(
                f"Error decoding commit log message for followed group: {self.__followed_consumer_group}.",
                extra={
                    "payload": str(message.payload),
                    "offset": message.offset
                },
                exc_info=True,
            )
            return None

        if commit.group != self.__followed_consumer_group:
            return None

        previous_message = self.__previous_messages.get(commit.partition)

        result: Optional[Message[Tick]]
        if previous_message is not None:
            try:
                time_interval = Interval(previous_message.orig_message_ts,
                                         commit.orig_message_ts)
            except InvalidRangeError:
                logger.warning(
                    "Could not construct valid time interval between %r and %r!",
                    previous_message,
                    MessageDetails(commit.offset, commit.orig_message_ts),
                    exc_info=True,
                )
                return None
            else:
                result = Message(
                    message.partition,
                    message.offset,
                    Tick(
                        commit.partition.index,
                        Interval(previous_message.offset, commit.offset),
                        time_interval,
                    ).time_shift(self.__time_shift),
                    message.timestamp,
                )
        else:
            result = None

        self.__previous_messages[commit.partition] = MessageDetails(
            commit.offset, commit.orig_message_ts)

        return result
Example No. 10
def test_subscription_worker_consistent(
        subscription_data: SubscriptionData) -> None:
    state.set_config("event_subscription_non_consistent_sample_rate", 1)
    broker: Broker[SubscriptionTaskResult] = Broker(MemoryMessageStorage(),
                                                    TestingClock())

    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 1

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        subscription_data,
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = TestingMetricsBackend()

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {
            0: SubscriptionScheduler(
                store, PartitionId(0), timedelta(), DummyMetricsBackend(strict=True)
            )
        },
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    worker.process_message(
        Message(Partition(Topic("events"), 0), 0, tick, now))

    time.sleep(0.1)

    assert (len([
        m for m in metrics.calls
        if isinstance(m, Increment) and m.name == "consistent"
    ]) == 1)
Example No. 11
    def eventstream(*, dataset: Dataset) -> RespTuple:
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % version)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data, []),
            datetime.now(),
        )

        type_ = record[1]

        storage = dataset.get_default_entity().get_writable_storage()
        assert storage is not None

        if type_ == "insert":
            from arroyo.processing.strategies.streaming import (
                KafkaConsumerStrategyFactory,
            )

            from snuba.consumers.consumer import build_batch_writer, process_message

            table_writer = storage.get_table_writer()
            stream_loader = table_writer.get_stream_loader()
            strategy = KafkaConsumerStrategyFactory(
                stream_loader.get_pre_filter(),
                functools.partial(
                    process_message, stream_loader.get_processor(), "consumer_group"
                ),
                build_batch_writer(table_writer, metrics=metrics),
                max_batch_size=1,
                max_batch_time=1.0,
                processes=None,
                input_block_size=None,
                output_block_size=None,
            ).create(lambda offsets: None)
            strategy.submit(message)
            strategy.close()
            strategy.join()
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(storage, "consumer_group", metrics=metrics)
            processed = worker.process_message(message)
            if processed is not None:
                batch = [processed]
                worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example No. 12
    def set_decoded_future_result(
        encoded_future: Future[Message[TEncoded]],
    ) -> None:
        try:
            message = encoded_future.result()
        except Exception as e:
            decoded_future.set_exception(e)
        else:
            decoded_future.set_result(
                Message(
                    message.partition,
                    message.offset,
                    payload,
                    message.timestamp,
                ))
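Such a callback is typically attached to the encoded future so the decoded future resolves as soon as it does; a sketch of the wiring, with the surrounding scope and the TDecoded name assumed:

decoded_future: Future[Message[TDecoded]] = Future()
# add_done_callback passes the completed future as the single argument,
# matching set_decoded_future_result's signature.
encoded_future.add_done_callback(set_decoded_future_result)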
Example No. 13
    def test_skip_kafka_message(self) -> None:
        state.set_config("kafka_messages_to_skip",
                         "[snuba-test-lol:1:2,snuba-test-yeet:0:1]")
        assert skip_kafka_message(
            Message(
                Partition(Topic("snuba-test-lol"), 1),
                2,
                KafkaPayload(None, b"", []),
                datetime.now(),
            ))
        assert skip_kafka_message(
            Message(
                Partition(Topic("snuba-test-yeet"), 0),
                1,
                KafkaPayload(None, b"", []),
                datetime.now(),
            ))
        assert not skip_kafka_message(
            Message(
                Partition(Topic("snuba-test-lol"), 2),
                1,
                KafkaPayload(None, b"", []),
                datetime.now(),
            ))
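A sketch of how a message could be matched against the "[topic:partition:offset,...]" setting shown above; the real skip_kafka_message helper may parse it differently:

def skip_kafka_message(message: Message[KafkaPayload]) -> bool:
    raw = state.get_config("kafka_messages_to_skip", "[]") or "[]"
    entries = [entry.strip() for entry in raw.strip("[]").split(",") if entry.strip()]
    # Build the topic:partition:offset triple for the message at hand.
    current = f"{message.partition.topic.name}:{message.partition.index}:{message.offset}"
    return current in entries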
Example No. 14
def test_kafka_filter_header_with_bypass() -> None:
    header_filter = KafkaHeaderFilterWithBypass("should_drop", "1", 5)
    message = Message(
        Partition(Topic("random"), 1),
        1,
        KafkaPayload(b"key", b"value", [("should_drop", b"1")]),
        datetime.now(),
    )

    for _ in range(3):
        assert header_filter.should_drop(message) is True
        assert header_filter.should_drop(message) is True
        assert header_filter.should_drop(message) is True
        assert header_filter.should_drop(message) is True
        assert header_filter.should_drop(message) is False
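A sketch of a filter consistent with the assertions above: matching messages are dropped, but every fifth consecutive match is let through so the consumer keeps making progress. Illustrative only; the real KafkaHeaderFilterWithBypass may differ:

class KafkaHeaderFilterWithBypass:
    def __init__(
        self, header_key: str, header_value: str, consecutive_drop_limit: int
    ) -> None:
        self.__matcher = (header_key, header_value.encode("utf-8"))
        self.__limit = consecutive_drop_limit
        self.__consecutive_drops = 0

    def should_drop(self, message: Message[KafkaPayload]) -> bool:
        if self.__matcher not in message.payload.headers:
            self.__consecutive_drops = 0
            return False
        self.__consecutive_drops += 1
        if self.__consecutive_drops == self.__limit:
            # Bypass: let this one through and start counting again.
            self.__consecutive_drops = 0
            return False
        return True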
Example No. 15
    def test_unmerge_hierarchical_insert(self) -> None:
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "b" * 32
        self.event["data"]["hierarchical_hashes"] = ["a" * 32]
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 1
        }]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE_HIERARCHICAL,
                    {
                        "project_id": project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hierarchical_hash": "a" * 32,
                        "primary_hash": "b" * 32,
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        assert processed is not None
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 2
        }]
Example No. 16
    def poll(self) -> None:
        while self.__queue:
            if not self.__queue[0][1].future.done():
                break

            message, result_future = self.__queue.popleft()

            self.__next_step.submit(
                Message(
                    message.partition,
                    message.offset,
                    SubscriptionTaskResult(result_future.task,
                                           result_future.future.result()),
                    message.timestamp,
                ))

        self.__next_step.poll()
Example No. 17
    def submit(
        self,
        message: Message[Sequence[Tuple[StorageKey,
                                        Union[None, JSONRowInsertBatch,
                                              ReplacementBatch]]]],
    ) -> None:
        assert not self.__closed

        for storage_key, payload in message.payload:
            self.__steps[storage_key].submit(
                Message(
                    message.partition,
                    message.offset,
                    payload,
                    message.timestamp,
                    message.next_offset,
                ))
Example No. 18
    def submit(self, message: Message[Tick]) -> None:
        assert not self.__closed

        # Update self.__offset_high_watermark
        self.__update_offset_high_watermark(message)

        should_commit = self.__should_commit(message)
        offset_to_commit = self.__offset_high_watermark if should_commit else None

        self.__next_step.submit(
            Message(
                message.partition,
                message.offset,
                CommittableTick(message.payload, offset_to_commit),
                message.timestamp,
            ))
        if should_commit:
            self.__offset_low_watermark = self.__offset_high_watermark
Example No. 19
    def test_unmerge_insert(self) -> None:
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 1
        }]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_unmerge",
                    {
                        "project_id": project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hashes": ["a" * 32],
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 2
        }]
Example No. 20
    def poll(self, timeout: Optional[float] = None) -> Optional[Message[Tick]]:
        message = self.__consumer.poll(timeout)
        if message is None:
            return None

        previous_message = self.__previous_messages.get(message.partition)

        result: Optional[Message[Tick]]
        if previous_message is not None:
            try:
                time_interval = Interval(previous_message.timestamp,
                                         message.timestamp)
                if (
                    self.__min_interval is not None
                    and time_interval.upper - time_interval.lower < self.__min_interval
                ):
                    return None

            except InvalidRangeError:
                logger.warning(
                    "Could not construct valid time interval between %r and %r!",
                    previous_message,
                    message,
                    exc_info=True,
                )
                return None
            else:
                result = Message(
                    message.partition,
                    previous_message.offset,
                    Tick(
                        None,
                        Interval(previous_message.offset, message.offset),
                        time_interval,
                    ).time_shift(self.__time_shift),
                    message.timestamp,
                )
        else:
            result = None

        self.__previous_messages[message.partition] = MessageDetails(
            message.offset, message.timestamp)

        return result
Example No. 21
    def test_delete_groups_insert(self) -> None:
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 1
        }]

        timestamp = datetime.utcnow()

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_DELETE_GROUPS,
                    {
                        "project_id": project_id,
                        "group_ids": [1],
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == []

        # Count is still zero after Redis is flushed and the parts are merged.
        self._clear_redis_and_force_merge()
        assert self._issue_count(self.project_id) == []
Example No. 22
def generate_message(
    entity_key: EntityKey,
    subscription_identifier: Optional[SubscriptionIdentifier] = None,
) -> Iterator[Message[KafkaPayload]]:
    codec = SubscriptionScheduledTaskEncoder()
    epoch = datetime(1970, 1, 1)
    i = 0

    if subscription_identifier is None:
        subscription_identifier = SubscriptionIdentifier(
            PartitionId(1), uuid.uuid1())

    data_dict = {}
    if entity_key in (EntityKey.METRICS_SETS, EntityKey.METRICS_COUNTERS):
        data_dict = {"organization": 1}

    entity_subscription = ENTITY_KEY_TO_SUBSCRIPTION_MAPPER[entity_key](
        data_dict=data_dict)

    while True:
        payload = codec.encode(
            ScheduledSubscriptionTask(
                epoch + timedelta(minutes=i),
                SubscriptionWithMetadata(
                    entity_key,
                    Subscription(
                        subscription_identifier,
                        SubscriptionData(
                            project_id=1,
                            time_window_sec=60,
                            resolution_sec=60,
                            query=f"MATCH ({entity_key.value}) SELECT count()",
                            entity_subscription=entity_subscription,
                        ),
                    ),
                    i + 1,
                ),
            ))

        yield Message(Partition(Topic("test"), 0), i, payload, epoch)
        i += 1
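Taking a handful of messages from the generator, e.g. for a codec round-trip test (EntityKey.EVENTS is assumed to be a member of the enum):

import itertools

messages = list(itertools.islice(generate_message(EntityKey.EVENTS), 3))
# Offsets follow the generator's counter, starting at zero.
assert [message.offset for message in messages] == [0, 1, 2]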
Example No. 23
    def test_reset_consumer_group_offset_check(self) -> None:
        set_config("skip_seen_offsets", True)
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE,
                    {
                        "project_id": self.project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hashes": ["a" * 32],
                        "datetime": datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        self.replacer.flush_batch([self.replacer.process_message(message)])

        set_config(replacer.RESET_CHECK_CONFIG, f"[{CONSUMER_GROUP}]")

        # Offset to check against should be reset so this message shouldn't be skipped
        assert self.replacer.process_message(message) is not None
Example No. 24
    def test_process_offset_twice(self) -> None:
        set_config("skip_seen_offsets", True)
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE,
                    {
                        "project_id": self.project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hashes": ["a" * 32],
                        "datetime": datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        # Should be None: the offset is now recorded in Redis, so the
        # message is skipped.
        assert self.replacer.process_message(message) is None
Example No. 25
    def test_reprocessing_flow_insert(self) -> None:
        # We have a group that contains two events, 1 and 2.
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["event_id"] = event_id = "00e24a150d7f4ee4b142b61b4d893b6d"
        write_unprocessed_events(self.storage, [self.event])
        self.event["event_id"] = event_id2 = "00e24a150d7f4ee4b142b61b4d893b6e"
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 2,
            "group_id": 1
        }]

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            41,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.TOMBSTONE_EVENTS,
                    {
                        "project_id": project_id,
                        "event_ids": [event_id]
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        # The user chooses to reprocess a subset of the group and throw away
        # the other events. Event 1 gets manually tombstoned by Sentry while
        # Event 2 prevails.
        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        # At this point the count doesn't make any sense but we don't care.
        assert self._issue_count(self.project_id) == [{
            "count": 2,
            "group_id": 1
        }]

        # The reprocessed event is inserted with a guaranteed-new group ID but
        # the *same* event ID (this is why we need to skip tombstoning this
        # event ID)
        self.event["group_id"] = 2
        write_unprocessed_events(self.storage, [self.event])

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.EXCLUDE_GROUPS,
                    {
                        "project_id": project_id,
                        "group_ids": [1]
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        # Group 1 is excluded from queries. At this point we have almost a
        # regular group deletion, except only a subset of events have been
        # tombstoned (the ones that will *not* be reprocessed).
        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        # Group 2 should contain the one event that the user chose to
        # reprocess, and Group 1 should be gone. (Note: In the product Group 2
        # looks identical to Group 1, including short ID).
        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 2
        }]
        assert self._get_group_id(project_id, event_id2) == 2
        assert not self._get_group_id(project_id, event_id)
Example No. 26
def test_combined_scheduler_and_executor() -> None:
    state.set_config("subscription_mode_events", "new")
    create_subscription()
    epoch = datetime(1970, 1, 1)

    dataset = get_dataset("events")
    entity_names = ["events"]
    num_partitions = 2
    max_concurrent_queries = 2
    total_concurrent_queries = 2
    metrics = TestingMetricsBackend()

    commit = mock.Mock()
    partitions = mock.Mock()

    topic = Topic("snuba-commit-log")
    partition = Partition(topic, 0)
    stale_threshold_seconds = None
    result_topic = "events-subscription-results"
    schedule_ttl = 60

    producer = KafkaProducer(
        build_kafka_producer_configuration(
            SnubaTopic.SUBSCRIPTION_RESULTS_EVENTS))

    with closing(producer):
        factory = CombinedSchedulerExecutorFactory(
            dataset,
            entity_names,
            num_partitions,
            max_concurrent_queries,
            total_concurrent_queries,
            producer,
            metrics,
            stale_threshold_seconds,
            result_topic,
            schedule_ttl,
        )

        strategy = factory.create_with_partitions(commit, partitions)

        message = Message(
            partition,
            4,
            Tick(
                0,
                offsets=Interval(1, 3),
                timestamps=Interval(epoch, epoch + timedelta(seconds=60)),
            ),
            epoch,
        )
        strategy.submit(message)

        # Wait for the query to be executed and the result message produced
        for i in range(10):
            time.sleep(0.5)
            strategy.poll()
            if commit.call_count == 1:
                break

        assert commit.call_count == 1
        strategy.close()
        strategy.join()
Example No. 27
def test_tick_consumer_non_monotonic() -> None:
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    followed_consumer_group = "events"
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(inner_consumer, followed_consumer_group)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 0, epoch)),
    ).result()

    clock.sleep(1)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 1,
                   epoch + timedelta(seconds=1))),
    ).result()

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert consumer.tell() == {partition: 1}

    with assert_changes(consumer.tell, {partition: 1}, {partition: 2}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                0,
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 2, epoch)),
    ).result()

    with assert_changes(consumer.tell, {partition: 2}, {partition: 3}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 3,
                   epoch + timedelta(seconds=2))),
    ).result()

    with assert_changes(consumer.tell, {partition: 3}, {partition: 4}):
        assert consumer.poll() == Message(
            partition,
            3,
            Tick(
                0,
                offsets=Interval(1, 3),
                timestamps=Interval(epoch + timedelta(seconds=1),
                                    epoch + timedelta(seconds=2)),
            ),
            epoch + timedelta(seconds=2),
        )
Example No. 28
def test_tick_consumer(time_shift: Optional[timedelta]) -> None:
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    followed_consumer_group = "events"

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    for partition, offsets in enumerate([[0, 1, 2], [0]]):
        for offset in offsets:
            payload = commit_codec.encode(
                Commit(followed_consumer_group, Partition(topic, partition),
                       offset, epoch))
            producer.produce(Partition(topic, 0), payload).result()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(inner_consumer,
                                     followed_consumer_group,
                                     time_shift=time_shift)

    if time_shift is None:
        time_shift = timedelta()

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert consumer.tell() == {
            Partition(topic, 0): 0,
        }

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    with assert_changes(lambda: assignment_callback.called, False, True):
        # consume 0, 0
        assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 1,
    }

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(0, offsets=Interval(0, 1),
             timestamps=Interval(epoch, epoch)).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        2,
        Tick(0, offsets=Interval(1, 2),
             timestamps=Interval(epoch, epoch)).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 3,
    }

    # consume 1, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 4,
    }

    # consume no message
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 4,
    }

    consumer.seek({Partition(topic, 0): 1})

    assert consumer.tell() == {
        Partition(topic, 0): 1,
    }

    # consume 0, 1
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        2,
        Tick(0, offsets=Interval(1, 2),
             timestamps=Interval(epoch, epoch)).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 3,
    }

    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
Example No. 29
def test_subscription_worker(subscription_data: SubscriptionData) -> None:
    broker: Broker[SubscriptionTaskResult] = Broker(MemoryMessageStorage(),
                                                    TestingClock())

    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 3

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        subscription_data,
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = DummyMetricsBackend(strict=True)

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {
            0: SubscriptionScheduler(store, PartitionId(0), timedelta(),
                                     metrics)
        },
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    result_futures = worker.process_message(
        Message(Partition(Topic("events"), 0), 0, tick, now))

    assert result_futures is not None and len(result_futures) == evaluations

    # Publish the results.
    worker.flush_batch([result_futures])

    # Check to make sure the results were published.
    # NOTE: This does not cover the ``SubscriptionTaskResultCodec``!
    consumer = broker.get_consumer("group")
    consumer.subscribe([result_topic])

    for i in range(evaluations):
        timestamp = now - frequency * (evaluations - i)

        message = consumer.poll()
        assert message is not None
        assert message.partition.topic == result_topic

        task, future = result_futures[i]
        future_result = request, result = future.result()
        assert message.payload.task.timestamp == timestamp
        assert message.payload == SubscriptionTaskResult(task, future_result)

        # NOTE: The time series extension is folded back into the request
        # body, ideally this would reference the timeseries options in
        # isolation.
        from_pattern = FunctionCall(
            String(ConditionFunctions.GTE),
            (
                Column(None, String("timestamp")),
                Literal(Datetime(timestamp - subscription.data.time_window)),
            ),
        )
        to_pattern = FunctionCall(
            String(ConditionFunctions.LT),
            (Column(None, String("timestamp")), Literal(Datetime(timestamp))),
        )

        condition = request.query.get_condition()
        assert condition is not None

        conditions = get_first_level_and_conditions(condition)

        assert any([from_pattern.match(e) for e in conditions])
        assert any([to_pattern.match(e) for e in conditions])

        assert result == {
            "meta": [{
                "name": "count",
                "type": "UInt64"
            }],
            "data": [{
                "count": 0
            }],
        }
Example No. 30
def test_tick_consumer_non_monotonic() -> None:
    clock = TestingClock()
    broker: Broker[int] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = TickConsumer(inner_consumer)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(partition, 0)

    clock.sleep(1)

    producer.produce(partition, 1)

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert inner_consumer.tell() == {partition: 1}
    assert consumer.tell() == {partition: 0}

    with assert_changes(
        inner_consumer.tell, {partition: 1}, {partition: 2}
    ), assert_changes(consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)

    producer.produce(partition, 2)

    with assert_changes(
        inner_consumer.tell, {partition: 2}, {partition: 3}
    ), assert_does_not_change(consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(partition, 3)

    with assert_changes(
        inner_consumer.tell, {partition: 3}, {partition: 4}
    ), assert_changes(consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )