Example #1
    def test_offsets(self):
        event = self.event

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 456),
            123,
            KafkaPayload(
                None, json.dumps((0, "insert", event)).encode("utf-8")
            ),  # event doesn't really matter
            datetime.now(),
        )

        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )
        batch = [test_worker.process_message(message)]
        test_worker.flush_batch(batch)

        assert self.clickhouse.execute(
            "SELECT project_id, event_id, offset, partition FROM %s" % self.table
        ) == [(self.event["project_id"], self.event["event_id"], 123, 456)]
Example #2
    def test_skip_too_old(self):
        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        event = self.event
        old_timestamp = datetime.utcnow() - timedelta(days=300)
        old_timestamp_str = old_timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        event["datetime"] = old_timestamp_str
        event["data"]["datetime"] = old_timestamp_str
        event["data"]["received"] = int(calendar.timegm(old_timestamp.timetuple()))

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 1),
            42,
            KafkaPayload(None, json.dumps((0, "insert", event)).encode("utf-8")),
            datetime.now(),
        )

        assert test_worker.process_message(message) is None
Example #3
    def eventstream(*, dataset: Dataset):
        ensure_table_exists(dataset)
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data),
            datetime.now(),
        )

        type_ = record[1]
        metrics = DummyMetricsBackend()
        if type_ == "insert":
            from snuba.consumer import ConsumerWorker

            worker = ConsumerWorker(dataset, metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(clickhouse_rw, dataset, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example #4
    def test_send_message(
        self,
        value: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        storage = get_storage("groupedmessages")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100,
                                        xmax=200,
                                        xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            storage=storage,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            1,
            KafkaPayload(
                None,
                value.encode("utf-8"),
                [("table", "sentry_groupedmessage".encode())],
            ),
            datetime.now(),
        )

        ret = worker.process_message(message)
        assert ret == expected
Example #5
    def _wrap(self, msg: str) -> Message[KafkaPayload]:
        return Message(
            Partition(Topic("replacements"), 0),
            0,
            KafkaPayload(None, json.dumps(msg).encode("utf-8")),
            datetime.now(),
        )
Example #6
    def test_delete_groups_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.write_raw_events(self.event)

        assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps(
                    (
                        2,
                        "end_delete_groups",
                        {
                            "project_id": project_id,
                            "group_ids": [1],
                            "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                        },
                    )
                ).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == []
Example #7
    def __make_msg(
        self, partition: int, offset: int, payload: str, headers: Headers
    ) -> Message[KafkaPayload]:
        return Message(
            partition=Partition(Topic("topic"), partition),
            offset=offset,
            payload=KafkaPayload(b"key", payload.encode(), headers),
            timestamp=datetime(2019, 6, 19, 6, 46, 28),
        )
Example #8
    def encode(self, value: SubscriptionResult) -> KafkaPayload:
        subscription_id = str(value.task.task.identifier)
        return KafkaPayload(
            subscription_id.encode("utf-8"),
            json.dumps(
                {
                    "version": 1,
                    "payload": {
                        "subscription_id": subscription_id,
                        "values": value.result,
                        "timestamp": value.task.timestamp.isoformat(),
                    },
                }
            ).encode("utf-8"),
        )
Example #9
    def test_delete_tag_promoted_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["data"]["tags"].append(["browser.name", "foo"])
        self.event["data"]["tags"].append(["notbrowser", "foo"])
        self.write_raw_events(self.event)

        project_id = self.project_id

        def _issue_count(total=False):
            return json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps({
                        "project": [project_id],
                        "aggregations": [["count()", "", "count"]],
                        "conditions": [["tags[browser.name]", "=", "foo"]]
                        if not total else [],
                        "groupby": ["group_id"],
                    }),
                ).data)["data"]

        assert _issue_count() == [{"count": 1, "group_id": 1}]
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser.name",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert _issue_count() == []
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
Example #10
def get_messages(events_file) -> Sequence[Message[KafkaPayload]]:
    """Create a fake Kafka message for each JSON event in the file."""
    messages: MutableSequence[Message[KafkaPayload]] = []
    # Use a context manager so the file handle is closed deterministically.
    with open(events_file) as f:
        for raw_event in f:
            messages.append(
                Message(
                    Partition(Topic("events"), 1),
                    0,
                    KafkaPayload(None, raw_event.encode("utf-8")),
                    datetime.now(),
                )
            )
    return messages
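
One way these fixtures might be replayed, mirroring the process/flush pattern of Examples #1 and #3 (replay_messages is a hypothetical helper; worker stands for any object with process_message and flush_batch):

def replay_messages(events_file, worker) -> None:
    # Run every fixture message through the worker and flush the survivors.
    batch = []
    for message in get_messages(events_file):
        processed = worker.process_message(message)
        if processed is not None:
            batch.append(processed)
    if batch:
        worker.flush_batch(batch)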
Example #11
    def test_commit_log_consumer(self) -> None:
        # XXX: This would be better as an integration test (or at least a test
        # against an abstract Producer interface) instead of a test against a
        # mock.
        commit_log_producer = FakeConfluentKafkaProducer()

        consumer: KafkaConsumer[int] = KafkaConsumerWithCommitLog(
            {
                **self.configuration,
                "auto.offset.reset": "earliest",
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "true",
                "group.id": "test",
                "session.timeout.ms": 10000,
            },
            codec=self.codec,
            producer=commit_log_producer,
            commit_log_topic=Topic("commit-log"),
        )

        with self.get_topic() as topic, closing(consumer) as consumer:
            consumer.subscribe([topic])

            with closing(self.get_producer()) as producer:
                producer.produce(topic, 0).result(5.0)

            message = consumer.poll(10.0)  # XXX: getting the subscription is slow
            assert isinstance(message, Message)

            consumer.stage_offsets({message.partition: message.get_next_offset()})

            assert consumer.commit_offsets() == {
                Partition(topic, 0): message.get_next_offset()
            }

            assert len(commit_log_producer.messages) == 1
            commit_message = commit_log_producer.messages[0]
            assert commit_message.topic() == "commit-log"

            assert CommitCodec().decode(
                KafkaPayload(commit_message.key(), commit_message.value())
            ) == Commit("test", Partition(topic, 0), message.get_next_offset())
Example #12
    def test_flattened_tags(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        # | and = are intentional to test the escaping logic when computing the
        # flattened_tags on tag deletions
        self.event["data"]["tags"] = []
        self.event["data"]["tags"].append(["browser|name", "foo=1"])
        self.event["data"]["tags"].append(["browser|to_delete", "foo=2"])
        self.event["data"]["tags"].append(["notbrowser", "foo\\3"])
        self.event["data"]["tags"].append(["notbrowser2", "foo4"])
        self.write_raw_events(self.event)

        project_id = self.project_id

        def _fetch_flattened_tags():
            return json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps({
                        "project": [project_id],
                        "selected_columns": [
                            "_tags_flattened",
                            "tags.key",
                            "tags.value",
                        ],
                    }),
                ).data)["data"]

        assert _fetch_flattened_tags() == [{
            "tags.key": [
                "browser|name",
                "browser|to_delete",
                "notbrowser",
                "notbrowser2",
            ],
            "tags.value": ["foo=1", "foo=2", "foo\\3", "foo4"],
            "_tags_flattened":
            "|browser\\|name=foo\\=1||browser\\|to_delete=foo\\=2||notbrowser=foo\\\\3||notbrowser2=foo4|",
        }]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser|to_delete",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert _fetch_flattened_tags() == [{
            "tags.key": ["browser|name", "notbrowser", "notbrowser2"],
            "tags.value": ["foo=1", "foo\\3", "foo4"],
            "_tags_flattened":
            "|browser\\|name=foo\\=1||notbrowser=foo\\\\3||notbrowser2=foo4|",
        }]
Example #13
    def encode(self, value: int) -> KafkaPayload:
        return KafkaPayload(None, f"{value}".encode("utf-8"))
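
The matching decode is not shown in the source; a minimal sketch of the obvious inverse (an assumption, mirroring encode above):

    def decode(self, value: KafkaPayload) -> int:
        # encode() writes the decimal string form of the int, so parse it back.
        return int(value.value.decode("utf-8"))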