def test_offsets(self):
    event = self.event

    message: Message[KafkaPayload] = Message(
        Partition(Topic("events"), 456),
        123,
        KafkaPayload(
            None, json.dumps((0, "insert", event)).encode("utf-8")
        ),  # event doesn't really matter
        datetime.now(),
    )

    test_worker = ConsumerWorker(
        self.dataset,
        producer=FakeConfluentKafkaProducer(),
        replacements_topic=Topic(
            enforce_table_writer(self.dataset)
            .get_stream_loader()
            .get_replacement_topic_spec()
            .topic_name
        ),
        metrics=self.metrics,
    )
    batch = [test_worker.process_message(message)]
    test_worker.flush_batch(batch)

    assert self.clickhouse.execute(
        "SELECT project_id, event_id, offset, partition FROM %s" % self.table
    ) == [(self.event["project_id"], self.event["event_id"], 123, 456)]
def test_skip_too_old(self):
    test_worker = ConsumerWorker(
        self.dataset,
        producer=FakeConfluentKafkaProducer(),
        replacements_topic=Topic(
            enforce_table_writer(self.dataset)
            .get_stream_loader()
            .get_replacement_topic_spec()
            .topic_name
        ),
        metrics=self.metrics,
    )

    event = self.event
    old_timestamp = datetime.utcnow() - timedelta(days=300)
    old_timestamp_str = old_timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    event["datetime"] = old_timestamp_str
    event["data"]["datetime"] = old_timestamp_str
    event["data"]["received"] = int(calendar.timegm(old_timestamp.timetuple()))

    message: Message[KafkaPayload] = Message(
        Partition(Topic("events"), 1),
        42,
        KafkaPayload(None, json.dumps((0, "insert", event)).encode("utf-8")),
        datetime.now(),
    )

    assert test_worker.process_message(message) is None
def eventstream(*, dataset: Dataset):
    ensure_table_exists(dataset)
    record = json.loads(http_request.data)

    version = record[0]
    if version != 2:
        raise RuntimeError("Unsupported protocol version: %s" % record)

    message: Message[KafkaPayload] = Message(
        Partition(Topic("topic"), 0),
        0,
        KafkaPayload(None, http_request.data),
        datetime.now(),
    )

    type_ = record[1]
    metrics = DummyMetricsBackend()
    if type_ == "insert":
        from snuba.consumer import ConsumerWorker

        worker = ConsumerWorker(dataset, metrics=metrics)
    else:
        from snuba.replacer import ReplacerWorker

        worker = ReplacerWorker(clickhouse_rw, dataset, metrics=metrics)

    processed = worker.process_message(message)
    if processed is not None:
        batch = [processed]
        worker.flush_batch(batch)

    return ("ok", 200, {"Content-Type": "text/plain"})
def test_send_message(
    self, value: str, expected: Optional[ProcessedMessage],
) -> None:
    storage = get_storage("groupedmessages")

    snapshot_id = uuid1()
    transact_data = TransactionData(xmin=100, xmax=200, xip_list=[120, 130])

    worker = SnapshotAwareWorker(
        storage=storage,
        producer=FakeConfluentKafkaProducer(),
        snapshot_id=str(snapshot_id),
        transaction_data=transact_data,
        replacements_topic=None,
        metrics=DummyMetricsBackend(strict=True),
    )

    message: Message[KafkaPayload] = Message(
        Partition(Topic("topic"), 0),
        1,
        KafkaPayload(
            None,
            value.encode("utf-8"),
            [("table", "sentry_groupedmessage".encode())],
        ),
        datetime.now(),
    )

    ret = worker.process_message(message)
    assert ret == expected
def _wrap(self, msg: str) -> Message[KafkaPayload]:
    return Message(
        Partition(Topic("replacements"), 0),
        0,
        KafkaPayload(None, json.dumps(msg).encode("utf-8")),
        datetime.now(),
    )
def test_delete_groups_insert(self):
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.write_raw_events(self.event)

    assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

    timestamp = datetime.now(tz=pytz.utc)
    project_id = self.project_id

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    "end_delete_groups",
                    {
                        "project_id": project_id,
                        "group_ids": [1],
                        "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
        ),
        datetime.now(),
    )

    processed = self.replacer.process_message(message)
    self.replacer.flush_batch([processed])

    assert self._issue_count(self.project_id) == []
def __make_msg(
    self, partition: int, offset: int, payload: str, headers: Headers
) -> Message[KafkaPayload]:
    return Message(
        partition=Partition(Topic("topic"), partition),
        offset=offset,
        payload=KafkaPayload(b"key", payload.encode(), headers),
        timestamp=datetime(2019, 6, 19, 6, 46, 28),
    )
def encode(self, value: SubscriptionResult) -> KafkaPayload:
    subscription_id = str(value.task.task.identifier)

    return KafkaPayload(
        subscription_id.encode("utf-8"),
        json.dumps(
            {
                "version": 1,
                "payload": {
                    "subscription_id": subscription_id,
                    "values": value.result,
                    "timestamp": value.task.timestamp.isoformat(),
                },
            }
        ).encode("utf-8"),
    )
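# A minimal sketch of the inverse of encode() above, for illustration only: it is
# not part of the codec shown here. It assumes the JSON envelope produced above and
# that KafkaPayload exposes its raw bytes as `.value` (both assumptions, not
# confirmed by this file). It returns the inner "payload" dict rather than
# reconstructing a SubscriptionResult, since that would require more context.
def decode_subscription_result_sketch(payload: KafkaPayload) -> dict:
    envelope = json.loads(payload.value.decode("utf-8"))
    # Only version 1 of the envelope is produced by the encoder above.
    assert envelope["version"] == 1
    # Contains "subscription_id", "values", and an ISO-8601 "timestamp".
    return envelope["payload"]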
def test_delete_tag_promoted_insert(self):
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["data"]["tags"].append(["browser.name", "foo"])
    self.event["data"]["tags"].append(["notbrowser", "foo"])
    self.write_raw_events(self.event)

    project_id = self.project_id

    def _issue_count(total=False):
        return json.loads(
            self.app.post(
                "/query",
                data=json.dumps(
                    {
                        "project": [project_id],
                        "aggregations": [["count()", "", "count"]],
                        "conditions": [["tags[browser.name]", "=", "foo"]]
                        if not total
                        else [],
                        "groupby": ["group_id"],
                    }
                ),
            ).data
        )["data"]

    assert _issue_count() == [{"count": 1, "group_id": 1}]
    assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

    timestamp = datetime.now(tz=pytz.utc)

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser.name",
                        "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
        ),
        datetime.now(),
    )

    processed = self.replacer.process_message(message)
    self.replacer.flush_batch([processed])

    assert _issue_count() == []
    assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
def get_messages(events_file) -> Sequence[Message[KafkaPayload]]:
    """Create a fake Kafka message for each JSON event in the file."""
    messages: MutableSequence[Message[KafkaPayload]] = []
    raw_events = open(events_file).readlines()
    for raw_event in raw_events:
        messages.append(
            Message(
                Partition(Topic("events"), 1),
                0,
                KafkaPayload(None, raw_event.encode("utf-8")),
                datetime.now(),
            ),
        )
    return messages
def test_commit_log_consumer(self) -> None:
    # XXX: This would be better as an integration test (or at least a test
    # against an abstract Producer interface) instead of a test against a mock.
    commit_log_producer = FakeConfluentKafkaProducer()

    consumer: KafkaConsumer[int] = KafkaConsumerWithCommitLog(
        {
            **self.configuration,
            "auto.offset.reset": "earliest",
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "false",
            "enable.partition.eof": "true",
            "group.id": "test",
            "session.timeout.ms": 10000,
        },
        codec=self.codec,
        producer=commit_log_producer,
        commit_log_topic=Topic("commit-log"),
    )

    with self.get_topic() as topic, closing(consumer) as consumer:
        consumer.subscribe([topic])

        with closing(self.get_producer()) as producer:
            producer.produce(topic, 0).result(5.0)

        message = consumer.poll(10.0)  # XXX: getting the subscription is slow
        assert isinstance(message, Message)

        consumer.stage_offsets({message.partition: message.get_next_offset()})

        assert consumer.commit_offsets() == {
            Partition(topic, 0): message.get_next_offset()
        }

        assert len(commit_log_producer.messages) == 1
        commit_message = commit_log_producer.messages[0]
        assert commit_message.topic() == "commit-log"

        assert CommitCodec().decode(
            KafkaPayload(commit_message.key(), commit_message.value())
        ) == Commit("test", Partition(topic, 0), message.get_next_offset())
def test_flattened_tags(self):
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    # | and = are intentional to test the escaping logic when computing the
    # flattened_tags on tag deletions
    self.event["data"]["tags"] = []
    self.event["data"]["tags"].append(["browser|name", "foo=1"])
    self.event["data"]["tags"].append(["browser|to_delete", "foo=2"])
    self.event["data"]["tags"].append(["notbrowser", "foo\\3"])
    self.event["data"]["tags"].append(["notbrowser2", "foo4"])
    self.write_raw_events(self.event)

    project_id = self.project_id

    def _fetch_flattened_tags():
        return json.loads(
            self.app.post(
                "/query",
                data=json.dumps(
                    {
                        "project": [project_id],
                        "selected_columns": [
                            "_tags_flattened",
                            "tags.key",
                            "tags.value",
                        ],
                    }
                ),
            ).data
        )["data"]

    assert _fetch_flattened_tags() == [
        {
            "tags.key": [
                "browser|name",
                "browser|to_delete",
                "notbrowser",
                "notbrowser2",
            ],
            "tags.value": ["foo=1", "foo=2", "foo\\3", "foo4"],
            "_tags_flattened": "|browser\\|name=foo\\=1||browser\\|to_delete=foo\\=2||notbrowser=foo\\\\3||notbrowser2=foo4|",
        }
    ]

    timestamp = datetime.now(tz=pytz.utc)

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser|to_delete",
                        "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
        ),
        datetime.now(),
    )

    processed = self.replacer.process_message(message)
    self.replacer.flush_batch([processed])

    assert _fetch_flattened_tags() == [
        {
            "tags.key": ["browser|name", "notbrowser", "notbrowser2"],
            "tags.value": ["foo=1", "foo\\3", "foo4"],
            "_tags_flattened": "|browser\\|name=foo\\=1||notbrowser=foo\\\\3||notbrowser2=foo4|",
        }
    ]
def encode(self, value: int) -> KafkaPayload:
    return KafkaPayload(None, f"{value}".encode("utf-8"))
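# Hypothetical counterpart to the integer encode() above, shown only to illustrate
# the round trip. It assumes KafkaPayload exposes its raw bytes as `.value`; this
# decode() is a sketch and not code taken from the repository.
def decode(self, value: KafkaPayload) -> int:
    return int(value.value.decode("utf-8"))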