def test_table_name_filter() -> None:
    table_name = "table_name"
    message_filter = CdcTableNameMessageFilter(table_name)

    # Messages that match the table should not be dropped.
    assert not message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", table_name.encode("utf8"))]),
            datetime.now(),
        )
    )

    # Messages without a table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", []),
            datetime.now(),
        )
    )

    # Messages from a different table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", b"other_table")]),
            datetime.now(),
        )
    )
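# A minimal sketch of the filter these assertions pin down, inferred from the
# test above rather than copied from Snuba's implementation: a message
# survives only when its Kafka headers carry a ("table", <name>) pair that
# matches the configured Postgres table.
class CdcTableNameMessageFilter:
    def __init__(self, postgres_table: str) -> None:
        self.__postgres_table = postgres_table.encode("utf8")

    def should_drop(self, message: Message[KafkaPayload]) -> bool:
        table = next(
            (value for key, value in message.payload.headers if key == "table"),
            None,
        )
        # Drop messages with no table header, or addressed to another table.
        return table != self.__postgres_table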
def test_ignored_table(self):
    message_filter = CdcTableNameMessageFilter(postgres_table=POSTGRES_TABLE)

    assert message_filter.should_drop(
        self.__make_msg(
            0, 42, self.UPDATE_MSG, [("table", "NOT_groupedmessage".encode())]
        )
    )
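# The tests call a private __make_msg helper that is not shown in this
# section. A plausible sketch, assuming the same Message / Partition / Topic /
# KafkaPayload types used in test_table_name_filter above and headers given as
# (str, bytes) pairs:
def __make_msg(
    self,
    partition: int,
    offset: int,
    payload: str,
    headers: Sequence[Tuple[str, bytes]],
) -> Message[KafkaPayload]:
    return Message(
        Partition(Topic("topic"), partition),
        offset,
        KafkaPayload(None, payload.encode("utf8"), headers),
        datetime.now(),
    )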
def test_messages(self):
    processor = GroupedMessageProcessor("sentry_groupedmessage")
    message_filter = CdcTableNameMessageFilter(postgres_table=POSTGRES_TABLE)

    metadata = KafkaMessageMetadata(
        offset=42, partition=0, timestamp=datetime(1970, 1, 1)
    )

    assert message_filter.should_drop(self.__make_msg(0, 42, self.BEGIN_MSG, []))
    assert message_filter.should_drop(self.__make_msg(0, 42, self.COMMIT_MSG, []))

    assert not message_filter.should_drop(
        self.__make_msg(
            0, 42, self.INSERT_MSG, [("table", "sentry_groupedmessage".encode())]
        )
    )
    insert_msg = json.loads(self.INSERT_MSG)
    ret = processor.process_message(insert_msg, metadata)
    assert ret == InsertBatch([self.PROCESSED])
    self.write_processed_messages([ret])
    ret = (
        get_cluster(StorageSetKey.EVENTS)
        .get_query_connection(ClickhouseClientSettings.INSERT)
        .execute("SELECT * FROM groupedmessage_local;")
    )
    assert ret[0] == (
        42,  # offset
        0,  # deleted
        2,  # project_id
        74,  # id
        0,  # status
        datetime(2019, 6, 19, 6, 46, 28),
        datetime(2019, 6, 19, 6, 45, 32),
        datetime(2019, 6, 19, 6, 45, 32),
        None,
    )

    assert not message_filter.should_drop(
        self.__make_msg(
            0, 42, self.UPDATE_MSG, [("table", "sentry_groupedmessage".encode())]
        )
    )
    update_msg = json.loads(self.UPDATE_MSG)
    ret = processor.process_message(update_msg, metadata)
    assert ret == InsertBatch([self.PROCESSED])

    assert not message_filter.should_drop(
        self.__make_msg(
            0, 42, self.DELETE_MSG, [("table", "sentry_groupedmessage".encode())]
        )
    )
    delete_msg = json.loads(self.DELETE_MSG)
    ret = processor.process_message(delete_msg, metadata)
    assert ret == InsertBatch([self.DELETED])
def test_messages(self):
    processor = GroupedMessageProcessor("sentry_groupedmessage")
    message_filter = CdcTableNameMessageFilter(postgres_table=POSTGRES_TABLE)

    metadata = KafkaMessageMetadata(
        offset=42,
        partition=0,
    )

    assert not message_filter.should_drop(
        self.__make_msg(0, 42, self.BEGIN_MSG, [])
    )
    begin_msg = json.loads(self.BEGIN_MSG)
    ret = processor.process_message(begin_msg, metadata)
    assert ret is None

    assert not message_filter.should_drop(
        self.__make_msg(0, 42, self.COMMIT_MSG, [])
    )
    commit_msg = json.loads(self.COMMIT_MSG)
    ret = processor.process_message(commit_msg, metadata)
    assert ret is None

    assert not message_filter.should_drop(
        self.__make_msg(
            0, 42, self.INSERT_MSG, [("table", "sentry_groupedmessage".encode())]
        )
    )
    insert_msg = json.loads(self.INSERT_MSG)
    ret = processor.process_message(insert_msg, metadata)
    assert ret.data == [self.PROCESSED]
    self.write_processed_records(ret.data)
    ret = clickhouse_ro.execute("SELECT * FROM test_groupedmessage_local;")
    assert ret[0] == (
        42,  # offset
        0,  # deleted
        2,  # project_id
        74,  # id
        0,  # status
        datetime(2019, 6, 19, 6, 46, 28),
        datetime(2019, 6, 19, 6, 45, 32),
        datetime(2019, 6, 19, 6, 45, 32),
        None,
    )

    assert not message_filter.should_drop(
        self.__make_msg(
            0, 42, self.UPDATE_MSG, [("table", "sentry_groupedmessage".encode())]
        )
    )
    update_msg = json.loads(self.UPDATE_MSG)
    ret = processor.process_message(update_msg, metadata)
    assert ret.data == [self.PROCESSED]

    assert not message_filter.should_drop(
        self.__make_msg(0, 42, self.DELETE_MSG, [])
    )
    delete_msg = json.loads(self.DELETE_MSG)
    ret = processor.process_message(delete_msg, metadata)
    assert ret.data == [self.DELETED]
("user_id", UInt(64, Modifiers(nullable=True))), ("team_id", UInt(64, Modifiers(nullable=True))), ]) schema = WritableTableSchema( columns=columns, local_table_name="groupassignee_local", dist_table_name="groupassignee_dist", storage_set_key=StorageSetKey.CDC, ) POSTGRES_TABLE = "sentry_groupasignee" storage = CdcStorage( storage_key=StorageKey.GROUPASSIGNEES, storage_set_key=StorageSetKey.CDC, schema=schema, query_processors=[ PrewhereProcessor(["project_id"]), ConsistencyEnforcerProcessor(), ], stream_loader=build_kafka_stream_loader_from_settings( processor=GroupAssigneeProcessor(POSTGRES_TABLE), default_topic=Topic.CDC, pre_filter=CdcTableNameMessageFilter(POSTGRES_TABLE), ), default_control_topic="cdc_control", postgres_table=POSTGRES_TABLE, row_processor=lambda row: GroupAssigneeRow.from_bulk(row).to_clickhouse(), )
def test_messages(self):
    processor = GroupAssigneeProcessor("sentry_groupasignee")
    message_filter = CdcTableNameMessageFilter(postgres_table=POSTGRES_TABLE)

    metadata = KafkaMessageMetadata(
        offset=42, partition=0, timestamp=datetime(1970, 1, 1)
    )

    assert message_filter.should_drop(self.__make_msg(0, 42, self.BEGIN_MSG, []))
    assert message_filter.should_drop(self.__make_msg(0, 42, self.COMMIT_MSG, []))

    assert not message_filter.should_drop(
        self.__make_msg(0, 42, self.INSERT_MSG, [("table", POSTGRES_TABLE.encode())])
    )
    insert_msg = json.loads(self.INSERT_MSG)
    ret = processor.process_message(insert_msg, metadata)
    assert ret == InsertBatch([self.PROCESSED])
    self.write_processed_messages([ret])
    ret = (
        get_cluster(StorageSetKey.EVENTS)
        .get_query_connection(ClickhouseClientSettings.QUERY)
        .execute("SELECT * FROM groupassignee_local;")
    )
    assert ret[0] == (
        42,  # offset
        0,  # deleted
        2,  # project_id
        1359,  # group_id
        datetime(2019, 9, 19, 0, 17, 55),
        1,  # user_id
        None,  # team_id
    )

    assert not message_filter.should_drop(
        self.__make_msg(
            0,
            42,
            self.UPDATE_MSG_NO_KEY_CHANGE,
            [("table", POSTGRES_TABLE.encode())],
        )
    )
    update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
    ret = processor.process_message(update_msg, metadata)
    assert ret == InsertBatch([self.PROCESSED])

    # Tests an update with a key change, which becomes two inserts:
    # the deletion of the old row and the insertion of the new one.
    assert not message_filter.should_drop(
        self.__make_msg(
            0,
            42,
            self.UPDATE_MSG_WITH_KEY_CHANGE,
            [("table", POSTGRES_TABLE.encode())],
        )
    )
    update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
    ret = processor.process_message(update_msg, metadata)
    assert ret == InsertBatch([self.DELETED, self.PROCESSED_UPDATE])

    assert not message_filter.should_drop(
        self.__make_msg(
            0,
            42,
            self.DELETE_MSG,
            [("table", POSTGRES_TABLE.encode())],
        )
    )
    delete_msg = json.loads(self.DELETE_MSG)
    ret = processor.process_message(delete_msg, metadata)
    assert ret == InsertBatch([self.DELETED])
def test_messages(self):
    processor = GroupAssigneeProcessor("sentry_groupasignee")
    message_filter = CdcTableNameMessageFilter(postgres_table=POSTGRES_TABLE)

    metadata = KafkaMessageMetadata(
        offset=42,
        partition=0,
    )

    assert not message_filter.should_drop(
        self.__make_msg(0, 42, self.BEGIN_MSG, [])
    )
    begin_msg = json.loads(self.BEGIN_MSG)
    ret = processor.process_message(begin_msg, metadata)
    assert ret is None

    assert not message_filter.should_drop(
        self.__make_msg(0, 42, self.COMMIT_MSG, [])
    )
    commit_msg = json.loads(self.COMMIT_MSG)
    ret = processor.process_message(commit_msg, metadata)
    assert ret is None

    assert not message_filter.should_drop(
        self.__make_msg(0, 42, self.INSERT_MSG, [("table", POSTGRES_TABLE.encode())])
    )
    insert_msg = json.loads(self.INSERT_MSG)
    ret = processor.process_message(insert_msg, metadata)
    assert ret.data == [self.PROCESSED]
    self.write_processed_records(ret.data)
    ret = clickhouse_ro.execute("SELECT * FROM test_groupassignee_local;")
    assert ret[0] == (
        42,  # offset
        0,  # deleted
        2,  # project_id
        1359,  # group_id
        datetime(2019, 9, 19, 0, 17, 55),
        1,  # user_id
        None,  # team_id
    )

    assert not message_filter.should_drop(
        self.__make_msg(
            0,
            42,
            self.UPDATE_MSG_NO_KEY_CHANGE,
            [("table", POSTGRES_TABLE.encode())],
        )
    )
    update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
    ret = processor.process_message(update_msg, metadata)
    assert ret.data == [self.PROCESSED]

    # Tests an update with a key change, which becomes two inserts:
    # the deletion of the old row and the insertion of the new one.
    assert not message_filter.should_drop(
        self.__make_msg(
            0,
            42,
            self.UPDATE_MSG_WITH_KEY_CHANGE,
            [("table", POSTGRES_TABLE.encode())],
        )
    )
    update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
    ret = processor.process_message(update_msg, metadata)
    assert ret.data == [self.DELETED, self.PROCESSED_UPDATE]

    assert not message_filter.should_drop(
        self.__make_msg(0, 42, self.DELETE_MSG, [])
    )
    delete_msg = json.loads(self.DELETE_MSG)
    ret = processor.process_message(delete_msg, metadata)
    assert ret.data == [self.DELETED]