Example #1
def test_table_name_filter() -> None:
    table_name = "table_name"
    message_filter = CdcTableNameMessageFilter(table_name)

    # Messages that match the table should not be dropped.
    assert not message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", table_name.encode("utf8"))]),
            datetime.now(),
        ))

    # Messages without a table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", []),
            datetime.now(),
        ))

    # Messages from a different table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", b"other_table")]),
            datetime.now(),
        ))
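
For context, below is a minimal sketch of a filter that satisfies the assertions in Example #1. The class name and constructor signature come from the examples; the body is an assumption, not the real implementation (note that Examples #4 and #7 exercise a variant that keeps messages without a "table" header).

class CdcTableNameMessageFilter:
    # Sketch only: drops CDC messages whose Kafka headers do not name the
    # configured Postgres table, matching the behaviour asserted in Example #1.
    def __init__(self, postgres_table: str) -> None:
        self.__postgres_table = postgres_table.encode("utf-8")

    def should_drop(self, message) -> bool:
        # Keep the message only if a ("table", <postgres_table>) header is present.
        table_header = next(
            (value for key, value in message.payload.headers if key == "table"),
            None,
        )
        return table_header != self.__postgres_table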
Example #2
    def test_ignored_table(self):
        message_filter = CdcTableNameMessageFilter(
            postgres_table=POSTGRES_TABLE)

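        # Messages tagged with a different Postgres table should be dropped.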
        assert message_filter.should_drop(
            self.__make_msg(0, 42, self.UPDATE_MSG,
                            [("table", "NOT_groupedmessage".encode())]))
Example #3
    def test_messages(self):
        processor = GroupedMessageProcessor("sentry_groupedmessage")
        message_filter = CdcTableNameMessageFilter(postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(
            offset=42, partition=0, timestamp=datetime(1970, 1, 1)
        )

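        # BEGIN/COMMIT transaction markers carry no table header, so the filter drops them.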
        assert message_filter.should_drop(self.__make_msg(0, 42, self.BEGIN_MSG, []))

        assert message_filter.should_drop(self.__make_msg(0, 42, self.COMMIT_MSG, []))

        assert not message_filter.should_drop(
            self.__make_msg(
                0, 42, self.INSERT_MSG, [("table", "sentry_groupedmessage".encode())]
            )
        )
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        self.write_processed_messages([ret])
        ret = (
            get_cluster(StorageSetKey.EVENTS)
            .get_query_connection(ClickhouseClientSettings.INSERT)
            .execute("SELECT * FROM groupedmessage_local;")
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),
            datetime(2019, 6, 19, 6, 45, 32),
            datetime(2019, 6, 19, 6, 45, 32),
            None,
        )

        assert not message_filter.should_drop(
            self.__make_msg(
                0, 42, self.UPDATE_MSG, [("table", "sentry_groupedmessage".encode())]
            )
        )
        update_msg = json.loads(self.UPDATE_MSG)
        ret = processor.process_message(update_msg, metadata)
        assert ret == InsertBatch([self.PROCESSED])

        assert not message_filter.should_drop(
            self.__make_msg(
                0, 42, self.DELETE_MSG, [("table", "sentry_groupedmessage".encode())]
            )
        )
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret == InsertBatch([self.DELETED])
Example #4
    def test_messages(self):
        processor = GroupedMessageProcessor("sentry_groupedmessage")
        message_filter = CdcTableNameMessageFilter(
            postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

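        # In this variant, messages without a table header are not dropped by the
        # filter; the processor instead returns None for BEGIN/COMMIT markers.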
        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.BEGIN_MSG, []))
        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.COMMIT_MSG, []))
        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.INSERT_MSG,
                            [("table", "sentry_groupedmessage".encode())]))
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        ret = clickhouse_ro.execute("SELECT * FROM test_groupedmessage_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),
            datetime(2019, 6, 19, 6, 45, 32),
            datetime(2019, 6, 19, 6, 45, 32),
            None,
        )

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.UPDATE_MSG,
                            [("table", "sentry_groupedmessage".encode())]))
        update_msg = json.loads(self.UPDATE_MSG)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.DELETE_MSG, []))
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]
Example #5
    ("user_id", UInt(64, Modifiers(nullable=True))),
    ("team_id", UInt(64, Modifiers(nullable=True))),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="groupassignee_local",
    dist_table_name="groupassignee_dist",
    storage_set_key=StorageSetKey.CDC,
)

POSTGRES_TABLE = "sentry_groupasignee"

storage = CdcStorage(
    storage_key=StorageKey.GROUPASSIGNEES,
    storage_set_key=StorageSetKey.CDC,
    schema=schema,
    query_processors=[
        PrewhereProcessor(["project_id"]),
        ConsistencyEnforcerProcessor(),
    ],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=GroupAssigneeProcessor(POSTGRES_TABLE),
        default_topic=Topic.CDC,
        pre_filter=CdcTableNameMessageFilter(POSTGRES_TABLE),
    ),
    default_control_topic="cdc_control",
    postgres_table=POSTGRES_TABLE,
    row_processor=lambda row: GroupAssigneeRow.from_bulk(row).to_clickhouse(),
)
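
Example #5 wires the filter as pre_filter next to the message processor in the storage's stream loader. The tests in Examples #3, #4, #6, and #7 apply the same two-step pattern by hand; a simplified, hypothetical driver loop (the function name and arguments below are illustrative, not part of the real consumer API) looks roughly like this:

import json


def process_stream(messages, message_filter, processor, metadata):
    # Hypothetical sketch mirroring the pattern used in the tests above:
    # discard messages for other tables first, then let the processor turn
    # each remaining payload into an insert batch (None for BEGIN/COMMIT).
    # In a real consumer the KafkaMessageMetadata would be derived per message;
    # here it is passed in once, as in the tests.
    for message in messages:
        if message_filter.should_drop(message):
            continue
        decoded = json.loads(message.payload.value)
        batch = processor.process_message(decoded, metadata)
        if batch is not None:
            yield batch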
Example #6
    def test_messages(self):
        processor = GroupAssigneeProcessor("sentry_groupasignee")
        message_filter = CdcTableNameMessageFilter(
            postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(offset=42,
                                        partition=0,
                                        timestamp=datetime(1970, 1, 1))

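        # BEGIN/COMMIT messages have no table header and are dropped by the filter.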
        assert message_filter.should_drop(
            self.__make_msg(0, 42, self.BEGIN_MSG, []))

        assert message_filter.should_drop(
            self.__make_msg(0, 42, self.COMMIT_MSG, []))

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.INSERT_MSG,
                            [("table", POSTGRES_TABLE.encode())]))
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        self.write_processed_messages([ret])
        ret = (get_cluster(StorageSetKey.EVENTS).get_query_connection(
            ClickhouseClientSettings.QUERY).execute(
                "SELECT * FROM groupassignee_local;"))
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_NO_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret == InsertBatch([self.PROCESSED])

        # Tests an update with a key change, which becomes two inserts:
        # one deletion and the insertion of the new row.
        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_WITH_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret == InsertBatch([self.DELETED, self.PROCESSED_UPDATE])

        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.DELETE_MSG,
                [("table", POSTGRES_TABLE.encode())],
            ))
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret == InsertBatch([self.DELETED])
Example #7
    def test_messages(self):
        processor = GroupAssigneeProcessor("sentry_groupasignee")
        message_filter = CdcTableNameMessageFilter(
            postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

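        # Here, messages without a table header pass the filter; the processor
        # returns None for the BEGIN/COMMIT transaction markers.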
        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.BEGIN_MSG, []))
        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.COMMIT_MSG, []))
        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.INSERT_MSG,
                            [("table", POSTGRES_TABLE.encode())]))
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        ret = clickhouse_ro.execute("SELECT * FROM test_groupassignee_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_NO_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        # Tests an update with a key change, which becomes two inserts:
        # one deletion and the insertion of the new row.
        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_WITH_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.DELETED, self.PROCESSED_UPDATE]

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.DELETE_MSG, []))
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]