Example #1
    def load(self, writer: BufferedWriterWrapper) -> None:
        logger = logging.getLogger("snuba.bulk-loader")

        clickhouse_tables = clickhouse_ro.execute("show tables")
        if (self.__dest_table, ) not in clickhouse_tables:
            raise ValueError("Destination table %s does not exists" %
                             self.__dest_table)

        table_content = clickhouse_ro.execute("select count(*) from %s" %
                                              self.__dest_table)
        if table_content != [(0, )]:
            raise ValueError("Destination Table is not empty")

        descriptor = self.__source.get_descriptor()
        logger.info("Loading snapshot %s", descriptor.id)

        with self.__source.get_table_file(self.__source_table) as table:
            logger.info("Loading table %s from file", self.__source_table)
            row_count = 0
            with writer as buffer_writer:
                for row in table:
                    clickhouse_data = self.__row_processor(row)
                    buffer_writer.write(clickhouse_data)
                    row_count += 1
            logger.info("Load complete %d records loaded", row_count)
Example #2
    def test_messages(self):
        processor = GroupedMessageProcessor("sentry_groupedmessage")
        message_filter = CdcTableNameMessageFilter(
            postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.BEGIN_MSG, []))
        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.COMMIT_MSG, []))
        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.INSERT_MSG,
                            [("table", "sentry_groupedmessage".encode())]))
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        ret = clickhouse_ro.execute("SELECT * FROM test_groupedmessage_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),  # last_seen
            datetime(2019, 6, 19, 6, 45, 32),  # first_seen
            datetime(2019, 6, 19, 6, 45, 32),  # active_at
            None,  # first_release_id
        )

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.UPDATE_MSG,
                            [("table", "sentry_groupedmessage".encode())]))
        update_msg = json.loads(self.UPDATE_MSG)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.DELETE_MSG, []))
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]
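
The should_drop assertions above hinge on the "table" message header: transaction markers (BEGIN/COMMIT) carry no header and must never be dropped, while row events are matched against the configured Postgres table. A simplified sketch of that kind of header-based check, assuming the headers arrive as a mapping (illustrative only, not the actual CdcTableNameMessageFilter implementation):

from typing import Mapping, Optional

class TableNameFilterSketch:
    def __init__(self, postgres_table: str) -> None:
        self.__postgres_table = postgres_table

    def should_drop(self, headers: Mapping[str, bytes]) -> bool:
        table: Optional[bytes] = headers.get("table")
        # BEGIN/COMMIT messages carry no table header and are kept;
        # row events are kept only when they target the configured table.
        if table is None:
            return False
        return table.decode("utf-8") != self.__postgres_table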
Example #3
def check_clickhouse() -> bool:
    """
    Checks if all the tables in all the enabled datasets exist in ClickHouse
    """
    try:
        clickhouse_tables = clickhouse_ro.execute("show tables")
        for name in get_enabled_dataset_names():
            dataset = get_dataset(name)
            source = dataset.get_dataset_schemas().get_read_schema()
            if isinstance(source, TableSchema):
                table_name = source.get_table_name()
                if (table_name, ) not in clickhouse_tables:
                    return False

        return True

    except Exception:
        return False
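
A check like this is typically exposed through an HTTP health endpoint so that orchestration can probe it. A hypothetical wiring using Flask (the framework and the route name are assumptions, not taken from the snippet above):

from flask import Flask, jsonify

app = Flask(__name__)

@app.route("/health")
def health():
    # Reuses check_clickhouse() from above; 502 signals a failed dependency.
    if check_clickhouse():
        return jsonify({"clickhouse": "ok"}), 200
    return jsonify({"clickhouse": "down"}), 502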
Example #4
    def test_messages(self):
        processor = GroupAssigneeProcessor("sentry_groupasignee")

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        ret = clickhouse_ro.execute("SELECT * FROM test_groupassignee_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),  # date_added
            1,  # user_id
            None,  # team_id
        )

        update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        # Tests an update with a key change, which becomes two inserts:
        # a deletion of the old row and the insertion of the new one.
        update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.DELETED, self.PROCESSED_UPDATE]

        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]
Example #5
    def test_bulk_load(self):
        row = GroupAssigneeRow.from_bulk({
            "project_id": "2",
            "group_id": "1359",
            "date_added": "2019-09-19 00:17:55+00",
            "user_id": "1",
            "team_id": "",
        })
        self.write_processed_records(row.to_clickhouse())
        ret = clickhouse_ro.execute("SELECT * FROM test_groupassignee_local;")
        assert ret[0] == (
            0,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),  # date_added
            1,  # user_id
            None,  # team_id
        )
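
The bulk path receives every field as a string and has no Kafka offset, which is why the row above lands with offset 0, an empty team_id becomes None, and the "+00" suffix on date_added is dropped. A sketch of the kind of conversion that implies, using illustrative helpers rather than GroupAssigneeRow internals:

from datetime import datetime
from typing import Optional

def _int_or_none(value: str) -> Optional[int]:
    # Empty strings in the bulk dump (e.g. team_id) map to NULL/None.
    return int(value) if value != "" else None

def _parse_pg_datetime(value: str) -> datetime:
    # The dump carries a "+00" offset suffix; the assertion expects naive UTC.
    return datetime.strptime(value.split("+")[0], "%Y-%m-%d %H:%M:%S")

assert _int_or_none("") is None
assert _int_or_none("1359") == 1359
assert _parse_pg_datetime("2019-09-19 00:17:55+00") == datetime(2019, 9, 19, 0, 17, 55)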
Example #6
    def test_messages(self):
        processor = GroupedMessageProcessor("sentry_groupedmessage")

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        ret = clickhouse_ro.execute("SELECT * FROM test_groupedmessage_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),  # last_seen
            datetime(2019, 6, 19, 6, 45, 32),  # first_seen
            datetime(2019, 6, 19, 6, 45, 32),  # active_at
            None,  # first_release_id
        )

        update_msg = json.loads(self.UPDATE_MSG)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]
Example #7
    def test_bulk_load(self):
        row = GroupedMessageRow.from_bulk({
            "project_id": "2",
            "id": "10",
            "status": "0",
            "last_seen": "2019-06-28 17:57:32+00",
            "first_seen": "2019-06-28 06:40:17+00",
            "active_at": "2019-06-28 06:40:17+00",
            "first_release_id": "26",
        })
        self.write_processed_records(row.to_clickhouse())
        ret = clickhouse_ro.execute("SELECT * FROM test_groupedmessage_local;")
        assert ret[0] == (
            0,  # offset
            0,  # deleted
            2,  # project_id
            10,  # id
            0,  # status
            datetime(2019, 6, 28, 17, 57, 32),  # last_seen
            datetime(2019, 6, 28, 6, 40, 17),  # first_seen
            datetime(2019, 6, 28, 6, 40, 17),  # active_at
            26,  # first_release_id
        )
Example #8
    def test_messages(self):
        processor = GroupAssigneeProcessor("sentry_groupasignee")
        message_filter = CdcTableNameMessageFilter(
            postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.BEGIN_MSG, []))
        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.COMMIT_MSG, []))
        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.INSERT_MSG,
                            [("table", POSTGRES_TABLE.encode())]))
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        ret = clickhouse_ro.execute("SELECT * FROM test_groupassignee_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),  # date_added
            1,  # user_id
            None,  # team_id
        )

        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_NO_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        # Tests an update with a key change, which becomes two inserts:
        # a deletion of the old row and the insertion of the new one.
        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_WITH_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.DELETED, self.PROCESSED_UPDATE]

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.DELETE_MSG, []))
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]