示例#1
0
    def test(self):
        """Walk partition discovery through empty, one-entry and two-entry states.

        Events are written one batch at a time with ``write_processed_records``
        and ``optimize.get_partitions_to_optimize`` is queried after each step.
        Finally ``optimize.optimize_partitions`` is run on the reported entries
        and the query is expected to return an empty list again.
        """

        def pending(**filters):
            # self.* is looked up on every call, just as separate inline calls would.
            return optimize.get_partitions_to_optimize(
                self.clickhouse, self.database, self.table, **filters
            )

        def insert_event(when):
            self.write_processed_records(self.create_event_for_date(when))

        # 1999-12-26 is a Sunday; subtracting weekday() rewinds to that week's Monday.
        sunday = datetime(1999, 12, 26)
        week_start = sunday - timedelta(days=sunday.weekday())
        # 31 days before that Monday, snapped back to a Monday once more.
        month_back = week_start - timedelta(days=31)
        earlier_week_start = month_back - timedelta(days=month_back.weekday())

        # Nothing written yet: nothing to optimize.
        assert pending() == []

        # A single write is not reported as needing optimization.
        insert_event(sunday)
        assert pending() == []

        # A second, then a third write for the same date: one entry each time.
        # NOTE(review): 90 is presumably the retention-days value — confirm.
        for _ in range(2):
            insert_event(sunday)
            assert pending() == [(week_start, 90)]

        # Two writes dated about a month earlier add a second entry.
        for _ in range(2):
            insert_event(earlier_week_start)
        to_optimize = pending()
        assert to_optimize == [(week_start, 90), (earlier_week_start, 90)]

        # `before=sunday` leaves only the entry for the earlier week.
        assert list(pending(before=sunday)) == [(earlier_week_start, 90)]

        optimize.optimize_partitions(
            self.clickhouse, self.database, self.table, to_optimize
        )

        # Once optimized, nothing is reported any more.
        assert pending() == []
示例#2
0
    def test(self) -> None:
        """Check partition discovery and optimization against the EVENTS storage.

        Rows are written one batch at a time with ``write_processed_messages``
        and ``optimize.get_partitions_to_optimize`` is queried after each step.
        Finally ``optimize.optimize_partitions`` is run on the reported entries
        and the query is expected to return an empty list again.
        """
        storage = get_writable_storage(StorageKey.EVENTS)
        cluster = storage.get_cluster()
        clickhouse = cluster.get_query_connection(ClickhouseClientSettings.OPTIMIZE)
        table = storage.get_table_writer().get_schema().get_table_name()
        database = cluster.get_database()

        # Positional arguments shared by every optimize.* call below.
        target = (clickhouse, database, table)

        # 1999-12-26 is a Sunday; subtracting weekday() rewinds to that week's Monday.
        sunday = datetime(1999, 12, 26)
        week_start = sunday - timedelta(days=sunday.weekday())
        # 31 days before that Monday, snapped back to a Monday once more.
        month_back = week_start - timedelta(days=31)
        earlier_week_start = month_back - timedelta(days=month_back.weekday())

        # Nothing written yet: nothing to optimize.
        assert optimize.get_partitions_to_optimize(*target) == []

        # A single write is not reported as needing optimization.
        self.write_processed_messages([self.create_event_row_for_date(sunday)])
        assert optimize.get_partitions_to_optimize(*target) == []

        # A second, then a third write for the same date: one entry each time.
        # NOTE(review): 90 is presumably the retention-days value — confirm.
        for _ in range(2):
            self.write_processed_messages([self.create_event_row_for_date(sunday)])
            assert optimize.get_partitions_to_optimize(*target) == [(week_start, 90)]

        # Two writes dated about a month earlier add a second entry.
        for _ in range(2):
            self.write_processed_messages(
                [self.create_event_row_for_date(earlier_week_start)]
            )
        to_optimize = optimize.get_partitions_to_optimize(*target)
        assert to_optimize == [(week_start, 90), (earlier_week_start, 90)]

        # `before=sunday` leaves only the entry for the earlier week.
        only_earlier = list(optimize.get_partitions_to_optimize(*target, before=sunday))
        assert only_earlier == [(earlier_week_start, 90)]

        optimize.optimize_partitions(*target, to_optimize)

        # Once optimized, nothing is reported any more.
        assert optimize.get_partitions_to_optimize(*target) == []
示例#3
0
    def test_optimize(
        self,
        storage_key: StorageKey,
        create_event_row_for_date: Callable[[datetime], InsertBatch],
    ) -> None:
        """Check partition discovery and optimization for the given storage.

        Args:
            storage_key: Key passed to ``get_writable_storage`` to pick the
                storage under test.
            create_event_row_for_date: Factory called with a ``datetime`` to
                build the batch that gets written for that date.

        Batches are written one at a time with ``write_processed_messages`` and
        ``optimize.get_partitions_to_optimize`` is queried after each step; the
        returned items are compared through their ``date`` and
        ``retention_days`` attributes. Finally ``optimize.optimize_partitions``
        is run and the query is expected to return an empty list again.
        """
        storage = get_writable_storage(storage_key)
        cluster = storage.get_cluster()
        clickhouse = cluster.get_query_connection(ClickhouseClientSettings.OPTIMIZE)
        table = storage.get_table_writer().get_schema().get_local_table_name()
        database = cluster.get_database()

        # Positional arguments shared by every get_partitions_to_optimize call.
        query_args = (clickhouse, storage, database, table)

        # 1999-12-26 is a Sunday; subtracting weekday() rewinds to that week's Monday.
        sunday = datetime(1999, 12, 26)
        week_start = sunday - timedelta(days=sunday.weekday())
        # 31 days before that Monday, snapped back to a Monday once more.
        month_back = week_start - timedelta(days=31)
        earlier_week_start = month_back - timedelta(days=month_back.weekday())

        # Nothing written yet: nothing to optimize.
        assert optimize.get_partitions_to_optimize(*query_args) == []

        # A single write is not reported as needing optimization.
        write_processed_messages(storage, [create_event_row_for_date(sunday)])
        assert optimize.get_partitions_to_optimize(*query_args) == []

        # A second, then a third write for the same date: one entry each time.
        for _ in range(2):
            write_processed_messages(storage, [create_event_row_for_date(sunday)])
            found = optimize.get_partitions_to_optimize(*query_args)
            assert [(part.date, part.retention_days) for part in found] == [
                (week_start, 90)
            ]

        # Two writes dated about a month earlier add a second entry.
        for _ in range(2):
            write_processed_messages(
                storage, [create_event_row_for_date(earlier_week_start)]
            )
        to_optimize = optimize.get_partitions_to_optimize(*query_args)
        assert [(part.date, part.retention_days) for part in to_optimize] == [
            (week_start, 90),
            (earlier_week_start, 90),
        ]

        # `before=sunday` leaves only the entry for the earlier week.
        only_earlier = optimize.get_partitions_to_optimize(*query_args, before=sunday)
        assert [(part.date, part.retention_days) for part in only_earlier] == [
            (earlier_week_start, 90)
        ]

        # optimize_partitions takes no storage argument, so it is spelled out.
        optimize.optimize_partitions(clickhouse, database, table, to_optimize)

        # Once optimized, nothing is reported any more.
        assert optimize.get_partitions_to_optimize(*query_args) == []