Example #1
0
def test_disabled_cluster() -> None:
    importlib.reload(cluster)

    with pytest.raises(AssertionError):
        cluster.get_cluster(StorageSetKey.OUTCOMES)
    with patch("snuba.settings.ENABLE_DEV_FEATURES", True):
        cluster.get_cluster(StorageSetKey.OUTCOMES)
Example #2
0
def test_dataset_load(dataset_name: str) -> None:
    """
    Tests that if we decalare a dataset that is not in in dev. It can
    be fully loaded including all its entities and storages
    """

    dataset = get_dataset(dataset_name)
    for entity in dataset.get_all_entities():
        for storage in entity.get_all_storages():
            get_cluster(storage.get_storage_set_key())
Example #3
0
def backwards(logger: logging.Logger) -> None:
    """
    This method cleans up the temporary tables used by the forwards method and
    returns us to the original state if the forwards method has failed somewhere
    in the middle. Otherwise it's a no-op.
    """
    cluster = get_cluster(StorageSetKey.TRANSACTIONS)

    if not cluster.is_single_node():
        return

    clickhouse = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)

    def table_exists(table_name: str) -> bool:
        return clickhouse.execute(f"EXISTS TABLE {table_name};") == [(1,)]

    if not table_exists(TABLE_NAME):
        raise Exception(f"Table {TABLE_NAME} is missing")

    if table_exists(TABLE_NAME_NEW):
        logger.info(f"Dropping table {TABLE_NAME_NEW}")
        time.sleep(1)
        clickhouse.execute(f"DROP TABLE {TABLE_NAME_NEW};")

    if table_exists(TABLE_NAME_OLD):
        logger.info(f"Dropping table {TABLE_NAME_OLD}")
        time.sleep(1)
        clickhouse.execute(f"DROP TABLE {TABLE_NAME_OLD};")
Example #4
0
def test_run_migration() -> None:
    runner = Runner()
    runner.run_migration(MigrationKey(MigrationGroup.SYSTEM,
                                      "0001_migrations"))

    connection = get_cluster(StorageSetKey.MIGRATIONS).get_query_connection(
        ClickhouseClientSettings.MIGRATE)
    assert connection.execute(
        "SELECT group, migration_id, status, version FROM migrations_local;"
    ) == [("system", "0001_migrations", "completed", 1)]

    # Invalid migration ID
    with pytest.raises(MigrationError):
        runner.run_migration(MigrationKey(MigrationGroup.SYSTEM, "xxx"))

    # Run out of order
    with pytest.raises(MigrationError):
        runner.run_migration(MigrationKey(MigrationGroup.EVENTS,
                                          "0003_errors"))

    # Running with --fake
    runner.run_migration(MigrationKey(MigrationGroup.EVENTS,
                                      "0001_events_initial"),
                         fake=True)
    assert connection.execute("SHOW TABLES LIKE 'sentry_local'") == []
Example #5
0
 def test_bulk_load(self) -> None:
     row = GroupAssigneeRow.from_bulk(
         {
             "project_id": "2",
             "group_id": "1359",
             "date_added": "2019-09-19 00:17:55+00",
             "user_id": "1",
             "team_id": "",
         }
     )
     self.write_rows([row.to_clickhouse()])
     ret = (
         get_cluster(StorageSetKey.EVENTS)
         .get_query_connection(ClickhouseClientSettings.QUERY)
         .execute("SELECT * FROM groupassignee_local;")
     )
     assert ret[0] == (
         0,  # offset
         0,  # deleted
         2,  # project_id
         1359,  # group_id
         datetime(2019, 9, 19, 0, 17, 55),
         1,  # user_id
         None,  # team_id
     )
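The assertion above implies the bulk-load conversion rules: numeric strings become integers, the empty team_id becomes None, and the Postgres-style timestamp "2019-09-19 00:17:55+00" becomes a naive UTC datetime. A standalone sketch of those conversions (the helper names are hypothetical, not the GroupAssigneeRow implementation):

from datetime import datetime
from typing import Optional


def parse_bulk_datetime(value: str) -> datetime:
    # Postgres bulk dumps render UTC timestamps as "... +00"; strip the offset
    # and parse the remainder as a naive datetime. Only the "+00" form seen in
    # the example is handled here.
    assert value.endswith("+00")
    return datetime.strptime(value[:-3], "%Y-%m-%d %H:%M:%S")


def parse_bulk_int(value: str) -> Optional[int]:
    # Empty strings (e.g. team_id above) map to None.
    return int(value) if value else None


assert parse_bulk_datetime("2019-09-19 00:17:55+00") == datetime(2019, 9, 19, 0, 17, 55)
assert parse_bulk_int("") is None
assert parse_bulk_int("1") == 1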
Example #6
0
    def test_messages(self) -> None:
        processor = GroupAssigneeProcessor("sentry_groupasignee")

        metadata = KafkaMessageMetadata(offset=42,
                                        partition=0,
                                        timestamp=datetime(1970, 1, 1))

        ret = processor.process_message(self.INSERT_MSG, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        self.write_processed_messages([ret])
        ret = (get_cluster(StorageSetKey.EVENTS).get_query_connection(
            ClickhouseClientSettings.QUERY).execute(
                "SELECT * FROM groupassignee_local;"))
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        ret = processor.process_message(self.UPDATE_MSG_NO_KEY_CHANGE,
                                        metadata)
        assert ret == InsertBatch([self.PROCESSED])

        # Tests an update with a key change, which becomes two inserts:
        # the deletion of the old row and the insertion of the new one.
        ret = processor.process_message(self.UPDATE_MSG_WITH_KEY_CHANGE,
                                        metadata)
        assert ret == InsertBatch([self.DELETED, self.PROCESSED_UPDATE])

        ret = processor.process_message(self.DELETE_MSG, metadata)
        assert ret == InsertBatch([self.DELETED])
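The "two inserts" noted in the comment above are how an update with a key change is represented on top of a ReplacingMergeTree-style storage: the old row is re-inserted as a tombstone with its deleted flag set, and the row under the new key is inserted normally. A schematic sketch (the dictionaries are illustrative, not the actual PROCESSED/DELETED fixtures):

# The row under the old key is tombstoned ...
deleted_row = {"offset": 42, "deleted": 1, "project_id": 2, "group_id": 1359}
# ... and the row under the new key is written as a regular insert.
new_row = {"offset": 42, "deleted": 0, "project_id": 2, "group_id": 1360}

# A single batch carries both rows, mirroring
# InsertBatch([self.DELETED, self.PROCESSED_UPDATE]) in the test above.
update_with_key_change = [deleted_row, new_row]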
Example #7
0
    def __dry_run(
        self,
        local_operations: Sequence[SqlOperation],
        dist_operations: Sequence[SqlOperation],
    ) -> None:

        print("Local operations:")
        if len(local_operations) == 0:
            print("n/a")

        for op in local_operations:
            print(op.format_sql())

        print("\n")
        print("Dist operations:")

        if len(dist_operations) == 0:
            print("n/a")

        for op in dist_operations:
            cluster = get_cluster(op._storage_set)

            if not cluster.is_single_node():
                print(op.format_sql())
            else:
                print("Skipped dist operation - single node cluster")
Example #8
0
    def test_messages(self) -> None:
        processor = GroupedMessageProcessor("sentry_groupedmessage")

        metadata = KafkaMessageMetadata(
            offset=42, partition=0, timestamp=datetime(1970, 1, 1)
        )

        ret = processor.process_message(self.INSERT_MSG, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        write_processed_messages(self.storage, [ret])
        ret = (
            get_cluster(StorageSetKey.EVENTS)
            .get_query_connection(ClickhouseClientSettings.INSERT)
            .execute("SELECT * FROM groupedmessage_local;")
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),
            datetime(2019, 6, 19, 6, 45, 32),
            datetime(2019, 6, 19, 6, 45, 32),
            None,
        )

        ret = processor.process_message(self.UPDATE_MSG, metadata)
        assert ret == InsertBatch([self.PROCESSED])

        ret = processor.process_message(self.DELETE_MSG, metadata)
        assert ret == InsertBatch([self.DELETED])
Example #9
0
    def get_batch_writer(
        self,
        metrics: MetricsBackend,
        options: ClickhouseWriterOptions = None,
        table_name: Optional[str] = None,
        chunk_size: int = settings.CLICKHOUSE_HTTP_CHUNK_SIZE,
    ) -> BatchWriter[JSONRow]:
        table_name = table_name or self.__table_schema.get_table_name()
        if self.__write_format == WriteFormat.JSON:
            insert_statement = InsertStatement(table_name).with_format(
                "JSONEachRow")
        elif self.__write_format == WriteFormat.VALUES:
            column_names = self.get_writeable_columns()
            insert_statement = (InsertStatement(table_name).with_format(
                "VALUES").with_columns(column_names))
        else:
            raise TypeError("unknown table format", self.__write_format)
        options = self.__update_writer_options(options)

        return get_cluster(self.__storage_set).get_batch_writer(
            metrics,
            insert_statement,
            encoding=None,
            options=options,
            chunk_size=chunk_size,
            buffer_size=0,
        )
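For reference, the two branches above produce ClickHouse INSERT statements of roughly the following shapes. The rendering helper below is hypothetical and only mirrors the assumed output of InsertStatement, not the real API; the table and column names are made up:

from typing import Optional, Sequence


def render_insert(table: str, fmt: str, columns: Optional[Sequence[str]] = None) -> str:
    # WriteFormat.JSON   -> INSERT INTO <table> FORMAT JSONEachRow
    # WriteFormat.VALUES -> INSERT INTO <table> (col, ...) FORMAT VALUES
    column_clause = f" ({', '.join(columns)})" if columns else ""
    return f"INSERT INTO {table}{column_clause} FORMAT {fmt}"


assert render_insert("errors_local", "JSONEachRow") == (
    "INSERT INTO errors_local FORMAT JSONEachRow"
)
assert render_insert("errors_local", "VALUES", ["event_id", "project_id"]) == (
    "INSERT INTO errors_local (event_id, project_id) FORMAT VALUES"
)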
Example #10
0
 def test_bulk_load(self) -> None:
     row = GroupedMessageRow.from_bulk(
         {
             "project_id": "2",
             "id": "10",
             "status": "0",
             "last_seen": "2019-06-28 17:57:32+00",
             "first_seen": "2019-06-28 06:40:17+00",
             "active_at": "2019-06-28 06:40:17+00",
             "first_release_id": "26",
         }
     )
     write_processed_messages(self.storage, [InsertBatch([row.to_clickhouse()])])
     ret = (
         get_cluster(StorageSetKey.EVENTS)
         .get_query_connection(ClickhouseClientSettings.QUERY)
         .execute("SELECT * FROM groupedmessage_local;")
     )
     assert ret[0] == (
         0,  # offset
         0,  # deleted
         2,  # project_id
         10,  # id
         0,  # status
         datetime(2019, 6, 28, 17, 57, 32),
         datetime(2019, 6, 28, 6, 40, 17),
         datetime(2019, 6, 28, 6, 40, 17),
         26,
     )
Example #11
0
 def get_table_name(self) -> str:
     """
     This represents the table we interact with to send queries to Clickhouse.
     In distributed mode this will be a distributed table. In local mode it is a local table.
     """
     return (self.__local_table_name
             if get_cluster(self.__storage_set_key).is_single_node() else
             self.__dist_table_name)
Example #12
0
def fix_order_by() -> None:
    cluster = get_cluster(StorageSetKey.EVENTS)

    if not cluster.is_single_node():
        return

    clickhouse = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)
    database = cluster.get_database()

    new_primary_key = "project_id, id"
    old_primary_key = "id"

    ((curr_primary_key,),) = clickhouse.execute(
        f"SELECT primary_key FROM system.tables WHERE name = '{TABLE_NAME}' AND database = '{database}'"
    )

    assert curr_primary_key in [
        new_primary_key,
        old_primary_key,
    ], "Groupmessage table has invalid primary key"

    if curr_primary_key != old_primary_key:
        return

    # Add the project_id column
    add_column_sql = operations.AddColumn(
        storage_set=StorageSetKey.EVENTS,
        table_name=TABLE_NAME,
        column=Column("project_id", UInt(64)),
        after="record_deleted",
    ).format_sql()

    clickhouse.execute(add_column_sql)

    # There shouldn't be any data in the table yet
    assert (
        clickhouse.execute(f"SELECT COUNT() FROM {TABLE_NAME} FINAL;")[0][0] == 0
    ), f"{TABLE_NAME} is not empty"

    new_order_by = f"ORDER BY ({new_primary_key})"
    old_order_by = f"ORDER BY {old_primary_key}"

    ((curr_create_table_statement,),) = clickhouse.execute(
        f"SHOW CREATE TABLE {database}.{TABLE_NAME}"
    )

    new_create_table_statement = curr_create_table_statement.replace(
        TABLE_NAME, TABLE_NAME_NEW
    ).replace(old_order_by, new_order_by)

    clickhouse.execute(new_create_table_statement)

    clickhouse.execute(f"RENAME TABLE {TABLE_NAME} TO {TABLE_NAME_OLD};")

    clickhouse.execute(f"RENAME TABLE {TABLE_NAME_NEW} TO {TABLE_NAME};")

    clickhouse.execute(f"DROP TABLE {TABLE_NAME_OLD};")
Example #13
0
def test_transactions_compatibility() -> None:
    cluster = get_cluster(StorageSetKey.TRANSACTIONS)
    connection = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)

    def get_sampling_key() -> str:
        database = cluster.get_database()
        ((sampling_key, ), ) = connection.execute(
            f"SELECT sampling_key FROM system.tables WHERE name = 'transactions_local' AND database = '{database}'"
        )
        return sampling_key

    # Create old style table without sampling expression and insert data
    connection.execute("""
        CREATE TABLE transactions_local (`project_id` UInt64, `event_id` UUID,
        `trace_id` UUID, `span_id` UInt64, `transaction_name` LowCardinality(String),
        `transaction_hash` UInt64 MATERIALIZED CAST(cityHash64(transaction_name), 'UInt64'),
        `transaction_op` LowCardinality(String), `transaction_status` UInt8 DEFAULT 2,
        `start_ts` DateTime, `start_ms` UInt16, `finish_ts` DateTime, `finish_ms` UInt16,
        `duration` UInt32, `platform` LowCardinality(String), `environment` LowCardinality(Nullable(String)),
        `release` LowCardinality(Nullable(String)), `dist` LowCardinality(Nullable(String)),
        `ip_address_v4` Nullable(IPv4), `ip_address_v6` Nullable(IPv6), `user` String DEFAULT '',
        `user_hash` UInt64 MATERIALIZED cityHash64(user), `user_id` Nullable(String),
        `user_name` Nullable(String), `user_email` Nullable(String),
        `sdk_name` LowCardinality(String) DEFAULT CAST('', 'LowCardinality(String)'),
        `sdk_version` LowCardinality(String) DEFAULT CAST('', 'LowCardinality(String)'),
        `http_method` LowCardinality(Nullable(String)) DEFAULT CAST('', 'LowCardinality(Nullable(String))'),
        `http_referer` Nullable(String),
        `tags.key` Array(String), `tags.value` Array(String), `_tags_flattened` String,
        `contexts.key` Array(String), `contexts.value` Array(String), `_contexts_flattened` String,
        `partition` UInt16, `offset` UInt64, `message_timestamp` DateTime, `retention_days` UInt16,
        `deleted` UInt8) ENGINE = ReplacingMergeTree(deleted) PARTITION BY (retention_days, toMonday(finish_ts))
        ORDER BY (project_id, toStartOfDay(finish_ts), transaction_name, cityHash64(span_id))
        TTL finish_ts + toIntervalDay(retention_days);
        """)

    assert get_sampling_key() == ""
    generate_transactions()

    runner = Runner()
    runner.run_migration(MigrationKey(MigrationGroup.SYSTEM,
                                      "0001_migrations"))
    runner._update_migration_status(
        MigrationKey(MigrationGroup.TRANSACTIONS, "0001_transactions"),
        Status.COMPLETED)
    runner.run_migration(
        MigrationKey(
            MigrationGroup.TRANSACTIONS,
            "0002_transactions_onpremise_fix_orderby_and_partitionby",
        ),
        force=True,
    )

    assert get_sampling_key() == "cityHash64(span_id)"

    assert connection.execute("SELECT count(*) FROM transactions_local;") == [
        (5, )
    ]
Example #14
0
def build_plan(table_name: str, storage_set: StorageSetKey) -> ClickhouseQueryPlan:
    return ClickhouseQueryPlan(
        Query(Table(table_name, ColumnSet([]))),
        SimpleQueryPlanExecutionStrategy(
            get_cluster(storage_set), db_query_processors=[],
        ),
        storage_set,
        plan_query_processors=[],
        db_query_processors=[],
    )
Example #15
0
    def execute(self, local: bool) -> None:
        cluster = get_cluster(self._storage_set)

        nodes = (
            cluster.get_local_nodes() if local else cluster.get_distributed_nodes()
        )

        for node in nodes:
            connection = cluster.get_node_connection(
                ClickhouseClientSettings.MIGRATE, node)
            connection.execute(self.format_sql())
Example #16
0
def test_settings_skipped_group() -> None:
    from snuba.migrations import runner

    with patch("snuba.settings.SKIPPED_MIGRATION_GROUPS", {"querylog"}):
        runner.Runner().run_all(force=True)

    connection = get_cluster(StorageSetKey.MIGRATIONS).get_query_connection(
        ClickhouseClientSettings.MIGRATE)
    assert connection.execute(
        "SHOW TABLES LIKE 'querylog_local'").results == []
Example #17
0
    def test_messages(self):
        processor = GroupedMessageProcessor("sentry_groupedmessage")
        message_filter = CdcTableNameMessageFilter(postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(
            offset=42, partition=0, timestamp=datetime(1970, 1, 1)
        )

        assert message_filter.should_drop(self.__make_msg(0, 42, self.BEGIN_MSG, []))

        assert message_filter.should_drop(self.__make_msg(0, 42, self.COMMIT_MSG, []))

        assert not message_filter.should_drop(
            self.__make_msg(
                0, 42, self.INSERT_MSG, [("table", "sentry_groupedmessage".encode())]
            )
        )
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        self.write_processed_messages([ret])
        ret = (
            get_cluster(StorageSetKey.EVENTS)
            .get_query_connection(ClickhouseClientSettings.INSERT)
            .execute("SELECT * FROM groupedmessage_local;")
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),
            datetime(2019, 6, 19, 6, 45, 32),
            datetime(2019, 6, 19, 6, 45, 32),
            None,
        )

        assert not message_filter.should_drop(
            self.__make_msg(
                0, 42, self.UPDATE_MSG, [("table", "sentry_groupedmessage".encode())]
            )
        )
        update_msg = json.loads(self.UPDATE_MSG)
        ret = processor.process_message(update_msg, metadata)
        assert ret == InsertBatch([self.PROCESSED])

        assert not message_filter.should_drop(
            self.__make_msg(
                0, 42, self.DELETE_MSG, [("table", "sentry_groupedmessage".encode())]
            )
        )
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret == InsertBatch([self.DELETED])
Example #18
0
def test_reverse_all() -> None:
    runner = Runner()
    all_migrations = runner._get_pending_migrations()
    runner.run_all(force=True)
    for migration in reversed(all_migrations):
        runner.reverse_migration(migration, force=True)

    connection = get_cluster(StorageSetKey.MIGRATIONS).get_query_connection(
        ClickhouseClientSettings.MIGRATE)
    assert connection.execute(
        "SHOW TABLES") == [], "All tables should be deleted"
Example #19
0
    def __init__(self) -> None:
        migrations_cluster = get_cluster(StorageSetKey.MIGRATIONS)
        self.__table_name = (LOCAL_TABLE_NAME
                             if migrations_cluster.is_single_node() else
                             DIST_TABLE_NAME)

        self.__connection = migrations_cluster.get_query_connection(
            ClickhouseClientSettings.MIGRATE)

        self.__status: MutableMapping[MigrationKey,
                                      Tuple[Status, Optional[datetime]]] = {}
Example #20
0
def test_groupedmessages_compatibility() -> None:
    cluster = get_cluster(StorageSetKey.EVENTS)

    # Skip in multi node mode because this tests a migration
    # for an older table state that only applied to single node clusters
    if not cluster.is_single_node():
        return

    database = cluster.get_database()
    connection = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)

    # Create old style table without project ID
    connection.execute("""
        CREATE TABLE groupedmessage_local (`offset` UInt64, `record_deleted` UInt8,
        `id` UInt64, `status` Nullable(UInt8), `last_seen` Nullable(DateTime),
        `first_seen` Nullable(DateTime), `active_at` Nullable(DateTime),
        `first_release_id` Nullable(UInt64)) ENGINE = ReplacingMergeTree(offset)
        ORDER BY id SAMPLE BY id SETTINGS index_granularity = 8192
        """)

    migration_id = "0010_groupedmessages_onpremise_compatibility"

    runner = Runner()
    runner.run_migration(MigrationKey(MigrationGroup.SYSTEM,
                                      "0001_migrations"))
    events_migrations = get_group_loader(
        MigrationGroup.EVENTS).get_migrations()

    # Mark prior migrations complete
    for migration in events_migrations[:(
            events_migrations.index(migration_id))]:
        runner._update_migration_status(
            MigrationKey(MigrationGroup.EVENTS, migration), Status.COMPLETED)

    runner.run_migration(
        MigrationKey(MigrationGroup.EVENTS, migration_id),
        force=True,
    )

    outcome = perform_select_query(
        ["primary_key"],
        "system.tables",
        {
            "name": "groupedmessage_local",
            "database": str(database)
        },
        None,
        connection,
    )

    assert outcome == [("project_id, id", )]
Example #21
0
def ensure_drop_temporary_tables() -> None:
    cluster = get_cluster(StorageSetKey.EVENTS)

    if not cluster.is_single_node():
        return

    clickhouse = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)
    clickhouse.execute(
        operations.DropTable(
            storage_set=StorageSetKey.EVENTS,
            table_name=TABLE_NAME_NEW,
        ).format_sql())
    clickhouse.execute(
        operations.DropTable(
            storage_set=StorageSetKey.EVENTS,
            table_name=TABLE_NAME_OLD,
        ).format_sql())
Example #22
0
 def __init__(
     self,
     columns: ColumnSet,
     *,
     local_table_name: str,
     dist_table_name: str,
     storage_set_key: StorageSetKey,
     mandatory_conditions: Optional[Sequence[FunctionCall]] = None,
     part_format: Optional[Sequence[util.PartSegment]] = None,
 ):
     self.__local_table_name = local_table_name
     self.__table_name = (local_table_name
                          if get_cluster(storage_set_key).is_single_node()
                          else dist_table_name)
     self.__table_source = TableSource(self.get_table_name(), columns,
                                       mandatory_conditions)
     self.__part_format = part_format
Example #23
0
def _plan_composite_query(query: CompositeQuery[Entity],
                          settings: RequestSettings) -> CompositeQueryPlan:
    """
    Produces a composite query plan out of a composite query.

    This is the bulk of the logic of The Composite Planner. It is kept
    in its own function because it needs to be used by the data source
    visitor when planning subqueries (which can be composite as well).
    """

    planned_data_source = CompositeDataSourcePlanner(settings).visit(
        query.get_from_clause())

    root_db_processors, aliased_db_processors = planned_data_source.get_db_processors()

    return CompositeQueryPlan(
        # This is a mypy issue: https://github.com/python/mypy/issues/7520
        # At the time of writing generics in dataclasses are not properly
        # supported and mypy expects TQuery instead of CompositeQuery here.
        # If the issue is not fixed before we start enforcing this we will
        # have to restructure the query plan.
        query=CompositeQuery(
            from_clause=planned_data_source.translated_source,
            selected_columns=query.get_selected_columns(),
            array_join=query.get_arrayjoin(),
            condition=query.get_condition(),
            groupby=query.get_groupby(),
            having=query.get_having(),
            order_by=query.get_orderby(),
            limitby=query.get_limitby(),
            limit=query.get_limit(),
            offset=query.get_offset(),
            totals=query.has_totals(),
            granularity=query.get_granularity(),
        ),
        execution_strategy=CompositeExecutionStrategy(
            get_cluster(planned_data_source.storage_set_key),
            root_db_processors,
            aliased_db_processors,
            composite_processors=[SemiJoinOptimizer()],
        ),
        storage_set_key=planned_data_source.storage_set_key,
        root_processors=planned_data_source.root_processors,
        aliased_processors=planned_data_source.aliased_processors,
    )
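A minimal sketch of the recursive structure the docstring describes: a data-source visitor plans simple sources directly and re-enters the composite planning path for composite subqueries. The types and names below are illustrative, not the actual CompositeDataSourcePlanner API:

from dataclasses import dataclass
from typing import List, Union


@dataclass
class SimpleSource:
    entity: str


@dataclass
class CompositeSource:
    # A composite source combines other (possibly composite) sources.
    children: List[Union[SimpleSource, "CompositeSource"]]


def plan_source(source: Union[SimpleSource, CompositeSource]) -> str:
    # Simple sources map to a single plan; composite sources recurse, which is
    # why the composite planning logic is kept in its own reusable function.
    if isinstance(source, SimpleSource):
        return f"plan({source.entity})"
    return "composite(" + ", ".join(plan_source(c) for c in source.children) + ")"


assert plan_source(
    CompositeSource([SimpleSource("events"), CompositeSource([SimpleSource("groups")])])
) == "composite(plan(events), composite(plan(groups)))"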
Example #24
0
 def __init__(
     self,
     columns: ColumnSet,
     *,
     local_table_name: str,
     dist_table_name: str,
     storage_set_key: StorageSetKey,
     mandatory_conditions: Optional[Sequence[MandatoryCondition]] = None,
     prewhere_candidates: Optional[Sequence[str]] = None,
 ):
     self.__local_table_name = local_table_name
     self.__table_name = (
         local_table_name
         if get_cluster(storage_set_key).is_single_node()
         else dist_table_name
     )
     self.__table_source = TableSource(
         self.get_table_name(), columns, mandatory_conditions, prewhere_candidates,
     )
Example #25
0
    def get_batch_writer(
        self,
        metrics: MetricsBackend,
        options: ClickhouseWriterOptions = None,
        table_name: Optional[str] = None,
        chunk_size: int = settings.CLICKHOUSE_HTTP_CHUNK_SIZE,
    ) -> BatchWriter[JSONRow]:
        table_name = table_name or self.__table_schema.get_table_name()

        options = self.__update_writer_options(options)

        return get_cluster(self.__storage_set).get_batch_writer(
            metrics,
            InsertStatement(table_name).with_format("JSONEachRow"),
            encoding=None,
            options=options,
            chunk_size=chunk_size,
            buffer_size=0,
        )
Example #26
0
 def get_bulk_loader(
     self,
     source: BulkLoadSource,
     source_table: str,
     row_processor: RowProcessor,
     table_name: Optional[str] = None,
 ) -> BulkLoader:
     """
      Returns the instance of the bulk loader used to populate the dataset
      from an external source, when one is present.
     """
     table_name = table_name or self.__table_schema.get_table_name()
     return SingleTableBulkLoader(
         source=source,
         source_table=source_table,
         dest_table=table_name,
         row_processor=row_processor,
         clickhouse=get_cluster(self.__storage_set).get_query_connection(
             ClickhouseClientSettings.QUERY),
     )
Example #27
0
    def get_bulk_writer(
        self,
        metrics: MetricsBackend,
        encoding: Optional[str],
        column_names: Sequence[str],
        options: ClickhouseWriterOptions = None,
        table_name: Optional[str] = None,
    ) -> BatchWriter[bytes]:
        table_name = table_name or self.__table_schema.get_table_name()

        options = self.__update_writer_options(options)

        return get_cluster(self.__storage_set).get_batch_writer(
            metrics,
            InsertStatement(table_name).with_columns(column_names).with_format(
                "CSVWithNames"),
            encoding=encoding,
            options=options,
            chunk_size=1,
            buffer_size=settings.HTTP_WRITER_BUFFER_SIZE,
        )
Example #28
0
 def __init__(
     self,
     storage_key: StorageKey,
     storage_set_key: StorageSetKey,
     schema: Schema,
     query_processors: Sequence[QueryProcessor],
     stream_loader: KafkaStreamLoader,
     query_splitters: Optional[Sequence[QuerySplitStrategy]] = None,
     replacer_processor: Optional[ReplacerProcessor] = None,
     writer_options: ClickhouseWriterOptions = None,
 ) -> None:
     super().__init__(
         storage_key, storage_set_key, schema, query_processors, query_splitters
     )
     assert isinstance(schema, WritableTableSchema)
     self.__table_writer = TableWriter(
         cluster=get_cluster(storage_set_key),
         write_schema=schema,
         stream_loader=stream_loader,
         replacer_processor=replacer_processor,
         writer_options=writer_options,
     )
Example #29
0
def test_reverse_migration() -> None:
    runner = Runner()
    runner.run_all(force=True)

    connection = get_cluster(StorageSetKey.MIGRATIONS).get_query_connection(
        ClickhouseClientSettings.MIGRATE)

    # Invalid migration ID
    with pytest.raises(MigrationError):
        runner.reverse_migration(MigrationKey(MigrationGroup.SYSTEM, "xxx"))

    with pytest.raises(MigrationError):
        runner.reverse_migration(
            MigrationKey(MigrationGroup.EVENTS, "0003_errors"))

    # Reverse with --fake
    for migration_id in reversed(
            get_group_loader(MigrationGroup.EVENTS).get_migrations()):
        runner.reverse_migration(MigrationKey(MigrationGroup.EVENTS,
                                              migration_id),
                                 fake=True)
    assert (len(connection.execute("SHOW TABLES LIKE 'sentry_local'")) == 1
            ), "Table still exists"
Example #30
0
def test_groupedmessages_compatibility() -> None:
    cluster = get_cluster(StorageSetKey.EVENTS)
    database = cluster.get_database()
    connection = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)

    # Create old style table without project ID
    connection.execute("""
        CREATE TABLE groupedmessage_local (`offset` UInt64, `record_deleted` UInt8,
        `id` UInt64, `status` Nullable(UInt8), `last_seen` Nullable(DateTime),
        `first_seen` Nullable(DateTime), `active_at` Nullable(DateTime),
        `first_release_id` Nullable(UInt64)) ENGINE = ReplacingMergeTree(offset)
        ORDER BY id SAMPLE BY id SETTINGS index_granularity = 8192
        """)

    migration_id = "0010_groupedmessages_onpremise_compatibility"

    runner = Runner()
    runner.run_migration(MigrationKey(MigrationGroup.SYSTEM,
                                      "0001_migrations"))
    events_migrations = get_group_loader(
        MigrationGroup.EVENTS).get_migrations()

    # Mark prior migrations complete
    for migration in events_migrations[:(
            events_migrations.index(migration_id))]:
        runner._update_migration_status(
            MigrationKey(MigrationGroup.EVENTS, migration), Status.COMPLETED)

    runner.run_migration(
        MigrationKey(MigrationGroup.EVENTS, migration_id),
        force=True,
    )

    assert connection.execute(
        f"SELECT primary_key FROM system.tables WHERE name = 'groupedmessage_local' AND database = '{database}'"
    ) == [("project_id, id", )]