Пример #1
0
def test_run_migration() -> None:
    """Exercise ``Runner.run_migration`` for the success, error and fake paths.

    Runs the system bootstrap migration and checks the row recorded in
    ``migrations_local``, then verifies that an unknown ID and an
    out-of-order migration both raise ``MigrationError``, and finally that
    a ``fake=True`` run leaves no ``sentry_local`` table behind.
    """
    migration_runner = Runner()
    system_key = MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    migration_runner.run_migration(system_key)

    migrations_cluster = get_cluster(StorageSetKey.MIGRATIONS)
    clickhouse = migrations_cluster.get_query_connection(
        ClickhouseClientSettings.MIGRATE)
    recorded_rows = clickhouse.execute(
        "SELECT group, migration_id, status, version FROM migrations_local;")
    assert recorded_rows == [("system", "0001_migrations", "completed", 1)]

    # A migration ID that does not exist must be rejected
    with pytest.raises(MigrationError):
        migration_runner.run_migration(
            MigrationKey(MigrationGroup.SYSTEM, "xxx"))

    # 0003 is requested although the earlier events migrations never ran
    with pytest.raises(MigrationError):
        migration_runner.run_migration(
            MigrationKey(MigrationGroup.EVENTS, "0003_errors"))

    # With --fake the migration is not executed, so no table may appear
    fake_key = MigrationKey(MigrationGroup.EVENTS, "0001_events_initial")
    migration_runner.run_migration(fake_key, fake=True)
    matching_tables = clickhouse.execute("SHOW TABLES LIKE 'sentry_local'")
    assert matching_tables == []
Пример #2
0
def test_transactions_compatibility() -> None:
    """Check the transactions on-premise fix against an old-style table.

    Creates ``transactions_local`` without a sampling expression, fills it
    via ``generate_transactions()``, marks ``0001_transactions`` as
    completed and force-runs
    ``0002_transactions_onpremise_fix_orderby_and_partitionby``. Afterwards
    the table must report ``cityHash64(span_id)`` as its sampling key and
    contain 5 rows.
    """
    transactions_cluster = get_cluster(StorageSetKey.TRANSACTIONS)
    clickhouse = transactions_cluster.get_query_connection(
        ClickhouseClientSettings.MIGRATE)

    def get_sampling_key() -> str:
        """Return the sampling key system.tables reports for the table."""
        database = transactions_cluster.get_database()
        rows = clickhouse.execute(
            f"SELECT sampling_key FROM system.tables WHERE name = 'transactions_local' AND database = '{database}'"
        )
        # Exactly one row with one column is expected; the unpacking below
        # fails for any other result shape.
        ((sampling_key, ), ) = rows
        return sampling_key

    # Old-style schema: note the missing SAMPLE BY clause
    clickhouse.execute("""
        CREATE TABLE transactions_local (`project_id` UInt64, `event_id` UUID,
        `trace_id` UUID, `span_id` UInt64, `transaction_name` LowCardinality(String),
        `transaction_hash` UInt64 MATERIALIZED CAST(cityHash64(transaction_name), 'UInt64'),
        `transaction_op` LowCardinality(String), `transaction_status` UInt8 DEFAULT 2,
        `start_ts` DateTime, `start_ms` UInt16, `finish_ts` DateTime, `finish_ms` UInt16,
        `duration` UInt32, `platform` LowCardinality(String), `environment` LowCardinality(Nullable(String)),
        `release` LowCardinality(Nullable(String)), `dist` LowCardinality(Nullable(String)),
        `ip_address_v4` Nullable(IPv4), `ip_address_v6` Nullable(IPv6), `user` String DEFAULT '',
        `user_hash` UInt64 MATERIALIZED cityHash64(user), `user_id` Nullable(String),
        `user_name` Nullable(String), `user_email` Nullable(String),
        `sdk_name` LowCardinality(String) DEFAULT CAST('', 'LowCardinality(String)'),
        `sdk_version` LowCardinality(String) DEFAULT CAST('', 'LowCardinality(String)'),
        `http_method` LowCardinality(Nullable(String)) DEFAULT CAST('', 'LowCardinality(Nullable(String))'),
        `http_referer` Nullable(String),
        `tags.key` Array(String), `tags.value` Array(String), `_tags_flattened` String,
        `contexts.key` Array(String), `contexts.value` Array(String), `_contexts_flattened` String,
        `partition` UInt16, `offset` UInt64, `message_timestamp` DateTime, `retention_days` UInt16,
        `deleted` UInt8) ENGINE = ReplacingMergeTree(deleted) PARTITION BY (retention_days, toMonday(finish_ts))
        ORDER BY (project_id, toStartOfDay(finish_ts), transaction_name, cityHash64(span_id))
        TTL finish_ts + toIntervalDay(retention_days);
        """)

    # Before the fix the table has no sampling key at all
    assert get_sampling_key() == ""
    generate_transactions()

    migration_runner = Runner()
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations"))

    # Record the initial transactions migration as done without running it,
    # since the table was created by hand above
    initial_key = MigrationKey(MigrationGroup.TRANSACTIONS,
                               "0001_transactions")
    migration_runner._update_migration_status(initial_key, Status.COMPLETED)

    fix_key = MigrationKey(
        MigrationGroup.TRANSACTIONS,
        "0002_transactions_onpremise_fix_orderby_and_partitionby",
    )
    migration_runner.run_migration(fix_key, force=True)

    assert get_sampling_key() == "cityHash64(span_id)"

    # The rows written before the migration must still be there
    row_count = clickhouse.execute("SELECT count(*) FROM transactions_local;")
    assert row_count == [(5, )]
Пример #3
0
def test_version() -> None:
    """Check that the next version grows by one per recorded status update.

    Starts at 1 for a migration key with no recorded status, then expects
    2 after an IN_PROGRESS update and 3 after a COMPLETED update.
    """
    runner = Runner()
    runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations"))

    key = MigrationKey(MigrationGroup.EVENTS, "test")
    assert runner._get_next_version(key) == 1

    # Each status write is expected to bump the next version by one
    status_sequence = (Status.IN_PROGRESS, Status.COMPLETED)
    for expected_version, new_status in enumerate(status_sequence, start=2):
        runner._update_migration_status(key, new_status)
        assert runner._get_next_version(key) == expected_version
Пример #4
0
def test_groupedmessages_compatibility() -> None:
    """Check the groupedmessages on-premise compatibility migration.

    Creates an old-style ``groupedmessage_local`` table without a project
    ID, marks all earlier events migrations as completed, force-runs
    ``0010_groupedmessages_onpremise_compatibility`` and asserts that the
    table's primary key becomes ``project_id, id``. Does nothing on a
    multi-node cluster.
    """
    events_cluster = get_cluster(StorageSetKey.EVENTS)

    # Ignore the multi node mode because this tests a migration
    # for an older table state that only applied to single node
    if not events_cluster.is_single_node():
        return

    database = events_cluster.get_database()
    clickhouse = events_cluster.get_query_connection(
        ClickhouseClientSettings.MIGRATE)

    # Create old style table without project ID
    clickhouse.execute("""
        CREATE TABLE groupedmessage_local (`offset` UInt64, `record_deleted` UInt8,
        `id` UInt64, `status` Nullable(UInt8), `last_seen` Nullable(DateTime),
        `first_seen` Nullable(DateTime), `active_at` Nullable(DateTime),
        `first_release_id` Nullable(UInt64)) ENGINE = ReplacingMergeTree(offset)
        ORDER BY id SAMPLE BY id SETTINGS index_granularity = 8192
        """)

    migration_id = "0010_groupedmessages_onpremise_compatibility"

    runner = Runner()
    runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations"))

    events_loader = get_group_loader(MigrationGroup.EVENTS)
    events_migrations = events_loader.get_migrations()

    # Mark every migration listed before the target as complete
    target_position = events_migrations.index(migration_id)
    for prior_id in events_migrations[:target_position]:
        prior_key = MigrationKey(MigrationGroup.EVENTS, prior_id)
        runner._update_migration_status(prior_key, Status.COMPLETED)

    target_key = MigrationKey(MigrationGroup.EVENTS, migration_id)
    runner.run_migration(target_key, force=True)

    conditions = {
        "name": "groupedmessage_local",
        "database": str(database)
    }
    outcome = perform_select_query(
        ["primary_key"],
        "system.tables",
        conditions,
        None,
        clickhouse,
    )

    assert outcome == [("project_id, id", )]
Пример #5
0
def _main() -> None:
    """
    Dry-run every migration file added or modified relative to origin/master.

    This method takes the output of
    `git diff --diff-filter=AM --name-only origin/master -- <migration files>`
    and performs the equivalent of `snuba migrations run --dry-run` with the
    proper parameters for each listed file, for a CI action. The group is
    taken from the file's parent directory name and the migration ID from
    the file name without its extension.

    Raises:
        ExecError: if `git diff` exits with a non-zero return code. The
            message carries git's stderr, or its stdout when stderr is empty.
    """
    diff_result = subprocess.run(
        [
            "git",
            "diff",
            "--diff-filter=AM",
            "--name-only",
            "origin/master",
            "--",
            "snuba/migrations/snuba_migrations/*/[0-9]*.py",
        ],
        stdout=subprocess.PIPE,
        # git writes its diagnostics to stderr; capture it so that a failure
        # produces a meaningful error message instead of an empty one.
        stderr=subprocess.PIPE,
        text=True,
    )
    if diff_result.returncode != 0:
        raise ExecError(diff_result.stderr or diff_result.stdout)

    lines = diff_result.stdout.splitlines()
    if lines:
        print("-- start migrations")
        print()
    for line in lines:
        # Path layout: .../<group>/<migration_id>.py
        migration_filename = os.path.basename(line)
        migration_group = MigrationGroup(os.path.basename(os.path.dirname(line)))
        migration_id, _ = os.path.splitext(migration_filename)

        runner = Runner()
        migration_key = MigrationKey(migration_group, migration_id)
        print(f"-- migration {migration_group.value} : {migration_id}")
        runner.run_migration(migration_key, dry_run=True)
        print(f"-- end migration {migration_group.value} : {migration_id}")
Пример #6
0
def test_get_pending_migrations() -> None:
    """Check that running one migration shrinks the pending list by one.

    Initially every known migration is pending; after the system bootstrap
    migration has run, exactly one fewer is expected.
    """
    runner = Runner()
    expected_total = get_total_migration_count()

    pending_before = runner._get_pending_migrations()
    assert len(pending_before) == expected_total

    bootstrap_key = MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    runner.run_migration(bootstrap_key)

    pending_after = runner._get_pending_migrations()
    assert len(pending_after) == expected_total - 1
Пример #7
0
def test_get_status() -> None:
    """Check ``Runner.get_status`` before and after a migration runs.

    The events initial migration must report ``(NOT_STARTED, None)`` both
    before and after the system bootstrap migration has run, and
    ``COMPLETED`` together with a ``datetime`` once it has been run itself.
    """
    runner = Runner()
    events_initial = MigrationKey(MigrationGroup.EVENTS,
                                  "0001_events_initial")
    not_started = (Status.NOT_STARTED, None)

    # Queried before the system bootstrap migration has run
    assert runner.get_status(events_initial) == not_started

    runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations"))
    assert runner.get_status(events_initial) == not_started

    runner.run_migration(events_initial)
    state, timestamp = runner.get_status(events_initial)
    assert state == Status.COMPLETED
    assert isinstance(timestamp, datetime)
Пример #8
0
def run(group: str, migration_id: str, force: bool, fake: bool,
        dry_run: bool) -> None:
    """
    Runs a single migration.
    --force must be passed in order to run blocking migrations.
    --fake marks a migration as completed without running anything.

    Migrations that are already in an in-progress or completed status will not be run.
    """
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably rendered as the command's --help text - confirm.

    # A dry run skips the connection check.
    if not dry_run:
        check_clickhouse_connections()

    runner = Runner()
    migration_group = MigrationGroup(group)
    migration_key = MigrationKey(migration_group, migration_id)

    if dry_run:
        runner.run_migration(migration_key, dry_run=True)
        return

    if fake:
        # abort=True makes click raise Abort when the user declines, so
        # nothing below runs without an explicit confirmation.
        click.confirm(
            "This will mark the migration as completed without actually running it. Your database may be in an invalid state. Are you sure?",
            abort=True,
        )

    try:
        runner.run_migration(migration_key, force=force, fake=fake)
    except MigrationError as e:
        # Surface migration failures as a CLI error while keeping the
        # original exception as the explicit cause.
        raise click.ClickException(str(e)) from e

    click.echo(f"Finished running migration {migration_key}")
Пример #9
0
def reverse(group: str, migration_id: str, force: bool, fake: bool,
            dry_run: bool) -> None:
    """
    Reverses a single migration.

    --force is required to reverse an already completed migration.
    --fake marks a migration as reversed without doing anything.
    """
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably rendered as the command's --help text - confirm.

    # A dry run skips the connection check.
    if not dry_run:
        check_clickhouse_connections()

    runner = Runner()
    migration_group = MigrationGroup(group)
    migration_key = MigrationKey(migration_group, migration_id)

    if dry_run:
        runner.reverse_migration(migration_key, dry_run=True)
        return

    if fake:
        # abort=True makes click raise Abort when the user declines, so
        # nothing below runs without an explicit confirmation.
        click.confirm(
            "This will mark the migration as not started without actually reversing it. Your database may be in an invalid state. Are you sure?",
            abort=True,
        )

    try:
        runner.reverse_migration(migration_key, force=force, fake=fake)
    except MigrationError as e:
        # Surface migration failures as a CLI error while keeping the
        # original exception as the explicit cause.
        raise click.ClickException(str(e)) from e

    click.echo(f"Finished reversing migration {migration_key}")
Пример #10
0
def run_prior_migrations(
    migration_group: MigrationGroup, stop_migration_id: str, runner: Runner
) -> None:
    """Force-run the group's migrations that precede a given migration.

    Walks the migrations that ``runner.show_all()`` lists for
    ``migration_group`` in order and runs each one with ``force=True``,
    stopping before the one whose ID equals ``stop_migration_id`` (which is
    not run). If no migration has that ID, every migration of the group is
    run.

    Arguments:
    migration_group -- the group of the desired migration
    stop_migration_id -- desired migration ID, as a stopping point
    runner -- migration runner object
    """
    # Pick the migration list belonging to the requested group; next()
    # raises StopIteration when show_all() does not list that group.
    group_entries = next(
        entries
        for (listed_group, entries) in runner.show_all()
        if listed_group == migration_group
    )

    for entry in group_entries:
        if entry.migration_id == stop_migration_id:
            # Reached the stopping point: leave it (and the rest) untouched
            return

        prior_key = MigrationKey(migration_group, entry.migration_id)
        runner.run_migration(prior_key, force=True)
Пример #11
0
def test_backfill_errors() -> None:
    """Check that ``0014_backfill_errors`` fills the errors table.

    Runs every events migration before the backfill, writes 10 raw events
    to the events storage, verifies the errors table is still empty, then
    force-runs the backfill and expects 10 rows plus the expected context
    keys and values on the first returned row.
    """
    backfill_migration_id = "0014_backfill_errors"

    runner = Runner()
    runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations"))
    run_prior_migrations(MigrationGroup.EVENTS, backfill_migration_id, runner)

    errors_storage = get_writable_storage(StorageKey.ERRORS)
    errors_cluster = errors_storage.get_cluster()
    clickhouse = errors_cluster.get_query_connection(
        ClickhouseClientSettings.QUERY)
    errors_schema = errors_storage.get_table_writer().get_schema()
    errors_table_name = errors_schema.get_table_name()

    # Ten raw events go into the events storage only
    raw_events = [get_raw_event() for _ in range(10)]
    write_unprocessed_events(get_writable_storage(StorageKey.EVENTS),
                             raw_events)

    # Nothing has been copied to the errors table yet
    assert get_count_from_storage(errors_table_name, clickhouse) == 0

    # Run 0014_backfill_errors
    backfill_key = MigrationKey(MigrationGroup.EVENTS, backfill_migration_id)
    runner.run_migration(backfill_key, force=True)

    assert get_count_from_storage(errors_table_name, clickhouse) == 10

    selected_columns = ["contexts.key", "contexts.value"]
    outcome = perform_select_query(selected_columns, errors_table_name, None,
                                   str(1), clickhouse)

    expected_keys = [
        "device.model_id",
        "geo.city",
        "geo.country_code",
        "geo.region",
        "os.kernel_version",
    ]
    expected_values = ["Galaxy", "San Francisco", "US", "CA", "1.1.1"]
    assert outcome[0] == (expected_keys, expected_values)
Пример #12
0
def test_reverse_migration() -> None:
    """Exercise ``Runner.reverse_migration`` for error and fake paths.

    After running all migrations, reversing an unknown ID and reversing
    ``0003_errors`` directly must both raise ``MigrationError``. Reversing
    every events migration with ``fake=True`` must leave the
    ``sentry_local`` table in place.
    """
    runner = Runner()
    runner.run_all(force=True)

    connection = get_cluster(StorageSetKey.MIGRATIONS).get_query_connection(
        ClickhouseClientSettings.MIGRATE)

    # Invalid migration ID
    with pytest.raises(MigrationError):
        runner.reverse_migration(MigrationKey(MigrationGroup.SYSTEM, "xxx"))

    # Reverse out of order - presumably rejected because later events
    # migrations are still completed at this point
    with pytest.raises(MigrationError):
        runner.reverse_migration(
            MigrationKey(MigrationGroup.EVENTS, "0003_errors"))

    # Reverse with --fake, newest migration first
    for migration_id in reversed(
            get_group_loader(MigrationGroup.EVENTS).get_migrations()):
        runner.reverse_migration(MigrationKey(MigrationGroup.EVENTS,
                                              migration_id),
                                 fake=True)

    # The message is shown when the assertion FAILS, i.e. when the table is
    # missing, so it has to describe the expectation rather than the
    # passing state.
    assert (len(connection.execute("SHOW TABLES LIKE 'sentry_local'")) == 1
            ), "sentry_local should still exist after a fake reverse"
Пример #13
0
def test_groupedmessages_compatibility() -> None:
    """Check the groupedmessages on-premise compatibility migration.

    Creates an old-style ``groupedmessage_local`` table without a project
    ID, marks all earlier events migrations as completed, force-runs
    ``0010_groupedmessages_onpremise_compatibility`` and asserts that
    ``system.tables`` then reports ``project_id, id`` as the primary key.
    """
    events_cluster = get_cluster(StorageSetKey.EVENTS)
    database = events_cluster.get_database()
    clickhouse = events_cluster.get_query_connection(
        ClickhouseClientSettings.MIGRATE)

    # Create old style table without project ID
    clickhouse.execute("""
        CREATE TABLE groupedmessage_local (`offset` UInt64, `record_deleted` UInt8,
        `id` UInt64, `status` Nullable(UInt8), `last_seen` Nullable(DateTime),
        `first_seen` Nullable(DateTime), `active_at` Nullable(DateTime),
        `first_release_id` Nullable(UInt64)) ENGINE = ReplacingMergeTree(offset)
        ORDER BY id SAMPLE BY id SETTINGS index_granularity = 8192
        """)

    migration_id = "0010_groupedmessages_onpremise_compatibility"

    runner = Runner()
    runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations"))

    events_loader = get_group_loader(MigrationGroup.EVENTS)
    events_migrations = events_loader.get_migrations()

    # Mark every migration listed before the target as complete
    target_position = events_migrations.index(migration_id)
    for prior_id in events_migrations[:target_position]:
        prior_key = MigrationKey(MigrationGroup.EVENTS, prior_id)
        runner._update_migration_status(prior_key, Status.COMPLETED)

    target_key = MigrationKey(MigrationGroup.EVENTS, migration_id)
    runner.run_migration(target_key, force=True)

    primary_key_rows = clickhouse.execute(
        f"SELECT primary_key FROM system.tables WHERE name = 'groupedmessage_local' AND database = '{database}'"
    )
    assert primary_key_rows == [("project_id, id", )]