def test_run_migration() -> None:
    """Running the bootstrap migration records it; bad IDs and ordering fail."""
    migration_runner = Runner()
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    )

    conn = get_cluster(StorageSetKey.MIGRATIONS).get_query_connection(
        ClickhouseClientSettings.MIGRATE
    )
    rows = conn.execute(
        "SELECT group, migration_id, status, version FROM migrations_local;"
    )
    assert rows == [("system", "0001_migrations", "completed", 1)]

    # An unknown migration ID must be rejected.
    with pytest.raises(MigrationError):
        migration_runner.run_migration(MigrationKey(MigrationGroup.SYSTEM, "xxx"))

    # Running a later migration before its predecessors must be rejected.
    with pytest.raises(MigrationError):
        migration_runner.run_migration(
            MigrationKey(MigrationGroup.EVENTS, "0003_errors")
        )

    # A fake run marks status only; no table should actually be created.
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.EVENTS, "0001_events_initial"), fake=True
    )
    assert conn.execute("SHOW TABLES LIKE 'sentry_local'") == []
def test_transactions_compatibility() -> None:
    """The orderby/partitionby fix migration adds the sampling key in place."""
    cluster = get_cluster(StorageSetKey.TRANSACTIONS)
    conn = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)

    def get_sampling_key() -> str:
        # SAMPLE BY expression as recorded in ClickHouse table metadata.
        db_name = cluster.get_database()
        ((sampling_key,),) = conn.execute(
            f"SELECT sampling_key FROM system.tables WHERE name = 'transactions_local' AND database = '{db_name}'"
        )
        return sampling_key

    # Create the legacy table shape, which has no sampling expression.
    conn.execute(
        """
        CREATE TABLE transactions_local (`project_id` UInt64, `event_id` UUID,
        `trace_id` UUID, `span_id` UInt64,
        `transaction_name` LowCardinality(String),
        `transaction_hash` UInt64 MATERIALIZED CAST(cityHash64(transaction_name), 'UInt64'),
        `transaction_op` LowCardinality(String),
        `transaction_status` UInt8 DEFAULT 2, `start_ts` DateTime,
        `start_ms` UInt16, `finish_ts` DateTime, `finish_ms` UInt16,
        `duration` UInt32, `platform` LowCardinality(String),
        `environment` LowCardinality(Nullable(String)),
        `release` LowCardinality(Nullable(String)),
        `dist` LowCardinality(Nullable(String)),
        `ip_address_v4` Nullable(IPv4), `ip_address_v6` Nullable(IPv6),
        `user` String DEFAULT '',
        `user_hash` UInt64 MATERIALIZED cityHash64(user),
        `user_id` Nullable(String), `user_name` Nullable(String),
        `user_email` Nullable(String),
        `sdk_name` LowCardinality(String) DEFAULT CAST('', 'LowCardinality(String)'),
        `sdk_version` LowCardinality(String) DEFAULT CAST('', 'LowCardinality(String)'),
        `http_method` LowCardinality(Nullable(String)) DEFAULT CAST('', 'LowCardinality(Nullable(String))'),
        `http_referer` Nullable(String),
        `tags.key` Array(String), `tags.value` Array(String),
        `_tags_flattened` String,
        `contexts.key` Array(String), `contexts.value` Array(String),
        `_contexts_flattened` String,
        `partition` UInt16, `offset` UInt64, `message_timestamp` DateTime,
        `retention_days` UInt16, `deleted` UInt8)
        ENGINE = ReplacingMergeTree(deleted)
        PARTITION BY (retention_days, toMonday(finish_ts))
        ORDER BY (project_id, toStartOfDay(finish_ts), transaction_name, cityHash64(span_id))
        TTL finish_ts + toIntervalDay(retention_days);
        """
    )
    assert get_sampling_key() == ""
    generate_transactions()

    migration_runner = Runner()
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    )
    # Pretend the initial transactions migration already ran against the
    # legacy table, then force the fixup migration over it.
    migration_runner._update_migration_status(
        MigrationKey(MigrationGroup.TRANSACTIONS, "0001_transactions"),
        Status.COMPLETED,
    )
    migration_runner.run_migration(
        MigrationKey(
            MigrationGroup.TRANSACTIONS,
            "0002_transactions_onpremise_fix_orderby_and_partitionby",
        ),
        force=True,
    )

    # The sampling key was added and the pre-existing rows survived.
    assert get_sampling_key() == "cityHash64(span_id)"
    assert conn.execute("SELECT count(*) FROM transactions_local;") == [(5,)]
def test_version() -> None:
    """Each recorded status bumps the next version number for a migration."""
    migration_runner = Runner()
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    )

    key = MigrationKey(MigrationGroup.EVENTS, "test")
    assert migration_runner._get_next_version(key) == 1

    # Every status update (regardless of status value) increments the version.
    for status, expected_version in (
        (Status.IN_PROGRESS, 2),
        (Status.COMPLETED, 3),
    ):
        migration_runner._update_migration_status(key, status)
        assert migration_runner._get_next_version(key) == expected_version
def test_groupedmessages_compatibility() -> None:
    """The compatibility migration rewrites the primary key to add project_id."""
    cluster = get_cluster(StorageSetKey.EVENTS)

    # Skip multi-node clusters: this exercises a migration for an older
    # single-node-only table state.
    if not cluster.is_single_node():
        return

    db_name = cluster.get_database()
    conn = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)

    # Create the legacy table shape, which lacks a project ID column.
    conn.execute(
        """
        CREATE TABLE groupedmessage_local (`offset` UInt64, `record_deleted` UInt8,
        `id` UInt64, `status` Nullable(UInt8), `last_seen` Nullable(DateTime),
        `first_seen` Nullable(DateTime), `active_at` Nullable(DateTime),
        `first_release_id` Nullable(UInt64))
        ENGINE = ReplacingMergeTree(offset)
        ORDER BY id SAMPLE BY id SETTINGS index_granularity = 8192
        """
    )

    migration_id = "0010_groupedmessages_onpremise_compatibility"
    migration_runner = Runner()
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    )

    events_migrations = get_group_loader(MigrationGroup.EVENTS).get_migrations()

    # Mark everything before the compatibility migration as already complete.
    for prior_id in events_migrations[: events_migrations.index(migration_id)]:
        migration_runner._update_migration_status(
            MigrationKey(MigrationGroup.EVENTS, prior_id), Status.COMPLETED
        )

    migration_runner.run_migration(
        MigrationKey(MigrationGroup.EVENTS, migration_id), force=True
    )

    outcome = perform_select_query(
        ["primary_key"],
        "system.tables",
        {"name": "groupedmessage_local", "database": str(db_name)},
        None,
        conn,
    )
    assert outcome == [("project_id, id",)]
def _main() -> None:
    """
    Dry-run every migration file added or modified relative to origin/master.

    Intended for a CI action: the changed files are discovered with
    ``git diff --diff-filter=AM --name-only origin/master -- <migration glob>``
    and each one is run via ``Runner.run_migration(..., dry_run=True)``,
    printing the SQL between start/end markers.

    Raises:
        ExecError: if the git invocation fails; the message carries git's
            stderr output (falling back to stdout if stderr is empty).
    """
    diff_result = subprocess.run(
        [
            "git",
            "diff",
            "--diff-filter=AM",
            "--name-only",
            "origin/master",
            "--",
            "snuba/migrations/snuba_migrations/*/[0-9]*.py",
        ],
        stdout=subprocess.PIPE,
        # Capture stderr as well: git writes its error diagnostics there,
        # not to stdout, so previously a failure raised an empty message.
        stderr=subprocess.PIPE,
        text=True,
    )
    if diff_result.returncode != 0:
        raise ExecError(diff_result.stderr or diff_result.stdout)

    lines = diff_result.stdout.splitlines()
    if not lines:
        return

    print("-- start migrations")
    print()
    for line in lines:
        # Path shape: snuba/migrations/snuba_migrations/<group>/<id>.py
        migration_filename = os.path.basename(line)
        migration_group = MigrationGroup(os.path.basename(os.path.dirname(line)))
        migration_id, _ = os.path.splitext(migration_filename)
        runner = Runner()
        migration_key = MigrationKey(migration_group, migration_id)
        print(f"-- migration {migration_group.value} : {migration_id}")
        runner.run_migration(migration_key, dry_run=True)
        print(f"-- end migration {migration_group.value} : {migration_id}")
def test_get_pending_migrations() -> None:
    """Running one migration shrinks the pending set by exactly one."""
    migration_runner = Runner()
    expected_total = get_total_migration_count()
    assert len(migration_runner._get_pending_migrations()) == expected_total

    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    )
    assert len(migration_runner._get_pending_migrations()) == expected_total - 1
def test_get_status() -> None:
    """Status goes NOT_STARTED -> COMPLETED (with timestamp) as migrations run."""
    migration_runner = Runner()
    events_key = MigrationKey(MigrationGroup.EVENTS, "0001_events_initial")

    assert migration_runner.get_status(events_key) == (Status.NOT_STARTED, None)

    # Bootstrapping the migrations table does not change other groups' status.
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    )
    assert migration_runner.get_status(events_key) == (Status.NOT_STARTED, None)

    migration_runner.run_migration(events_key)
    status, finished_at = migration_runner.get_status(events_key)
    assert status == Status.COMPLETED
    assert isinstance(finished_at, datetime)
def run(group: str, migration_id: str, force: bool, fake: bool, dry_run: bool) -> None:
    """
    Runs a single migration.
    --force must be passed in order to run blocking migrations.
    --fake marks a migration as completed without running anything.

    Migrations that are already in an in-progress or completed status will not be run.
    """
    # Dry runs never touch the database, so skip the connection check.
    if not dry_run:
        check_clickhouse_connections()

    runner = Runner()
    migration_key = MigrationKey(MigrationGroup(group), migration_id)

    if dry_run:
        runner.run_migration(migration_key, dry_run=True)
        return

    # click.Abort from a declined confirmation propagates; only
    # MigrationError is translated into a CLI error below.
    if fake:
        click.confirm(
            "This will mark the migration as completed without actually running it. Your database may be in an invalid state. Are you sure?",
            abort=True,
        )
    try:
        runner.run_migration(migration_key, force=force, fake=fake)
    except MigrationError as e:
        raise click.ClickException(str(e))

    click.echo(f"Finished running migration {migration_key}")
def reverse(group: str, migration_id: str, force: bool, fake: bool, dry_run: bool) -> None:
    """
    Reverses a single migration.
    --force is required to reverse an already completed migration.
    --fake marks a migration as reversed without doing anything.
    """
    # Dry runs never touch the database, so skip the connection check.
    if not dry_run:
        check_clickhouse_connections()

    runner = Runner()
    migration_key = MigrationKey(MigrationGroup(group), migration_id)

    if dry_run:
        runner.reverse_migration(migration_key, dry_run=True)
        return

    # click.Abort from a declined confirmation propagates; only
    # MigrationError is translated into a CLI error below.
    if fake:
        click.confirm(
            "This will mark the migration as not started without actually reversing it. Your database may be in an invalid state. Are you sure?",
            abort=True,
        )
    try:
        runner.reverse_migration(migration_key, force=force, fake=fake)
    except MigrationError as e:
        raise click.ClickException(str(e))

    click.echo(f"Finished reversing migration {migration_key}")
def run_prior_migrations(
    migration_group: MigrationGroup, stop_migration_id: str, runner: Runner
) -> None:
    """Runs all migrations up to the migration denoted by migration ID

    Arguments:
        migration_group -- the group of the desired migration
        stop_migration_id -- desired migration ID, as a stopping point (not run)
        runner -- migration runner object
    """
    # Locate this group's migration list among all groups known to the runner.
    group_migrations = next(
        migrations
        for (candidate_group, migrations) in runner.show_all()
        if candidate_group == migration_group
    )

    for migration in group_migrations:
        # Stop before the target migration itself; everything earlier is run.
        if migration.migration_id == stop_migration_id:
            break
        runner.run_migration(
            MigrationKey(migration_group, migration.migration_id), force=True
        )
def test_backfill_errors() -> None:
    """0014_backfill_errors copies pre-existing events into the errors table."""
    backfill_migration_id = "0014_backfill_errors"
    migration_runner = Runner()
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    )
    run_prior_migrations(MigrationGroup.EVENTS, backfill_migration_id, migration_runner)

    errors_storage = get_writable_storage(StorageKey.ERRORS)
    clickhouse = errors_storage.get_cluster().get_query_connection(
        ClickhouseClientSettings.QUERY
    )
    errors_table_name = (
        errors_storage.get_table_writer().get_schema().get_table_name()
    )

    # Seed ten raw events through the events storage before the backfill.
    raw_events = [get_raw_event() for _ in range(10)]
    events_storage = get_writable_storage(StorageKey.EVENTS)
    write_unprocessed_events(events_storage, raw_events)

    assert get_count_from_storage(errors_table_name, clickhouse) == 0

    # Run 0014_backfill_errors
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.EVENTS, backfill_migration_id), force=True
    )

    assert get_count_from_storage(errors_table_name, clickhouse) == 10

    outcome = perform_select_query(
        ["contexts.key", "contexts.value"],
        errors_table_name,
        None,
        str(1),
        clickhouse,
    )
    assert outcome[0] == (
        [
            "device.model_id",
            "geo.city",
            "geo.country_code",
            "geo.region",
            "os.kernel_version",
        ],
        ["Galaxy", "San Francisco", "US", "CA", "1.1.1"],
    )
def test_reverse_migration() -> None:
    """Reversal validates IDs/ordering; --fake leaves the schema untouched."""
    migration_runner = Runner()
    migration_runner.run_all(force=True)

    conn = get_cluster(StorageSetKey.MIGRATIONS).get_query_connection(
        ClickhouseClientSettings.MIGRATE
    )

    # An unknown migration ID must be rejected.
    with pytest.raises(MigrationError):
        migration_runner.reverse_migration(MigrationKey(MigrationGroup.SYSTEM, "xxx"))

    # Reversing out of order must be rejected.
    with pytest.raises(MigrationError):
        migration_runner.reverse_migration(
            MigrationKey(MigrationGroup.EVENTS, "0003_errors")
        )

    # Fake-reverse the whole events group; no table should actually drop.
    event_migration_ids = get_group_loader(MigrationGroup.EVENTS).get_migrations()
    for migration_id in reversed(event_migration_ids):
        migration_runner.reverse_migration(
            MigrationKey(MigrationGroup.EVENTS, migration_id), fake=True
        )
    assert (
        len(conn.execute("SHOW TABLES LIKE 'sentry_local'")) == 1
    ), "Table still exists"
def test_groupedmessages_compatibility() -> None:
    """The compatibility migration rewrites the primary key to add project_id."""
    cluster = get_cluster(StorageSetKey.EVENTS)
    db_name = cluster.get_database()
    conn = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)

    # Create the legacy table shape, which lacks a project ID column.
    conn.execute(
        """
        CREATE TABLE groupedmessage_local (`offset` UInt64, `record_deleted` UInt8,
        `id` UInt64, `status` Nullable(UInt8), `last_seen` Nullable(DateTime),
        `first_seen` Nullable(DateTime), `active_at` Nullable(DateTime),
        `first_release_id` Nullable(UInt64))
        ENGINE = ReplacingMergeTree(offset)
        ORDER BY id SAMPLE BY id SETTINGS index_granularity = 8192
        """
    )

    migration_id = "0010_groupedmessages_onpremise_compatibility"
    migration_runner = Runner()
    migration_runner.run_migration(
        MigrationKey(MigrationGroup.SYSTEM, "0001_migrations")
    )

    events_migrations = get_group_loader(MigrationGroup.EVENTS).get_migrations()

    # Mark everything before the compatibility migration as already complete.
    for prior_id in events_migrations[: events_migrations.index(migration_id)]:
        migration_runner._update_migration_status(
            MigrationKey(MigrationGroup.EVENTS, prior_id), Status.COMPLETED
        )

    migration_runner.run_migration(
        MigrationKey(MigrationGroup.EVENTS, migration_id), force=True
    )

    primary_key_rows = conn.execute(
        f"SELECT primary_key FROM system.tables WHERE name = 'groupedmessage_local' AND database = '{db_name}'"
    )
    assert primary_key_rows == [("project_id, id",)]