def test_recover_staled_replica(started_cluster):
    """Check that a replica cut off from ZooKeeper converges after reconnecting.

    Two replicas of the Replicated database ``recover`` are created and
    populated. ``dummy_node`` then loses its ZooKeeper connections while
    ``main_node`` renames, alters, drops and re-creates objects. Once the
    partition is lifted, the test expects ``dummy_node`` to report the same
    ``system.tables`` content as ``main_node``, to keep the data of untouched
    tables, and to have moved its diverged tables into the
    ``recover_broken_tables`` / ``recover_broken_replicated_tables`` databases.

    The module-level counter ``test_recover_staled_replica_run`` is compared
    with the number of tables in those two databases and then incremented.
    """
    # NOTE(review): another `def test_recover_staled_replica` appears later in
    # this file; if both stay, the later one rebinds the name and this body is
    # never collected by pytest -- confirm which variant is intended.
    global test_recover_staled_replica_run

    def dictionary_ddl(name, source_table):
        # Every dictionary in this test shares one definition; only the
        # dictionary name and the source table differ.
        return (
            f"CREATE DICTIONARY recover.{name} (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n "
            f"SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE '{source_table}' PASSWORD '' DB 'recover')) "
            "LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())"
        )

    main_node.query(
        "CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica1');"
    )
    # presumably a short log forces the lagging replica to recover instead of
    # replaying every entry -- TODO confirm against the engine documentation
    started_cluster.get_kazoo_client("zoo1").set(
        "/clickhouse/databases/recover/logs_to_keep", b"10"
    )
    dummy_node.query(
        "CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica2');"
    )

    settings = {"distributed_ddl_task_timeout": 0}

    # Objects are created alternately through both replicas.
    main_node.query(
        "CREATE TABLE recover.t1 (n int) ENGINE=Memory", settings=settings
    )
    dummy_node.query(
        "CREATE TABLE recover.t2 (s String) ENGINE=Memory", settings=settings
    )
    main_node.query(
        "CREATE TABLE recover.mt1 (n int) ENGINE=MergeTree order by n",
        settings=settings,
    )
    dummy_node.query(
        "CREATE TABLE recover.mt2 (n int) ENGINE=MergeTree order by n",
        settings=settings,
    )
    main_node.query(
        "CREATE TABLE recover.rmt1 (n int) ENGINE=ReplicatedMergeTree order by n",
        settings=settings,
    )
    dummy_node.query(
        "CREATE TABLE recover.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n",
        settings=settings,
    )
    main_node.query(
        "CREATE TABLE recover.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n",
        settings=settings,
    )
    dummy_node.query(
        "CREATE TABLE recover.rmt5 (n int) ENGINE=ReplicatedMergeTree order by n",
        settings=settings,
    )
    main_node.query(
        "CREATE MATERIALIZED VIEW recover.mv1 (n int) ENGINE=ReplicatedMergeTree order by n AS SELECT n FROM recover.rmt1",
        settings=settings,
    )
    dummy_node.query(
        "CREATE MATERIALIZED VIEW recover.mv2 (n int) ENGINE=ReplicatedMergeTree order by n AS SELECT n FROM recover.rmt2",
        settings=settings,
    )
    main_node.query(dictionary_ddl("d1", "rmt1"))
    dummy_node.query(dictionary_ddl("d2", "rmt2"))

    # Seed one row everywhere; non-replicated tables need it on each node.
    for table in ["t1", "t2", "mt1", "mt2", "rmt1", "rmt2", "rmt3", "rmt5"]:
        main_node.query(f"INSERT INTO recover.{table} VALUES (42)")
    for table in ["t1", "t2", "mt1", "mt2"]:
        dummy_node.query(f"INSERT INTO recover.{table} VALUES (42)")
    for table in ["rmt1", "rmt2", "rmt3", "rmt5"]:
        main_node.query(f"SYSTEM SYNC REPLICA recover.{table}")

    with PartitionManager() as pm:
        pm.drop_instance_zk_connections(dummy_node)
        # DDL issued through the isolated replica is expected to fail.
        dummy_node.query_and_get_error("RENAME TABLE recover.t1 TO recover.m1")

        for statement in [
            "RENAME TABLE recover.t1 TO recover.m1",
            "ALTER TABLE recover.mt1 ADD COLUMN m int",
            "ALTER TABLE recover.rmt1 ADD COLUMN m int",
            "RENAME TABLE recover.rmt3 TO recover.rmt4",
            "DROP TABLE recover.rmt5",
            "DROP DICTIONARY recover.d2",
            dictionary_ddl("d2", "rmt1") + ";",
        ]:
            main_node.query_with_retry(statement, settings=settings)

        # The inner table of mv1 is named after the view's UUID, which is read
        # from the isolated replica's local system.tables.
        mv1_uuid = dummy_node.query_with_retry(
            "SELECT uuid FROM system.tables WHERE database='recover' AND name='mv1'"
        ).strip()
        inner_table = ".inner_id." + mv1_uuid
        main_node.query_with_retry(
            f"ALTER TABLE recover.`{inner_table}` MODIFY COLUMN n int DEFAULT 42",
            settings=settings,
        )
        # The next two statements have no placeholders, so they are passed
        # as plain strings.
        main_node.query_with_retry(
            "ALTER TABLE recover.mv1 MODIFY QUERY SELECT m FROM recover.rmt1",
            settings=settings,
        )
        main_node.query_with_retry(
            "RENAME TABLE recover.mv2 TO recover.mv3",
            settings=settings,
        )

        # Create/drop churn on a scratch table; it ends up existing.
        for _ in range(2):
            main_node.query_with_retry(
                "CREATE TABLE recover.tmp AS recover.m1", settings=settings
            )
            main_node.query_with_retry("DROP TABLE recover.tmp", settings=settings)
        main_node.query_with_retry(
            "CREATE TABLE recover.tmp AS recover.m1", settings=settings
        )

    # The partition is lifted here: the comparisons below need dummy_node to
    # reach ZooKeeper again in order to catch up.
    visible_names = main_node.query(
        "SELECT name FROM system.tables WHERE database='recover' AND name NOT LIKE '.inner_id.%' ORDER BY name"
    )
    assert (
        visible_names
        == "d1\nd2\nm1\nmt1\nmt2\nmv1\nmv3\nrmt1\nrmt2\nrmt4\nt2\ntmp\n"
    )

    query = (
        "SELECT name, uuid, create_table_query FROM system.tables WHERE database='recover' AND name NOT LIKE '.inner_id.%' "
        "ORDER BY name SETTINGS show_table_uuid_in_table_create_query_if_not_nil=1"
    )
    expected = main_node.query(query)
    assert_eq_with_retry(dummy_node, query, expected)

    inner_count_query = "SELECT count() FROM system.tables WHERE database='recover' AND name LIKE '.inner_id.%'"
    assert main_node.query(inner_count_query) == "2\n"
    assert dummy_node.query(inner_count_query) == "2\n"

    for table in [
        "m1",
        "t2",
        "mt1",
        "mt2",
        "rmt1",
        "rmt2",
        "rmt4",
        "d1",
        "d2",
        "mv1",
        "mv3",
    ]:
        assert main_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n"
    for table in ["t2", "rmt1", "rmt2", "rmt4", "d1", "d2", "mt2", "mv1", "mv3"]:
        assert dummy_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n"
    # These two are expected to be empty on the recovered replica.
    for table in ["m1", "mt1"]:
        assert dummy_node.query(f"SELECT count() FROM recover.{table}") == "0\n"

    # The expected table count in each "broken" database is the run counter.
    assert (
        dummy_node.query(
            "SELECT count() FROM system.tables WHERE database='recover_broken_tables'"
        )
        == f"{test_recover_staled_replica_run}\n"
    )
    assert (
        dummy_node.query(
            "SELECT count() FROM system.tables WHERE database='recover_broken_replicated_tables'"
        )
        == f"{test_recover_staled_replica_run}\n"
    )
    test_recover_staled_replica_run += 1

    table = dummy_node.query(
        "SHOW TABLES FROM recover_broken_tables LIKE 'mt1_29_%' LIMIT 1"
    ).strip()
    assert (
        dummy_node.query(f"SELECT (*,).1 FROM recover_broken_tables.{table}")
        == "42\n"
    )
    table = dummy_node.query(
        "SHOW TABLES FROM recover_broken_replicated_tables LIKE 'rmt5_29_%' LIMIT 1"
    ).strip()
    assert (
        dummy_node.query(
            f"SELECT (*,).1 FROM recover_broken_replicated_tables.{table}"
        )
        == "42\n"
    )

    expected = "Cleaned 6 outdated objects: dropped 1 dictionaries and 3 tables, moved 2 tables"
    assert_logs_contain(dummy_node, expected)

    # A DROP issued through the recovered replica must reach main_node.
    dummy_node.query("DROP TABLE recover.tmp")
    assert_eq_with_retry(
        main_node,
        "SELECT count() FROM system.tables WHERE database='recover' AND name='tmp'",
        "0\n",
    )

    main_node.query("DROP DATABASE recover SYNC")
    dummy_node.query("DROP DATABASE recover SYNC")
def test_recover_staled_replica(started_cluster):
    """Check that a replica cut off from ZooKeeper converges after reconnecting.

    Two replicas of the Replicated database ``recover`` are created and
    populated. ``dummy_node`` then loses its ZooKeeper connections while
    ``main_node`` renames, alters, drops and re-creates objects. Once the
    partition is lifted, the test expects ``dummy_node`` to report the same
    ``system.tables`` content as ``main_node``, to keep the data of untouched
    tables, and to have moved its diverged tables into the
    ``recover_broken_tables`` database.

    The ``recover`` database is dropped on both nodes at the end so the test
    does not leave it behind.
    """
    # NOTE(review): an earlier `def test_recover_staled_replica` exists in this
    # file; this later definition rebinds the name, so only this body is
    # collected by pytest -- confirm which variant is intended.

    def dictionary_ddl(name, source_table):
        # Every dictionary in this test shares one definition; only the
        # dictionary name and the source table differ.
        return (
            f"CREATE DICTIONARY recover.{name} (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n "
            f"SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE '{source_table}' PASSWORD '' DB 'recover')) "
            "LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())"
        )

    main_node.query(
        "CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica1');"
    )
    # presumably a short log forces the lagging replica to recover instead of
    # replaying every entry -- TODO confirm against the engine documentation
    started_cluster.get_kazoo_client('zoo1').set(
        '/clickhouse/databases/recover/logs_to_keep', b'10'
    )
    dummy_node.query(
        "CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica2');"
    )

    settings = {"distributed_ddl_task_timeout": 0}

    # Objects are created alternately through both replicas.
    main_node.query(
        "CREATE TABLE recover.t1 (n int) ENGINE=Memory", settings=settings
    )
    dummy_node.query(
        "CREATE TABLE recover.t2 (s String) ENGINE=Memory", settings=settings
    )
    main_node.query(
        "CREATE TABLE recover.mt1 (n int) ENGINE=MergeTree order by n",
        settings=settings,
    )
    dummy_node.query(
        "CREATE TABLE recover.mt2 (n int) ENGINE=MergeTree order by n",
        settings=settings,
    )
    main_node.query(
        "CREATE TABLE recover.rmt1 (n int) ENGINE=ReplicatedMergeTree order by n",
        settings=settings,
    )
    dummy_node.query(
        "CREATE TABLE recover.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n",
        settings=settings,
    )
    main_node.query(
        "CREATE TABLE recover.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n",
        settings=settings,
    )
    dummy_node.query(
        "CREATE TABLE recover.rmt5 (n int) ENGINE=ReplicatedMergeTree order by n",
        settings=settings,
    )
    main_node.query(dictionary_ddl("d1", "rmt1"))
    dummy_node.query(dictionary_ddl("d2", "rmt2"))

    # Seed one row everywhere; non-replicated tables need it on each node.
    for table in ['t1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'rmt3', 'rmt5']:
        main_node.query(f"INSERT INTO recover.{table} VALUES (42)")
    for table in ['t1', 't2', 'mt1', 'mt2']:
        dummy_node.query(f"INSERT INTO recover.{table} VALUES (42)")
    for table in ['rmt1', 'rmt2', 'rmt3', 'rmt5']:
        main_node.query(f"SYSTEM SYNC REPLICA recover.{table}")

    with PartitionManager() as pm:
        pm.drop_instance_zk_connections(dummy_node)
        # DDL issued through the isolated replica is expected to fail.
        dummy_node.query_and_get_error("RENAME TABLE recover.t1 TO recover.m1")

        for statement in [
            "RENAME TABLE recover.t1 TO recover.m1",
            "ALTER TABLE recover.mt1 ADD COLUMN m int",
            "ALTER TABLE recover.rmt1 ADD COLUMN m int",
            "RENAME TABLE recover.rmt3 TO recover.rmt4",
            "DROP TABLE recover.rmt5",
            "DROP DICTIONARY recover.d2",
            dictionary_ddl("d2", "rmt1") + ";",
        ]:
            main_node.query(statement, settings=settings)

        # Create/drop churn on a scratch table; it ends up existing.
        for _ in range(3):
            main_node.query(
                "CREATE TABLE recover.tmp AS recover.m1", settings=settings
            )
            main_node.query("DROP TABLE recover.tmp", settings=settings)
        main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings)

    # The partition is lifted here: the comparisons below need dummy_node to
    # reach ZooKeeper again in order to catch up.
    table_names = main_node.query(
        "SELECT name FROM system.tables WHERE database='recover' ORDER BY name"
    )
    assert table_names == "d1\nd2\nm1\nmt1\nmt2\nrmt1\nrmt2\nrmt4\nt2\ntmp\n"

    query = "SELECT name, uuid, create_table_query FROM system.tables WHERE database='recover' ORDER BY name"
    expected = main_node.query(query)
    assert_eq_with_retry(dummy_node, query, expected)

    for table in ['m1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'rmt4', 'd1', 'd2']:
        assert main_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n"
    for table in ['t2', 'rmt1', 'rmt2', 'rmt4', 'd1', 'd2', 'mt2']:
        assert dummy_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n"
    # These two are expected to be empty on the recovered replica.
    for table in ['m1', 'mt1']:
        assert dummy_node.query(f"SELECT count() FROM recover.{table}") == "0\n"

    broken_count = dummy_node.query(
        "SELECT count() FROM system.tables WHERE database='recover_broken_tables'"
    )
    assert broken_count == "2\n"

    table = dummy_node.query(
        "SHOW TABLES FROM recover_broken_tables LIKE 'mt1_26_%'"
    ).strip()
    assert (
        dummy_node.query(f"SELECT (*,).1 FROM recover_broken_tables.{table}")
        == "42\n"
    )
    table = dummy_node.query(
        "SHOW TABLES FROM recover_broken_tables LIKE 'rmt5_26_%'"
    ).strip()
    assert (
        dummy_node.query(f"SELECT (*,).1 FROM recover_broken_tables.{table}")
        == "42\n"
    )

    expected = "Cleaned 4 outdated objects: dropped 1 dictionaries and 1 tables, moved 2 tables"
    assert_logs_contain(dummy_node, expected)

    # A DROP issued through the recovered replica must reach main_node.
    dummy_node.query("DROP TABLE recover.tmp")
    assert_eq_with_retry(
        main_node,
        "SELECT count() FROM system.tables WHERE database='recover' AND name='tmp'",
        "0\n",
    )

    # Teardown: remove the database on both replicas so it does not leak.
    main_node.query("DROP DATABASE recover SYNC")
    dummy_node.query("DROP DATABASE recover SYNC")