def test_startup_without_zk(started_cluster):
    with PartitionManager() as pm:
        pm.drop_instance_zk_connections(main_node)
        err = main_node.query_and_get_error(
            "CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');"
        )
        assert "ZooKeeper" in err

    main_node.query(
        "CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');"
    )
    # main_node.query("CREATE TABLE startup.rmt (n int) ENGINE=ReplicatedMergeTree order by n")
    main_node.query("CREATE TABLE startup.rmt (n int) ENGINE=MergeTree order by n")
    main_node.query("INSERT INTO startup.rmt VALUES (42)")

    with PartitionManager() as pm:
        pm.drop_instance_zk_connections(main_node)
        main_node.restart_clickhouse(stop_start_wait_sec=30)
        assert main_node.query("SELECT (*,).1 FROM startup.rmt") == "42\n"

    # Retry until the database reconnects to ZooKeeper after the partition is healed.
    for _ in range(10):
        try:
            main_node.query("CREATE TABLE startup.m (n int) ENGINE=Memory")
            break
        except Exception:
            time.sleep(1)

    main_node.query("EXCHANGE TABLES startup.rmt AND startup.m")
    assert main_node.query("SELECT (*,).1 FROM startup.m") == "42\n"
    main_node.query("DROP DATABASE startup SYNC")

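# The bounded retry loop above is a pattern that recurs in these tests. A minimal sketch of
# how it could be factored into a helper; the name and signature are illustrative, not part
# of the existing test harness, and it assumes the module-level `import time` already used here.
def query_with_retries(node, sql, attempts=10, delay=1.0):
    """Retry `sql` on `node` until it succeeds or `attempts` run out."""
    last_error = None
    for _ in range(attempts):
        try:
            return node.query(sql)
        except Exception as e:  # the server may still be reconnecting to ZooKeeper
            last_error = e
            time.sleep(delay)
    raise last_error

# Hypothetical usage for the step above:
#   query_with_retries(main_node, "CREATE TABLE startup.m (n int) ENGINE=Memory")
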
def prepare(self, replace_hostnames_with_ips=True):
    try:
        for i in range(4):
            self.add_instance(
                'ch{}'.format(i + 1),
                config_dir=self.test_config_dir,
                macros={"layer": 0, "shard": i // 2 + 1, "replica": i % 2 + 1},
                with_zookeeper=True)

        self.start()

        # Replace config files for testing the ability to set the host in DNS and IP formats
        if replace_hostnames_with_ips:
            self.replace_domains_to_ip_addresses_in_cluster_config(['ch1', 'ch3'])

        # Select a sacrifice instance to test CONNECTION_LOSS and server failure on it
        sacrifice = self.instances['ch4']
        self.pm_random_drops = PartitionManager()
        self.pm_random_drops._add_rule(
            {'probability': 0.01, 'destination': sacrifice.ip_address, 'source_port': 2181,
             'action': 'REJECT --reject-with tcp-reset'})
        self.pm_random_drops._add_rule(
            {'probability': 0.01, 'source': sacrifice.ip_address, 'destination_port': 2181,
             'action': 'REJECT --reject-with tcp-reset'})

        # Initialize databases and service tables
        instance = self.instances['ch1']
        self.ddl_check_query(instance, """
            CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
                (database String, name String, engine String, metadata_modification_time DateTime)
                ENGINE = Distributed('cluster_no_replicas', 'system', 'tables')
            """)

        self.ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS test ON CLUSTER 'cluster'")

    except Exception as e:
        print(e)
        raise

def test_in_memory_wal(start_cluster):
    # Merges are disabled in config

    for i in range(5):
        insert_random_data('wal_table', node11, 50)

    node12.query("SYSTEM SYNC REPLICA wal_table", timeout=20)

    def check(node, rows, parts):
        # The original comparisons were not asserted and silently did nothing; make them effective.
        assert node.query("SELECT count() FROM wal_table") == "{}\n".format(rows)
        assert node.query(
            "SELECT count() FROM system.parts WHERE table = 'wal_table' AND part_type = 'InMemory'") == "{}\n".format(parts)

    check(node11, 250, 5)
    check(node12, 250, 5)

    # WAL works at inserts
    node11.restart_clickhouse(kill=True)
    check(node11, 250, 5)

    # WAL works at fetches
    node12.restart_clickhouse(kill=True)
    check(node12, 250, 5)

    insert_random_data('wal_table', node11, 50)
    node12.query("SYSTEM SYNC REPLICA wal_table", timeout=20)

    # Disable replication
    with PartitionManager() as pm:
        pm.partition_instances(node11, node12)
        check(node11, 300, 6)

        wal_file = os.path.join(node11.path, "database/data/default/wal_table/wal.bin")
        # Corrupt the WAL file ('rb+' instead of the invalid 'rw+' mode)
        open(wal_file, 'rb+').truncate(os.path.getsize(wal_file) - 10)

        node11.restart_clickhouse(kill=True)

        # The broken part is lost, but the others are restored successfully
        check(node11, 250, 5)

        # WAL with blocks from 0 to 4
        broken_wal_file = os.path.join(node11.path, "database/data/default/wal_table/wal_0_4.bin")
        assert os.path.exists(broken_wal_file)

    # Fetch the lost part from the replica
    node11.query("SYSTEM SYNC REPLICA wal_table", timeout=20)
    check(node11, 300, 6)

    # Check that new data is written to the new WAL, but the old one still exists for restoring
    assert os.path.getsize(wal_file) > 0
    assert os.path.exists(broken_wal_file)

    # Data is lost without WAL
    node11.query("ALTER TABLE wal_table MODIFY SETTING in_memory_parts_enable_wal = 0")
    with PartitionManager() as pm:
        pm.partition_instances(node11, node12)

        insert_random_data('wal_table', node11, 50)
        check(node11, 350, 7)

        node11.restart_clickhouse(kill=True)
        check(node11, 300, 6)

def prepare(self, replace_hostnames_with_ips=True):
    try:
        main_configs_files = ["clusters.xml", "zookeeper_session_timeout.xml", "macro.xml", "query_log.xml",
                              "ddl.xml"]
        main_configs = [os.path.join(self.test_config_dir, "config.d", f) for f in main_configs_files]
        user_configs = [os.path.join(self.test_config_dir, "users.d", f) for f in
                        ["restricted_user.xml", "query_log.xml"]]
        if self.test_config_dir == "configs_secure":
            main_configs += [os.path.join(self.test_config_dir, f) for f in
                             ["server.crt", "server.key", "dhparam.pem", "config.d/ssl_conf.xml"]]
        for i in range(4):
            self.add_instance(
                'ch{}'.format(i + 1),
                main_configs=main_configs,
                user_configs=user_configs,
                macros={"layer": 0, "shard": i // 2 + 1, "replica": i % 2 + 1},
                with_zookeeper=True)

        self.start()

        # Replace config files for testing the ability to set the host in DNS and IP formats
        if replace_hostnames_with_ips:
            self.replace_domains_to_ip_addresses_in_cluster_config(['ch1', 'ch3'])

        # Select a sacrifice instance to test CONNECTION_LOSS and server failure on it
        sacrifice = self.instances['ch4']
        self.pm_random_drops = PartitionManager()
        self.pm_random_drops._add_rule(
            {'probability': 0.01, 'destination': sacrifice.ip_address, 'source_port': 2181,
             'action': 'REJECT --reject-with tcp-reset'})
        self.pm_random_drops._add_rule(
            {'probability': 0.01, 'source': sacrifice.ip_address, 'destination_port': 2181,
             'action': 'REJECT --reject-with tcp-reset'})

        # Initialize databases and service tables
        instance = self.instances['ch1']
        self.ddl_check_query(instance, """
            CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
                (database String, name String, engine String, metadata_modification_time DateTime)
                ENGINE = Distributed('cluster_no_replicas', 'system', 'tables')
            """)

        self.ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS test ON CLUSTER 'cluster'")
    except Exception as e:
        print(e)
        raise

def test_simple_dict_get(started_cluster):
    assert None != dictionary_node.get_process_pid("clickhouse"), "ClickHouse must be alive"

    def test_helper():
        assert '7' == main_node.query("select dictGet('anime_dict', 'i8', toUInt64(7));").rstrip(), "Wrong answer."
        assert '7' == main_node.query("select dictGet('anime_dict', 'i16', toUInt64(7));").rstrip(), "Wrong answer."
        assert '7' == main_node.query("select dictGet('anime_dict', 'i32', toUInt64(7));").rstrip(), "Wrong answer."
        assert '7' == main_node.query("select dictGet('anime_dict', 'i64', toUInt64(7));").rstrip(), "Wrong answer."
        assert '7' == main_node.query("select dictGet('anime_dict', 'u8', toUInt64(7));").rstrip(), "Wrong answer."
        assert '7' == main_node.query("select dictGet('anime_dict', 'u16', toUInt64(7));").rstrip(), "Wrong answer."
        assert '7' == main_node.query("select dictGet('anime_dict', 'u32', toUInt64(7));").rstrip(), "Wrong answer."
        assert '7' == main_node.query("select dictGet('anime_dict', 'u64', toUInt64(7));").rstrip(), "Wrong answer."

    test_helper()

    with PartitionManager() as pm, ClickHouseKiller(dictionary_node):
        assert None == dictionary_node.get_process_pid("clickhouse")

        # Remove the connection between main_node and dictionary_node for sure
        pm.heal_all()
        pm.partition_instances(main_node, dictionary_node)

        # Dictionary max lifetime is 2 seconds.
        time.sleep(3)

        test_helper()

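# The per-type assertions in test_simple_dict_get (and test_default_reading below) could be
# driven by a loop. A minimal sketch; the helper name is illustrative and not part of the
# existing suite, and the attribute list and expected value are taken from the test above.
def check_dict_values(node, dict_name, key_expr, attrs, expected):
    """Assert that dictGet returns `expected` for every attribute in `attrs`."""
    for attr in attrs:
        got = node.query("select dictGet('{}', '{}', {});".format(dict_name, attr, key_expr)).rstrip()
        assert got == expected, "Wrong answer for attribute {}: {}".format(attr, got)

# Hypothetical usage:
#   check_dict_values(main_node, 'anime_dict', 'toUInt64(7)',
#                     ['i8', 'i16', 'i32', 'i64', 'u8', 'u16', 'u32', 'u64'], '7')
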
def test_reconnect(started_cluster):
    instance = instance_test_reconnect

    with PartitionManager() as pm:
        # Open a connection for insertion.
        instance.query("INSERT INTO local1_source VALUES (1)")
        time.sleep(1)
        assert remote.query("SELECT count(*) FROM local1").strip() == '1'

        # Now break the connection.
        pm.partition_instances(instance, remote, action='REJECT --reject-with tcp-reset')

        instance.query("INSERT INTO local1_source VALUES (2)")
        time.sleep(1)

        # Heal the partition and insert more data.
        # The connection must be reestablished and after some time all data must be inserted.
        pm.heal_all()
        time.sleep(1)

        instance.query("INSERT INTO local1_source VALUES (3)")
        time.sleep(1)

        assert remote.query("SELECT count(*) FROM local1").strip() == '3'

def test(started_cluster):
    # Check that the data has been inserted into the correct tables.
    assert_eq_with_retry(node1, "SELECT id FROM shard_0.replicated", '111')
    assert_eq_with_retry(node1, "SELECT id FROM shard_2.replicated", '333')

    assert_eq_with_retry(node2, "SELECT id FROM shard_0.replicated", '111')
    assert_eq_with_retry(node2, "SELECT id FROM shard_1.replicated", '222')

    assert_eq_with_retry(node3, "SELECT id FROM shard_1.replicated", '222')
    assert_eq_with_retry(node3, "SELECT id FROM shard_2.replicated", '333')

    # Check that SELECT from the Distributed table works.
    expected_from_distributed = '''\
2017-06-16\t111\t0
2017-06-16\t222\t1
2017-06-16\t333\t2
'''
    assert_eq_with_retry(node1, "SELECT * FROM distributed ORDER BY id", expected_from_distributed)
    assert_eq_with_retry(node2, "SELECT * FROM distributed ORDER BY id", expected_from_distributed)
    assert_eq_with_retry(node3, "SELECT * FROM distributed ORDER BY id", expected_from_distributed)

    # Now isolate node3 from the other nodes and check that SELECTs on the other nodes still work.
    with PartitionManager() as pm:
        pm.partition_instances(node3, node1, action='REJECT --reject-with tcp-reset')
        pm.partition_instances(node3, node2, action='REJECT --reject-with tcp-reset')

        assert_eq_with_retry(node1, "SELECT * FROM distributed ORDER BY id", expected_from_distributed)
        assert_eq_with_retry(node2, "SELECT * FROM distributed ORDER BY id", expected_from_distributed)

        with pytest.raises(Exception):
            print(node3.query_with_retry("SELECT * FROM distributed ORDER BY id", retry_count=5))

def test_random_inserts(started_cluster):
    # Duration of the test, reduce it if you don't want to wait
    DURATION_SECONDS = 10  # * 60

    node1.query("""
        CREATE TABLE simple ON CLUSTER test_cluster (date Date, i UInt32, s String)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}', date, i, 8192)""")

    with PartitionManager() as pm_random_drops:
        for sacrifice in nodes:
            pass  # This test doesn't work with partition problems still
            # pm_random_drops._add_rule({'probability': 0.01, 'destination': sacrifice.ip_address,
            #                            'source_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
            # pm_random_drops._add_rule({'probability': 0.01, 'source': sacrifice.ip_address,
            #                            'destination_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})

        min_timestamp = int(time.time())
        max_timestamp = min_timestamp + DURATION_SECONDS
        num_timestamps = max_timestamp - min_timestamp + 1

        bash_script = os.path.join(os.path.dirname(__file__), "test.sh")
        inserters = []
        for node in nodes:
            cmd = ['/bin/bash', bash_script, node.ip_address, str(min_timestamp), str(max_timestamp)]
            inserters.append(CommandRequest(cmd, timeout=DURATION_SECONDS * 2, stdin=''))
            print(node.name, node.ip_address)

        for inserter in inserters:
            inserter.get_answer()

    answer = "{}\t{}\t{}\t{}\n".format(num_timestamps, num_timestamps, min_timestamp, max_timestamp)
    for node in nodes:
        res = node.query("SELECT count(), uniqExact(i), min(i), max(i) FROM simple")
        assert TSV(res) == TSV(answer), node.name + " : " + node.query(
            "SELECT groupArray(_part), i, count() AS c FROM simple GROUP BY i ORDER BY c DESC LIMIT 1")

    node1.query("""DROP TABLE simple ON CLUSTER test_cluster""")

def test_inconsistent_parts_if_drop_while_replica_not_active(start_cluster):
    with PartitionManager() as pm:
        # insert into all replicas
        for i in range(50):
            node1.query("INSERT INTO test_table VALUES ('2019-08-16', {})".format(i))
        assert_eq_with_retry(node2, "SELECT count(*) FROM test_table",
                             node1.query("SELECT count(*) FROM test_table"))

        # disable network on the first replica
        pm.partition_instances(node1, node2)
        pm.drop_instance_zk_connections(node1)

        # drop all parts on the second replica
        node2.query_with_retry("ALTER TABLE test_table DROP PARTITION 201908")
        assert_eq_with_retry(node2, "SELECT count(*) FROM test_table", "0")

        # insert into the second replica
        # DROP_RANGE will be removed from the replication log and the first replica will be lost
        for i in range(50):
            node2.query("INSERT INTO test_table VALUES ('2019-08-16', {})".format(50 + i))

        # the first replica will be cloned from the second
        pm.heal_all()
        assert_eq_with_retry(node1, "SELECT count(*) FROM test_table",
                             node2.query("SELECT count(*) FROM test_table"))

def test_url_reconnect_in_the_middle(started_cluster):
    bucket = started_cluster.minio_bucket
    instance = started_cluster.instances["dummy"]
    table_format = "id String, data String"
    filename = "test_url_reconnect_{}.tsv".format(random.randint(0, 1000))

    instance.query(f"""insert into table function
                   s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'TSV', '{table_format}')
                   select number, randomPrintableASCII(number % 1000) from numbers(1000000)""")

    with PartitionManager() as pm:
        pm_rule_reject = {'probability': 0.02, 'destination': instance.ip_address,
                          'source_port': started_cluster.minio_port,
                          'action': 'REJECT --reject-with tcp-reset'}
        pm_rule_drop_all = {'destination': instance.ip_address,
                            'source_port': started_cluster.minio_port, 'action': 'DROP'}
        pm._add_rule(pm_rule_reject)

        def select():
            global result
            result = instance.query(
                f"""select sum(cityHash64(x)) from (select toUInt64(id) + sleep(0.1) as x from
                url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'TSV', '{table_format}')
                settings http_max_tries = 10, http_retry_max_backoff_ms=2000, http_send_timeout=1, http_receive_timeout=1)""")
            # `assert(x, y)` asserts a non-empty tuple and always passes; compare explicitly instead.
            assert int(result) == 3914219105369203805

        thread = threading.Thread(target=select)
        thread.start()
        time.sleep(4)
        pm._add_rule(pm_rule_drop_all)

        time.sleep(2)
        pm._delete_rule(pm_rule_drop_all)
        pm._delete_rule(pm_rule_reject)

        thread.join()

        assert int(result) == 3914219105369203805

def test_url_reconnect(started_cluster):
    hdfs_api = started_cluster.hdfs_api

    with PartitionManager() as pm:
        node1.query(
            "insert into table function hdfs('hdfs://hdfs1:9000/storage_big', 'TSV', 'id Int32') select number from numbers(500000)")

        pm_rule = {'destination': node1.ip_address, 'source_port': 50075, 'action': 'REJECT'}
        pm._add_rule(pm_rule)

        def select():
            global result
            result = node1.query(
                "select sum(cityHash64(id)) from url('http://hdfs1:50075/webhdfs/v1/storage_big?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'id Int32') settings http_max_tries = 10, http_retry_max_backoff_ms=1000")
            # Compare explicitly; `assert(x, y)` is a non-empty tuple and never fails.
            assert int(result) == 6581218782194912115

        thread = threading.Thread(target=select)
        thread.start()

        time.sleep(4)
        pm._delete_rule(pm_rule)

        thread.join()

        assert int(result) == 6581218782194912115
        assert node1.contains_in_log("Error: Timeout: connect timed out")

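# The two URL-reconnect tests above share the same choreography: run the long SELECT in a
# background thread, flip iptables rules while it runs, then join and check the result.
# A minimal sketch of how that could be factored out; the helper name and the result holder
# (instead of a module-level global) are illustrative assumptions, not part of the harness.
def run_query_in_background(node, sql):
    """Start `sql` on `node` in a daemon thread and return (thread, result_holder)."""
    holder = {}

    def _target():
        holder['result'] = node.query(sql)

    thread = threading.Thread(target=_target, daemon=True)
    thread.start()
    return thread, holder

# Hypothetical usage mirroring test_url_reconnect:
#   thread, holder = run_query_in_background(node1, long_url_select)
#   time.sleep(4); pm._delete_rule(pm_rule)
#   thread.join()
#   assert int(holder['result']) == expected_checksum
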
def test_disabled_mysql_server(started_cluster):
    with contextlib.closing(MySQLNodeInstance(started_cluster)) as mysql_node:
        mysql_node.query("DROP DATABASE IF EXISTS test_db_disabled;")
        mysql_node.query("CREATE DATABASE test_db_disabled;")
        mysql_node.query(
            "CREATE TABLE test_db_disabled.test_table ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;"
        )

        with PartitionManager() as pm:
            clickhouse_node.query(
                "CREATE DATABASE test_db_disabled ENGINE = MySQL('mysql57:3306', 'test_db_disabled', 'root', 'clickhouse')"
            )

            pm._add_rule(
                {
                    "source": clickhouse_node.ip_address,
                    "destination_port": 3306,
                    "action": "DROP",
                }
            )
            clickhouse_node.query("SELECT * FROM system.parts")
            clickhouse_node.query("SELECT * FROM system.mutations")
            clickhouse_node.query("SELECT * FROM system.graphite_retentions")

            clickhouse_node.query("DROP DATABASE test_db_disabled")

def test_reconnect(started_cluster, node_name, first_user, query_base):
    node = NODES[node_name]
    query = SELECTS_SQL[query_base]
    if started_cluster.__with_ssl_config:
        query = query.replace("remote(", "remoteSecure(")

    # Everything is up, select should work:
    assert TSV(node.query(query, user=first_user)) == TSV("node1\nnode2")

    with PartitionManager() as pm:
        # Break the connection.
        pm.partition_instances(*list(NODES.values()))

        # Now it shouldn't:
        _check_timeout_and_exception(node, first_user, query_base, query)

        # The other user should have a different timeout and exception
        _check_timeout_and_exception(
            node,
            "default" if first_user != "default" else "ready_to_wait",
            query_base,
            query,
        )

    # select should work again:
    assert TSV(node.query(query, user=first_user)) == TSV("node1\nnode2")

def test_replication_after_partition(both_https_cluster):
    node1.query("truncate table test_table")
    node2.query("truncate table test_table")

    manager = PartitionManager()

    def close(num):
        manager.partition_instances(node1, node2, port=9010)
        time.sleep(1)
        manager.heal_all()

    def insert_data_and_check(num):
        node1.query("insert into test_table values('2019-10-15', {}, 888)".format(num))
        time.sleep(0.5)

    closing_pool = Pool(1)
    inserting_pool = Pool(5)
    cres = closing_pool.map_async(close, [random.randint(1, 3) for _ in range(10)])
    ires = inserting_pool.map_async(insert_data_and_check, list(range(100)))

    cres.wait()
    ires.wait()

    assert_eq_with_retry(node1, "SELECT count() FROM test_table", '100')
    assert_eq_with_retry(node2, "SELECT count() FROM test_table", '100')

def test_default_reading(started_cluster):
    assert None != dictionary_node.get_process_pid("clickhouse"), "ClickHouse must be alive"

    # The requested key is not in the dictionary, so the default value will be returned
    def test_helper():
        assert '42' == main_node.query("select dictGetOrDefault('experimental_dict', 'i8', toUInt64(13), toInt8(42));").rstrip()
        assert '42' == main_node.query("select dictGetOrDefault('experimental_dict', 'i16', toUInt64(13), toInt16(42));").rstrip()
        assert '42' == main_node.query("select dictGetOrDefault('experimental_dict', 'i32', toUInt64(13), toInt32(42));").rstrip()
        assert '42' == main_node.query("select dictGetOrDefault('experimental_dict', 'i64', toUInt64(13), toInt64(42));").rstrip()
        assert '42' == main_node.query("select dictGetOrDefault('experimental_dict', 'u8', toUInt64(13), toUInt8(42));").rstrip()
        assert '42' == main_node.query("select dictGetOrDefault('experimental_dict', 'u16', toUInt64(13), toUInt16(42));").rstrip()
        assert '42' == main_node.query("select dictGetOrDefault('experimental_dict', 'u32', toUInt64(13), toUInt32(42));").rstrip()
        assert '42' == main_node.query("select dictGetOrDefault('experimental_dict', 'u64', toUInt64(13), toUInt64(42));").rstrip()

    test_helper()

    with PartitionManager() as pm, ClickHouseKiller(dictionary_node):
        assert None == dictionary_node.get_process_pid("clickhouse"), "ClickHouse must have been killed"

        # Remove the connection between main_node and dictionary_node for sure
        pm.heal_all()
        pm.partition_instances(main_node, dictionary_node)

        # Dictionary max lifetime is 2 seconds.
        time.sleep(3)

        test_helper()

def test_prefer_localhost_replica(started_cluster):
    test_query = "SELECT * FROM distributed ORDER BY id;"

    node1.query("INSERT INTO distributed VALUES (toDate('2017-06-17'), 11)")
    node2.query("INSERT INTO distributed VALUES (toDate('2017-06-17'), 22)")
    time.sleep(1.0)

    expected_distributed = '''\
2017-06-17\t11
2017-06-17\t22
'''
    assert TSV(node1.query(test_query)) == TSV(expected_distributed)
    assert TSV(node2.query(test_query)) == TSV(expected_distributed)

    with PartitionManager() as pm:
        pm.partition_instances(node1, node2, action='REJECT --reject-with tcp-reset')
        node1.query("INSERT INTO replicated VALUES (toDate('2017-06-17'), 33)")
        node2.query("INSERT INTO replicated VALUES (toDate('2017-06-17'), 44)")
        time.sleep(1.0)

        expected_from_node2 = '''\
2017-06-17\t11
2017-06-17\t22
2017-06-17\t44
'''
        # The query is sent to node2, as it is local and prefer_localhost_replica=1
        assert TSV(node2.query(test_query)) == TSV(expected_from_node2)

        expected_from_node1 = '''\
2017-06-17\t11
2017-06-17\t22
2017-06-17\t33
'''
        # Now the query is sent to node1, as it is higher in order
        assert TSV(
            node2.query(
                "SET load_balancing='in_order'; SET prefer_localhost_replica=0;" + test_query)) == TSV(expected_from_node1)

def test_sync_replica(started_cluster):
    main_node.query(
        "CREATE DATABASE test_sync_database ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');"
    )
    dummy_node.query(
        "CREATE DATABASE test_sync_database ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');"
    )

    number_of_tables = 1000

    settings = {"distributed_ddl_task_timeout": 0}

    with PartitionManager() as pm:
        pm.drop_instance_zk_connections(dummy_node)
        for i in range(number_of_tables):
            main_node.query(
                "CREATE TABLE test_sync_database.table_{} (n int) ENGINE=MergeTree order by n".format(i),
                settings=settings,
            )

    dummy_node.query("SYSTEM SYNC DATABASE REPLICA test_sync_database")

    assert dummy_node.query(
        "SELECT count() FROM system.tables where database='test_sync_database'"
    ).strip() == str(number_of_tables)

    assert main_node.query(
        "SELECT count() FROM system.tables where database='test_sync_database'"
    ).strip() == str(number_of_tables)

def test_restart_server(started_cluster):
    with contextlib.closing(
            MySQLNodeInstance("root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port)) as mysql_node:
        mysql_node.query("DROP DATABASE IF EXISTS test_restart")
        clickhouse_node.query("DROP DATABASE IF EXISTS test_restart")
        clickhouse_node.query_and_get_error(
            "CREATE DATABASE test_restart ENGINE = MySQL('mysql57:3306', 'test_restart', 'root', 'clickhouse')"
        )
        assert "test_restart" not in clickhouse_node.query("SHOW DATABASES")

        mysql_node.query("CREATE DATABASE test_restart DEFAULT CHARACTER SET 'utf8'")
        mysql_node.query(
            "CREATE TABLE `test_restart`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;"
        )
        clickhouse_node.query(
            "CREATE DATABASE test_restart ENGINE = MySQL('mysql57:3306', 'test_restart', 'root', 'clickhouse')"
        )

        assert "test_restart" in clickhouse_node.query("SHOW DATABASES")
        assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_restart")

        with PartitionManager() as pm:
            pm.partition_instances(clickhouse_node, mysql_node, action="REJECT --reject-with tcp-reset")
            clickhouse_node.restart_clickhouse()
            clickhouse_node.query_and_get_error("SHOW TABLES FROM test_restart")

        assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_restart")

def test_merge_doesnt_work_without_zookeeper(start_cluster):
    node1.query("INSERT INTO test_table VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)")
    node1.query("INSERT INTO test_table VALUES ('2018-10-01', 4), ('2018-10-02', 5), ('2018-10-03', 6)")
    assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "2\n"

    node1.query("OPTIMIZE TABLE test_table FINAL")
    assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "3\n"

    assert_eq_with_retry(node1, "SELECT count(*) from system.parts where table = 'test_table' and active = 1", "1")

    node1.query("TRUNCATE TABLE test_table")

    assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "0\n"

    node1.query("INSERT INTO test_table VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)")
    node1.query("INSERT INTO test_table VALUES ('2018-10-01', 4), ('2018-10-02', 5), ('2018-10-03', 6)")
    assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "2\n"

    with PartitionManager() as pm:
        node1.query("OPTIMIZE TABLE test_table FINAL")
        pm.drop_instance_zk_connections(node1)
        time.sleep(10)  # > old_parts_lifetime
        assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "3\n"

    assert_eq_with_retry(node1, "SELECT count(*) from system.parts where table = 'test_table' and active = 1", "1")

def test(started_cluster):
    with PartitionManager() as pm:
        # Hinder replication between replicas of the same shard, but leave the possibility of distributed connection.
        pm.partition_instances(node_1_1, node_1_2, port=9009)
        pm.partition_instances(node_2_1, node_2_2, port=9009)

        node_1_2.query("INSERT INTO replicated VALUES ('2017-05-08', 1)")
        node_2_2.query("INSERT INTO replicated VALUES ('2017-05-08', 2)")

        time.sleep(1)  # accrue replica delay

        assert node_1_1.query("SELECT sum(x) FROM replicated").strip() == '0'
        assert node_1_2.query("SELECT sum(x) FROM replicated").strip() == '1'
        assert node_2_1.query("SELECT sum(x) FROM replicated").strip() == '0'
        assert node_2_2.query("SELECT sum(x) FROM replicated").strip() == '2'

        # With in_order balancing the first replicas are chosen.
        assert instance_with_dist_table.query(
            "SELECT count() FROM distributed SETTINGS load_balancing='in_order'").strip() == '0'

        # When we set max_replica_delay, the first replicas must be excluded.
        assert instance_with_dist_table.query('''
            SELECT sum(x) FROM distributed SETTINGS
                load_balancing='in_order',
                max_replica_delay_for_distributed_queries=1
            ''').strip() == '3'

        pm.drop_instance_zk_connections(node_1_2)
        pm.drop_instance_zk_connections(node_2_2)

        # allow pings to ZooKeeper to time out (must be greater than the ZK session timeout).
        time.sleep(4)

        # At this point all replicas are stale, but the query must still go to the second replicas, which are the least stale ones.
        assert instance_with_dist_table.query('''
            SELECT sum(x) FROM distributed SETTINGS
                load_balancing='in_order',
                max_replica_delay_for_distributed_queries=1
            ''').strip() == '3'

        # If we forbid stale replicas, the query must fail.
        with pytest.raises(Exception):
            print(instance_with_dist_table.query('''
                SELECT count() FROM distributed SETTINGS
                    load_balancing='in_order',
                    max_replica_delay_for_distributed_queries=1,
                    fallback_to_stale_replicas_for_distributed_queries=0
                '''))

        # Now partition off the remote replica of the local shard and test that failover still works.
        pm.partition_instances(node_1_1, node_1_2, port=9000)

        assert instance_with_dist_table.query('''
            SELECT sum(x) FROM distributed SETTINGS
                load_balancing='in_order',
                max_replica_delay_for_distributed_queries=1
            ''').strip() == '2'

def test_inserts_batching(started_cluster):
    instance = instance_test_inserts_batching

    with PartitionManager() as pm:
        pm.partition_instances(instance, remote)

        instance.query("INSERT INTO distributed(d, x) VALUES ('2000-01-01', 1)")
        # Sleep a bit so that this INSERT forms a batch of its own.
        time.sleep(0.1)

        instance.query("INSERT INTO distributed(x, d) VALUES (2, '2000-01-01')")

        for i in range(3, 7):
            instance.query("INSERT INTO distributed(d, x) VALUES ('2000-01-01', {})".format(i))

        for i in range(7, 9):
            instance.query("INSERT INTO distributed(x, d) VALUES ({}, '2000-01-01')".format(i))

        instance.query("INSERT INTO distributed(d, x) VALUES ('2000-01-01', 9)")

        # After ALTER the structure of the saved blocks will be different
        instance.query("ALTER TABLE distributed ADD COLUMN s String")

        for i in range(10, 13):
            instance.query("INSERT INTO distributed(d, x) VALUES ('2000-01-01', {})".format(i))

    instance.query("SYSTEM FLUSH DISTRIBUTED distributed")
    time.sleep(1.0)

    result = remote.query("SELECT _part, groupArray(x) FROM local2 GROUP BY _part ORDER BY _part")

    # Explanation: as merges are turned off on the remote instance, active parts in the local2 table correspond 1-to-1
    # to inserted blocks.
    # Batches of max 3 rows are formed as min_insert_block_size_rows = 3.
    # Blocks:
    # 1. Failed batch that is retried with the same contents.
    # 2. Full batch of inserts before ALTER.
    # 3. Full batch of inserts before ALTER.
    # 4. Full batch of inserts after ALTER (that have different block structure).
    # 5. What was left to insert with the column structure before ALTER.
    expected = '''\
20000101_20000101_1_1_0\t[1]
20000101_20000101_2_2_0\t[2,3,4]
20000101_20000101_3_3_0\t[5,6,7]
20000101_20000101_4_4_0\t[10,11,12]
20000101_20000101_5_5_0\t[8,9]
'''
    assert TSV(result) == TSV(expected)

def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster):
    messages = [json.dumps({'key': j + 1, 'value': 'x' * 300}) for j in range(22)]
    kafka_produce('no_holes_when_write_suffix_failed', messages)

    instance.query('''
        DROP TABLE IF EXISTS test.view;
        DROP TABLE IF EXISTS test.consumer;

        CREATE TABLE test.kafka (key UInt64, value String)
            ENGINE = Kafka
            SETTINGS kafka_broker_list = 'kafka1:19092',
                     kafka_topic_list = 'no_holes_when_write_suffix_failed',
                     kafka_group_name = 'no_holes_when_write_suffix_failed',
                     kafka_format = 'JSONEachRow',
                     kafka_max_block_size = 20;

        CREATE TABLE test.view (key UInt64, value String)
            ENGINE = ReplicatedMergeTree('/clickhouse/kafkatest/tables/no_holes_when_write_suffix_failed', 'node1')
            ORDER BY key;

        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
            SELECT * FROM test.kafka
            WHERE NOT sleepEachRow(1);
    ''')

    # The tricky part here is that the disconnect should happen after the write prefix, but before the write suffix,
    # so sleepEachRow is used to stretch the block out.
    with PartitionManager() as pm:
        time.sleep(12)
        pm.drop_instance_zk_connections(instance)
        time.sleep(20)
        pm.heal_all()  # was `pm.heal_all` — without the parentheses the method was never called

    # connection restored and it will take a while until the next block is flushed
    # it takes years on CI :\
    time.sleep(90)

    # as it's a bit tricky to hit the proper moment - let's check in logs if we did it correctly
    assert instance.contains_in_log("ZooKeeper session has been expired.: while write prefix to view")

    result = instance.query('SELECT count(), uniqExact(key), max(key) FROM test.view')
    print(result)

    # kafka_cluster.open_bash_shell('instance')

    instance.query('''
        DROP TABLE test.consumer;
        DROP TABLE test.view;
    ''')

    assert TSV(result) == TSV('22\t22\t22')

def test_non_leader_replica(started_cluster):
    node1.query_with_retry(
        """CREATE TABLE IF NOT EXISTS sometable(id UInt32, value String)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '1') ORDER BY tuple()"""
    )

    node2.query_with_retry(
        """CREATE TABLE IF NOT EXISTS sometable(id UInt32, value String)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '2') ORDER BY tuple()
        SETTINGS replicated_can_become_leader = 0"""
    )

    node1.query("INSERT INTO sometable SELECT number, toString(number) FROM numbers(100)")
    node2.query_with_retry("SYSTEM SYNC REPLICA sometable", timeout=10)

    assert node1.query("SELECT COUNT() FROM sometable") == "100\n"
    assert node2.query("SELECT COUNT() FROM sometable") == "100\n"

    with PartitionManager() as pm:
        pm.drop_instance_zk_connections(node1)

        # this query should be executed by the leader, but the leader is partitioned from ZooKeeper
        with pytest.raises(Exception):
            node2.query(
                "ALTER TABLE sometable ON CLUSTER 'test_cluster' MODIFY COLUMN value UInt64 SETTINGS distributed_ddl_task_timeout=5"
            )

    # After the connection is restored, the ALTER should eventually be applied on both replicas.
    for _ in range(100):
        if "UInt64" in node1.query(
            "SELECT type FROM system.columns WHERE name='value' and table = 'sometable'"
        ):
            break
        time.sleep(0.1)

    for _ in range(100):
        if "UInt64" in node2.query(
            "SELECT type FROM system.columns WHERE name='value' and table = 'sometable'"
        ):
            break
        time.sleep(0.1)

    assert "UInt64" in node1.query(
        "SELECT type FROM system.columns WHERE name='value' and table = 'sometable'"
    )
    assert "UInt64" in node2.query(
        "SELECT type FROM system.columns WHERE name='value' and table = 'sometable'"
    )

    # Checking that DDLWorker doesn't hang and is still able to execute DDL queries
    node1.query(
        "CREATE TABLE new_table_with_ddl ON CLUSTER 'test_cluster' (key UInt32) ENGINE=MergeTree() ORDER BY tuple()",
        settings={"distributed_ddl_task_timeout": "10"},
    )
    assert node1.query("EXISTS new_table_with_ddl") == "1\n"
    assert node2.query("EXISTS new_table_with_ddl") == "1\n"

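# The two identical polling loops above could be shared between replicas. A minimal sketch of
# such a polling helper, reusing the same system.columns query; the helper name is illustrative
# and not part of the existing suite.
def wait_for_column_type(node, table, column, expected_type, attempts=100, delay=0.1):
    """Poll system.columns until `column` of `table` reports `expected_type`, or give up."""
    for _ in range(attempts):
        current = node.query(
            "SELECT type FROM system.columns WHERE name='{}' and table = '{}'".format(column, table))
        if expected_type in current:
            return True
        time.sleep(delay)
    return False

# Hypothetical usage:
#   assert wait_for_column_type(node1, 'sometable', 'value', 'UInt64')
#   assert wait_for_column_type(node2, 'sometable', 'value', 'UInt64')
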
def test_atomic_delete_with_stopped_zookeeper(start_cluster):
    node1.query("insert into zktest.atomic_drop_table values (8192)")

    with PartitionManager() as pm:
        pm.drop_instance_zk_connections(node1)
        error = node1.query_and_get_error("DROP TABLE zktest.atomic_drop_table")  # Table won't drop
        assert error != ""

    time.sleep(5)
    assert '8192' in node1.query("select * from zktest.atomic_drop_table")

def test_inconsistent_parts_if_drop_while_replica_not_active(start_cluster):
    with PartitionManager() as pm:
        # insert into all replicas
        for i in range(10):
            node1.query("INSERT INTO test_table VALUES ('2019-08-16', {})".format(i))
        assert_eq_with_retry(node2, "SELECT count(*) FROM test_table",
                             node1.query("SELECT count(*) FROM test_table"))

        # partition the first replica from the second one and (later) from zk
        pm.partition_instances(node1, node2)

        # insert some parts on the second replica only, we will drop these parts
        for i in range(10):
            node2.query("INSERT INTO test_table VALUES ('2019-08-16', {})".format(10 + i))

        pm.drop_instance_zk_connections(node1)

        # drop all parts on the second replica
        node2.query_with_retry("ALTER TABLE test_table DROP PARTITION 201908")
        assert_eq_with_retry(node2, "SELECT count(*) FROM test_table", "0")

        # insert into the second replica
        # DROP_RANGE will be removed from the replication log and the first replica will be lost
        for i in range(20):
            node2.query("INSERT INTO test_table VALUES ('2019-08-16', {})".format(20 + i))

        assert_eq_with_retry(
            node2,
            "SELECT value FROM system.zookeeper WHERE path='/clickhouse/tables/test1/replicated/replicas/node1' AND name='is_lost'",
            "1")

        for i in range(30):
            if node2.contains_in_log("Will mark replica node1 as lost"):
                break
            time.sleep(0.5)

        # the first replica will be cloned from the second
        pm.heal_all()
        assert_eq_with_retry(node1, "SELECT count(*) FROM test_table",
                             node2.query("SELECT count(*) FROM test_table"))

    # ensure the replica was cloned
    assert node1.contains_in_log("Will mimic node2")

    # the queue must be empty (except some merges that are possibly executing right now)
    assert node1.query(
        "SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'") == "0\n"
    assert node2.query(
        "SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'") == "0\n"

def network_partition_test(clickhouse_node, mysql_node, service_name):
    clickhouse_node.query("DROP DATABASE IF EXISTS test_database")
    clickhouse_node.query("DROP DATABASE IF EXISTS test")
    mysql_node.query("DROP DATABASE IF EXISTS test_database")
    mysql_node.query("DROP DATABASE IF EXISTS test")
    mysql_node.query("CREATE DATABASE test_database;")
    mysql_node.query(
        "CREATE TABLE test_database.test_table ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;"
    )

    mysql_node.query("CREATE DATABASE test;")

    clickhouse_node.query(
        "CREATE DATABASE test_database ENGINE = MaterializeMySQL('{}:3306', 'test_database', 'root', 'clickhouse')"
        .format(service_name))
    check_query(clickhouse_node, "SELECT * FROM test_database.test_table", '')

    with PartitionManager() as pm:
        drop_instance_mysql_connections(clickhouse_node, pm)
        mysql_node.query('INSERT INTO test_database.test_table VALUES(1)')
        check_query(clickhouse_node, "SELECT * FROM test_database.test_table", '')

        with pytest.raises(QueryRuntimeException) as exception:
            clickhouse_node.query(
                "CREATE DATABASE test ENGINE = MaterializeMySQL('{}:3306', 'test', 'root', 'clickhouse')"
                .format(service_name))

        assert "Can't connect to MySQL server" in str(exception.value)

        restore_instance_mysql_connections(clickhouse_node, pm)

        clickhouse_node.query("DETACH DATABASE test_database")
        clickhouse_node.query("ATTACH DATABASE test_database")
        check_query(clickhouse_node, "SELECT * FROM test_database.test_table FORMAT TSV", '1\n')

        clickhouse_node.query(
            "CREATE DATABASE test ENGINE = MaterializeMySQL('{}:3306', 'test', 'root', 'clickhouse')"
            .format(service_name))

        check_query(clickhouse_node, "SHOW TABLES FROM test_database FORMAT TSV", "test_table\n")

        mysql_node.query(
            "CREATE TABLE test.test ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;"
        )
        check_query(clickhouse_node, "SHOW TABLES FROM test FORMAT TSV", "test\n")

        clickhouse_node.query("DROP DATABASE test_database")
        clickhouse_node.query("DROP DATABASE test")

        mysql_node.query("DROP DATABASE test_database")
        mysql_node.query("DROP DATABASE test")

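# drop_instance_mysql_connections / restore_instance_mysql_connections are helpers defined
# elsewhere in this module. A minimal sketch of what they plausibly do, reusing the
# port-3306 DROP rule shown in test_disabled_mysql_server; the exact rule contents and the
# settle delay are assumptions, not the canonical implementation.
def drop_instance_mysql_connections(clickhouse_node, pm, action='DROP'):
    pm._add_rule({'source': clickhouse_node.ip_address, 'destination_port': 3306, 'action': action})
    time.sleep(5)  # give in-flight MySQL connections time to die

def restore_instance_mysql_connections(clickhouse_node, pm, action='DROP'):
    pm._delete_rule({'source': clickhouse_node.ip_address, 'destination_port': 3306, 'action': action})
    time.sleep(5)
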
def _test_on_connection_losses(test_cluster, zk_timeout):
    instance = test_cluster.instances['ch1']
    kill_instance = test_cluster.instances['ch2']

    with PartitionManager() as pm:
        pm.drop_instance_zk_connections(kill_instance)
        request = instance.get_query_request("DROP TABLE IF EXISTS test.__nope__ ON CLUSTER 'cluster'", timeout=10)
        time.sleep(zk_timeout)
        pm.restore_instance_zk_connections(kill_instance)

    test_cluster.check_all_hosts_successfully_executed(request.get_answer())

def test_inserts_batching(started_cluster):
    instance = instance_test_inserts_batching

    with PartitionManager() as pm:
        pm.partition_instances(instance, remote)

        instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', 1)")
        # Sleep a bit so that this INSERT forms a batch of its own.
        time.sleep(0.2)

        instance.query("INSERT INTO local2_source(x, d) VALUES (2, '2000-01-01')")

        for i in range(3, 7):
            instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', {})".format(i))

        for i in range(7, 9):
            instance.query("INSERT INTO local2_source(x, d) VALUES ({}, '2000-01-01')".format(i))

        instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', 9)")

        # After ALTER the structure of the saved blocks will be different
        instance.query("DROP TABLE local2_view")
        instance.query("ALTER TABLE distributed ADD COLUMN s String")

        # The Memory engine doesn't support ALTER, so we just DROP/CREATE everything
        instance.query("DROP TABLE local2_source")
        instance.query("CREATE TABLE local2_source (d Date, x UInt32, s String) ENGINE = Memory")
        instance.query("CREATE MATERIALIZED VIEW local2_view to distributed AS SELECT d, x, s FROM local2_source")

        for i in range(10, 13):
            instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', {})".format(i))

    time.sleep(1.0)

    result = remote.query("SELECT _part, groupArray(x) FROM local2 GROUP BY _part ORDER BY _part")

    # Explanation: as merges are turned off on the remote instance, active parts in the local2 table correspond 1-to-1
    # to inserted blocks.
    # Batches of max 3 rows are formed as min_insert_block_size_rows = 3.
    # Blocks:
    # 1. Failed batch that is retried with the same contents.
    # 2. Full batch of inserts regardless of the order of columns thanks to the view.
    # 3. Full batch of inserts regardless of the order of columns thanks to the view.
    # 4. Full batch of inserts after ALTER (that have different block structure).
    # 5. What was left to insert before ALTER.
    expected = '''\
20000101_20000101_1_1_0\t[1]
20000101_20000101_2_2_0\t[2,3,4]
20000101_20000101_3_3_0\t[5,6,7]
20000101_20000101_4_4_0\t[10,11,12]
20000101_20000101_5_5_0\t[8,9]
'''
    assert TSV(result) == TSV(expected)

def test_readonly_metrics(start_cluster):
    assert (
        node1.query("SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'") == "0\n"
    )

    with PartitionManager() as pm:
        ## make node1 readonly -> heal -> readonly -> heal -> detach table -> heal -> attach table
        pm.drop_instance_zk_connections(node1)
        assert_eq_with_retry(
            node1,
            "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'",
            "1\n",
            retry_count=300,
            sleep_time=1,
        )

        pm.heal_all()
        assert_eq_with_retry(
            node1,
            "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'",
            "0\n",
            retry_count=300,
            sleep_time=1,
        )

        pm.drop_instance_zk_connections(node1)
        assert_eq_with_retry(
            node1,
            "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'",
            "1\n",
            retry_count=300,
            sleep_time=1,
        )

        node1.query("DETACH TABLE test.test_table")
        assert "0\n" == node1.query(
            "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'"
        )

        pm.heal_all()
        node1.query("ATTACH TABLE test.test_table")
        assert_eq_with_retry(
            node1,
            "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'",
            "0\n",
            retry_count=300,
            sleep_time=1,
        )

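# assert_eq_with_retry comes from the shared integration-test utilities and is used throughout
# this file. A minimal sketch of the polling idea it relies on, under the assumption of a simple
# trimmed-string comparison; the real helper may differ in details, so this is illustrative only.
def assert_eq_with_retry_sketch(node, sql, expected, retry_count=20, sleep_time=0.5):
    """Re-run `sql` until its trimmed output equals `expected`, then assert once more at the end."""
    last = None
    for _ in range(retry_count):
        last = node.query(sql)
        if last.strip() == expected.strip():
            return
        time.sleep(sleep_time)
    assert last.strip() == expected.strip(), "got {!r}, expected {!r}".format(last, expected)
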
def test_attach_without_zk(start_cluster):
    node1.query(
        "CREATE TABLE test4_r1 (n UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test4/', 'r1') ORDER BY n"
    )
    node1.query("DETACH TABLE test4_r1")

    with PartitionManager() as pm:
        pm._add_rule({
            'probability': 0.5,
            'source': node1.ip_address,
            'destination_port': 2181,
            'action': 'DROP'
        })
        try:
            node1.query("ATTACH TABLE test4_r1")
        except Exception:
            pass  # the flaky ZooKeeper connection may make the first ATTACH fail
        node1.query("ATTACH TABLE IF NOT EXISTS test4_r1")

    node1.query("SELECT * FROM test4_r1")
