def test_concurrent_queries(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=False)
    cursor = conn.cursor()
    database_name = 'concurrent_test'

    cursor.execute(f'DROP DATABASE IF EXISTS {database_name}')
    cursor.execute(f'CREATE DATABASE {database_name}')
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True,
                             database_name=database_name)
    cursor = conn.cursor()
    cursor.execute('CREATE TABLE test_table (key integer, value integer)')

    node1.query(f'''
        CREATE TABLE test.test_table (key UInt32, value UInt32)
        ENGINE = PostgreSQL(postgres1, database='{database_name}', table='test_table')
    ''')

    node1.query(f'''
        CREATE TABLE test.stat (numbackends UInt32, datname String)
        ENGINE = PostgreSQL(postgres1, database='{database_name}', table='pg_stat_database')
    ''')

    def node_select(_):
        for i in range(20):
            result = node1.query("SELECT * FROM test.test_table", user='******')

    def node_insert(_):
        for i in range(20):
            result = node1.query(
                "INSERT INTO test.test_table SELECT number, number FROM numbers(1000)",
                user='******')

    def node_insert_select(_):
        for i in range(20):
            result = node1.query(
                "INSERT INTO test.test_table SELECT number, number FROM numbers(1000)",
                user='******')
            result = node1.query("SELECT * FROM test.test_table LIMIT 100", user='******')

    busy_pool = Pool(30)
    p = busy_pool.map_async(node_select, range(30))
    p.wait()
    count = int(node1.query(
        f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'"))
    print(count)
    assert (count <= 18)

    busy_pool = Pool(30)
    p = busy_pool.map_async(node_insert, range(30))
    p.wait()
    count = int(node1.query(
        f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'"))
    print(count)
    assert (count <= 18)

    busy_pool = Pool(30)
    p = busy_pool.map_async(node_insert_select, range(30))
    p.wait()
    count = int(node1.query(
        f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'"))
    print(count)
    assert (count <= 18)

    node1.query('DROP TABLE test.test_table;')
    node1.query('DROP TABLE test.stat;')
def test_postgresql_database_engine_queries(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()

    node1.query(
        "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')"
    )

    create_postgres_table(cursor, "test_table")
    assert (node1.query("SELECT count() FROM postgres_database.test_table").rstrip() == "0")

    node1.query(
        "INSERT INTO postgres_database.test_table SELECT number, number from numbers(10000)"
    )
    assert (node1.query("SELECT count() FROM postgres_database.test_table").rstrip() == "10000")

    drop_postgres_table(cursor, "test_table")
    assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("DROP DATABASE postgres_database")
    assert "postgres_database" not in node1.query("SHOW DATABASES")
def test_postgres_database_engine_with_postgres_ddl(started_cluster):
    # connect to database as well
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()

    node1.query(
        "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')"
    )
    assert "postgres_database" in node1.query("SHOW DATABASES")

    create_postgres_table(cursor, "test_table")
    assert "test_table" in node1.query("SHOW TABLES FROM postgres_database")

    cursor.execute("ALTER TABLE test_table ADD COLUMN data Text")
    assert "data" in node1.query(
        "SELECT name FROM system.columns WHERE table = 'test_table' AND database = 'postgres_database'"
    )

    cursor.execute("ALTER TABLE test_table DROP COLUMN data")
    assert "data" not in node1.query(
        "SELECT name FROM system.columns WHERE table = 'test_table' AND database = 'postgres_database'"
    )

    node1.query("DROP DATABASE postgres_database")
    assert "postgres_database" not in node1.query("SHOW DATABASES")

    drop_postgres_table(cursor, "test_table")
def test_postgresql_database_engine_with_clickhouse_ddl(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()

    node1.query(
        "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')"
    )

    create_postgres_table(cursor, "test_table")
    assert "test_table" in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("DROP TABLE postgres_database.test_table")
    assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("ATTACH TABLE postgres_database.test_table")
    assert "test_table" in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("DETACH TABLE postgres_database.test_table")
    assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("ATTACH TABLE postgres_database.test_table")
    assert "test_table" in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("DROP DATABASE postgres_database")
    assert "postgres_database" not in node1.query("SHOW DATABASES")

    drop_postgres_table(cursor, "test_table")
def test_postgresql_database_with_schema(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()

    cursor.execute("CREATE SCHEMA test_schema")
    cursor.execute("CREATE TABLE test_schema.table1 (a integer)")
    cursor.execute("CREATE TABLE test_schema.table2 (a integer)")
    cursor.execute("CREATE TABLE table3 (a integer)")

    node1.query(
        "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', 'test_schema')"
    )

    assert node1.query("SHOW TABLES FROM postgres_database") == "table1\ntable2\n"

    node1.query("INSERT INTO postgres_database.table1 SELECT number from numbers(10000)")
    assert (node1.query("SELECT count() FROM postgres_database.table1").rstrip() == "10000")

    node1.query("DETACH TABLE postgres_database.table1")
    node1.query("ATTACH TABLE postgres_database.table1")
    assert (node1.query("SELECT count() FROM postgres_database.table1").rstrip() == "10000")

    node1.query("DROP DATABASE postgres_database")

    cursor.execute("DROP SCHEMA test_schema CASCADE")
    cursor.execute("DROP TABLE table3")
def test_get_create_table_query_with_multidim_arrays(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()

    node1.query(
        "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')"
    )

    cursor.execute("""
    CREATE TABLE array_columns (
        b Integer[][][] NOT NULL,
        c Integer[][][]
    )""")

    node1.query("DETACH TABLE postgres_database.array_columns")
    node1.query("ATTACH TABLE postgres_database.array_columns")

    node1.query(
        "INSERT INTO postgres_database.array_columns "
        "VALUES ("
        "[[[1, 1], [1, 1]], [[3, 3], [3, 3]], [[4, 4], [5, 5]]], "
        "[[[1, NULL], [NULL, 1]], [[NULL, NULL], [NULL, NULL]], [[4, 4], [5, 5]]] "
        ")")
    result = node1.query("""
        SELECT * FROM postgres_database.array_columns""")
    expected = (
        "[[[1,1],[1,1]],[[3,3],[3,3]],[[4,4],[5,5]]]\t"
        "[[[1,NULL],[NULL,1]],[[NULL,NULL],[NULL,NULL]],[[4,4],[5,5]]]\n")
    assert result == expected

    node1.query("DROP DATABASE postgres_database")
    assert "postgres_database" not in node1.query("SHOW DATABASES")
    drop_postgres_table(cursor, "array_columns")
def test_single_transaction(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
        auto_commit=False,
    )
    cursor = conn.cursor()

    table_name = "postgresql_replica_0"
    create_postgres_table(cursor, table_name)
    conn.commit()

    pg_manager.create_materialized_db(ip=started_cluster.postgres_ip,
                                      port=started_cluster.postgres_port)
    assert_nested_table_is_created(instance, table_name)

    for query in queries:
        print("query {}".format(query))
        cursor.execute(query.format(0))

    time.sleep(5)
    result = instance.query(f"select count() from test_database.{table_name}")
    # no commit yet
    assert int(result) == 0

    conn.commit()
    check_tables_are_synchronized(instance, table_name)
def test_load_dictionaries(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        database=True,
        port=started_cluster.postgres_port,
    )
    cursor = conn.cursor()

    table_name = "test0"
    create_and_fill_postgres_table(
        cursor,
        table_name,
        port=started_cluster.postgres_port,
        host=started_cluster.postgres_ip,
    )
    create_dict(table_name)
    dict_name = "dict0"

    node1.query(f"SYSTEM RELOAD DICTIONARY {dict_name}")
    assert (node1.query(f"SELECT count() FROM `test`.`dict_table_{table_name}`").rstrip() == "10000")
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'key', toUInt64(0))") == "0\n")
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(9999))") == "9999\n")

    cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
    node1.query(f"DROP TABLE IF EXISTS {table_name}")
    node1.query(f"DROP DICTIONARY IF EXISTS {dict_name}")
def test_bad_configuration(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()

    node1.query("""
        DROP DICTIONARY IF EXISTS postgres_dict;
        CREATE DICTIONARY postgres_dict (id UInt32, value UInt32)
        PRIMARY KEY id
        SOURCE(POSTGRESQL(
            port 5432
            host 'postgres1'
            user 'postgres'
            password 'mysecretpassword'
            dbbb 'postgres_database'
            table 'test_schema.test_table'))
        LIFETIME(MIN 1 MAX 2)
        LAYOUT(HASHED());
    """)

    node1.query_and_get_error("SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(1))")
    assert node1.contains_in_log("Unexpected key `dbbb`")
def transaction(thread_id):
    if thread_id % 2:
        conn = get_postgres_conn(ip=started_cluster.postgres_ip,
                                 port=started_cluster.postgres_port,
                                 database=True,
                                 auto_commit=True)
    else:
        conn = get_postgres_conn(ip=started_cluster.postgres_ip,
                                 port=started_cluster.postgres_port,
                                 database=True,
                                 auto_commit=False)
    cursor = conn.cursor()
    for query in queries:
        cursor.execute(query.format(thread_id))
        print('thread {}, query {}'.format(thread_id, query))
    if thread_id % 2 == 0:
        conn.commit()
def test_auto_close_connection(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=False)
    cursor = conn.cursor()
    database_name = "auto_close_connection_test"

    cursor.execute(f"DROP DATABASE IF EXISTS {database_name}")
    cursor.execute(f"CREATE DATABASE {database_name}")
    conn = get_postgres_conn(
        started_cluster.postgres_ip,
        started_cluster.postgres_port,
        database=True,
        database_name=database_name,
    )
    cursor = conn.cursor()
    cursor.execute("CREATE TABLE test_table (key integer, value integer)")

    node2.query(f"""
        CREATE TABLE test.test_table (key UInt32, value UInt32)
        ENGINE = PostgreSQL(postgres1, database='{database_name}', table='test_table')
    """)

    result = node2.query(
        "INSERT INTO test.test_table SELECT number, number FROM numbers(1000)",
        user="******",
    )

    result = node2.query("SELECT * FROM test.test_table LIMIT 100", user="******")

    node2.query(f"""
        CREATE TABLE test.stat (numbackends UInt32, datname String)
        ENGINE = PostgreSQL(postgres1, database='{database_name}', table='pg_stat_database')
    """)

    count = int(
        node2.query(f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'"))

    # Connection from python + pg_stat table also has a connection at the moment of current query
    assert count == 2
def started_cluster():
    try:
        cluster.start()
        conn = get_postgres_conn(cluster.postgres_ip, cluster.postgres_port)
        cursor = conn.cursor()
        create_postgres_db(cursor, "postgres_database")
        yield cluster
    finally:
        cluster.shutdown()
def started_cluster():
    try:
        cluster.start()
        node1.query("CREATE DATABASE IF NOT EXISTS test")

        postgres_conn = get_postgres_conn(ip=cluster.postgres_ip,
                                          port=cluster.postgres_port)
        logging.debug("postgres1 connected")
        create_postgres_db(postgres_conn, "postgres_database")

        postgres2_conn = get_postgres_conn(ip=cluster.postgres2_ip,
                                           port=cluster.postgres_port)
        logging.debug("postgres2 connected")
        create_postgres_db(postgres2_conn, "postgres_database")

        yield cluster
    finally:
        cluster.shutdown()
def test_dictionary_with_replicas(started_cluster):
    conn1 = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor1 = conn1.cursor()
    conn2 = get_postgres_conn(
        ip=started_cluster.postgres2_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor2 = conn2.cursor()

    create_postgres_table(cursor1, "test1")
    create_postgres_table(cursor2, "test1")

    cursor1.execute("INSERT INTO test1 select i, i, i from generate_series(0, 99) as t(i);")
    cursor2.execute("INSERT INTO test1 select i, i, i from generate_series(100, 199) as t(i);")

    create_dict("test1", 1)
    result = node1.query("SELECT * FROM `test`.`dict_table_test1` ORDER BY key")

    # priority 0 - non running port
    assert node1.contains_in_log("PostgreSQLConnectionPool: Connection error*")

    # priority 1 - postgres2, table contains rows with values 100-200
    # priority 2 - postgres1, table contains rows with values 0-100
    expected = node1.query("SELECT number, number FROM numbers(100, 100)")
    assert result == expected

    cursor1.execute("DROP TABLE IF EXISTS test1")
    cursor2.execute("DROP TABLE IF EXISTS test1")
    node1.query("DROP TABLE IF EXISTS test1")
    node1.query("DROP DICTIONARY IF EXISTS dict1")
def test_invalidate_query(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        database=True,
        port=started_cluster.postgres_port,
    )
    cursor = conn.cursor()
    table_name = "test0"
    create_and_fill_postgres_table(
        cursor,
        table_name,
        port=started_cluster.postgres_port,
        host=started_cluster.postgres_ip,
    )

    # invalidate query: SELECT value FROM test0 WHERE id = 0
    dict_name = "dict0"
    create_dict(table_name)
    node1.query(f"SYSTEM RELOAD DICTIONARY {dict_name}")
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))") == "0\n")
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(1))") == "1\n")

    # update should happen
    cursor.execute(f"UPDATE {table_name} SET value=value+1 WHERE id = 0")
    while True:
        result = node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))")
        if result != "0\n":
            break
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))") == "1\n")

    # no update should happen
    cursor.execute(f"UPDATE {table_name} SET value=value*2 WHERE id != 0")
    time.sleep(5)
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))") == "1\n")
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(1))") == "1\n")

    # update should happen
    cursor.execute(f"UPDATE {table_name} SET value=value+1 WHERE id = 0")
    time.sleep(5)
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))") == "2\n")
    assert (node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(1))") == "2\n")

    node1.query(f"DROP TABLE IF EXISTS {table_name}")
    node1.query(f"DROP DICTIONARY IF EXISTS {dict_name}")
    cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
def test_postgres_database_old_syntax(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()

    node1.query("""
        CREATE DATABASE postgres_database
        ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', 1);
    """)
    create_postgres_table(cursor, "test_table")
    assert "test_table" in node1.query("SHOW TABLES FROM postgres_database")
    cursor.execute("DROP TABLE test_table")
    node1.query("DROP DATABASE IF EXISTS postgres_database;")
def test_postgresql_database_engine_table_cache(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()

    node1.query(
        "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', '', 1)"
    )

    create_postgres_table(cursor, "test_table")
    assert (node1.query("DESCRIBE TABLE postgres_database.test_table").rstrip()
            == "id\tInt32\t\t\t\t\t\nvalue\tNullable(Int32)")

    cursor.execute("ALTER TABLE test_table ADD COLUMN data Text")
    assert (node1.query("DESCRIBE TABLE postgres_database.test_table").rstrip()
            == "id\tInt32\t\t\t\t\t\nvalue\tNullable(Int32)")

    node1.query("DETACH TABLE postgres_database.test_table")
    assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("ATTACH TABLE postgres_database.test_table")
    assert "test_table" in node1.query("SHOW TABLES FROM postgres_database")

    assert (node1.query("DESCRIBE TABLE postgres_database.test_table").rstrip()
            == "id\tInt32\t\t\t\t\t\nvalue\tNullable(Int32)\t\t\t\t\t\ndata\tNullable(String)")

    node1.query("DROP TABLE postgres_database.test_table")
    assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("ATTACH TABLE postgres_database.test_table")
    assert "test_table" in node1.query("SHOW TABLES FROM postgres_database")

    node1.query(
        "INSERT INTO postgres_database.test_table SELECT number, number, toString(number) from numbers(10000)"
    )
    assert (node1.query("SELECT count() FROM postgres_database.test_table").rstrip() == "10000")

    cursor.execute("DROP TABLE test_table;")
    assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database")

    node1.query("DROP DATABASE postgres_database")
    assert "postgres_database" not in node1.query("SHOW DATABASES")
def test_postgres_dictionaries_custom_query_partial_load_complex_key(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        database=True,
        port=started_cluster.postgres_port,
    )
    cursor = conn.cursor()

    cursor.execute("CREATE TABLE IF NOT EXISTS test_table_1 (id Integer, key Text, value_1 Text);")
    cursor.execute("CREATE TABLE IF NOT EXISTS test_table_2 (id Integer, key Text, value_2 Text);")
    cursor.execute("INSERT INTO test_table_1 VALUES (1, 'Key', 'Value_1');")
    cursor.execute("INSERT INTO test_table_2 VALUES (1, 'Key', 'Value_2');")

    query = node1.query
    query(f"""
    CREATE DICTIONARY test_dictionary_custom_query
    (
        id UInt64,
        key String,
        value_1 String,
        value_2 String
    )
    PRIMARY KEY id, key
    LAYOUT(COMPLEX_KEY_DIRECT())
    SOURCE(PostgreSQL(
        DB 'postgres_database'
        HOST '{started_cluster.postgres_ip}'
        PORT {started_cluster.postgres_port}
        USER 'postgres'
        PASSWORD 'mysecretpassword'
        QUERY $doc$SELECT id, key, value_1, value_2 FROM test_table_1 INNER JOIN test_table_2 USING (id, key) WHERE {{condition}};$doc$))
    """)

    result = query(
        "SELECT dictGet('test_dictionary_custom_query', ('value_1', 'value_2'), (toUInt64(1), 'Key'))"
    )
    assert result == "('Value_1','Value_2')\n"

    query("DROP DICTIONARY test_dictionary_custom_query;")

    cursor.execute("DROP TABLE test_table_2;")
    cursor.execute("DROP TABLE test_table_1;")
def test_virtual_columns(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()
    table_name = "postgresql_replica_0"
    create_postgres_table(cursor, table_name)

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=["materialized_postgresql_allow_automatic_update = 1"],
    )

    assert_nested_table_is_created(instance, table_name)
    instance.query(
        f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10)")
    check_tables_are_synchronized(instance, table_name)

    # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time.
    result = instance.query(
        f"SELECT key, value, _sign, _version FROM test_database.{table_name};")
    print(result)

    cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value2 integer")
    instance.query(
        f"INSERT INTO postgres_database.{table_name} SELECT number, number, number from numbers(10, 10)"
    )
    assert_number_of_columns(instance, 3, table_name)
    check_tables_are_synchronized(instance, table_name)

    result = instance.query(
        "SELECT key, value, value2, _sign, _version FROM test_database.postgresql_replica_0;")
    print(result)

    instance.query(
        f"INSERT INTO postgres_database.{table_name} SELECT number, number, number from numbers(20, 10)"
    )
    check_tables_are_synchronized(instance, table_name)

    result = instance.query(
        f"SELECT key, value, value2, _sign, _version FROM test_database.{table_name};")
    print(result)
def test_postgres_dictionaries_custom_query_full_load(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        database=True,
        port=started_cluster.postgres_port,
    )
    cursor = conn.cursor()

    cursor.execute("CREATE TABLE IF NOT EXISTS test_table_1 (id Integer, value_1 Text);")
    cursor.execute("CREATE TABLE IF NOT EXISTS test_table_2 (id Integer, value_2 Text);")
    cursor.execute("INSERT INTO test_table_1 VALUES (1, 'Value_1');")
    cursor.execute("INSERT INTO test_table_2 VALUES (1, 'Value_2');")

    query = node1.query
    query(f"""
    CREATE DICTIONARY test_dictionary_custom_query
    (
        id UInt64,
        value_1 String,
        value_2 String
    )
    PRIMARY KEY id
    LAYOUT(FLAT())
    SOURCE(PostgreSQL(
        DB 'postgres_database'
        HOST '{started_cluster.postgres_ip}'
        PORT {started_cluster.postgres_port}
        USER 'postgres'
        PASSWORD 'mysecretpassword'
        QUERY $doc$SELECT id, value_1, value_2 FROM test_table_1 INNER JOIN test_table_2 USING (id);$doc$))
    LIFETIME(0)
    """)

    result = query("SELECT id, value_1, value_2 FROM test_dictionary_custom_query")
    assert result == "1\tValue_1\tValue_2\n"

    query("DROP DICTIONARY test_dictionary_custom_query;")

    cursor.execute("DROP TABLE test_table_2;")
    cursor.execute("DROP TABLE test_table_1;")
def test_replica_identity_index(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()

    create_postgres_table(cursor, "postgresql_replica", template=postgres_table_template_3)
    cursor.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);")
    cursor.execute("ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx")
    instance.query(
        "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(50, 10)"
    )

    pg_manager.create_materialized_db(ip=started_cluster.postgres_ip,
                                      port=started_cluster.postgres_port)
    instance.query(
        "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(100, 10)"
    )
    check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1")

    cursor.execute("UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ")
    cursor.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ")
    cursor.execute("UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 ")
    cursor.execute("UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 ")
    check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1")

    cursor.execute("DELETE FROM postgresql_replica WHERE key2<75;")
    check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1")
def test_replicating_dml(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()
    NUM_TABLES = 5

    for i in range(NUM_TABLES):
        create_postgres_table(cursor, "postgresql_replica_{}".format(i))
        instance.query(
            "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(50)"
            .format(i, i))

    pg_manager.create_materialized_db(ip=started_cluster.postgres_ip,
                                      port=started_cluster.postgres_port)

    for i in range(NUM_TABLES):
        instance.query(
            "INSERT INTO postgres_database.postgresql_replica_{} SELECT 50 + number, {} from numbers(1000)"
            .format(i, i))
    check_several_tables_are_synchronized(instance, NUM_TABLES)

    for i in range(NUM_TABLES):
        cursor.execute(
            "UPDATE postgresql_replica_{} SET value = {} * {} WHERE key < 50;".format(i, i, i))
        cursor.execute(
            "UPDATE postgresql_replica_{} SET value = {} * {} * {} WHERE key >= 50;"
            .format(i, i, i, i))
    check_several_tables_are_synchronized(instance, NUM_TABLES)

    for i in range(NUM_TABLES):
        cursor.execute(
            "DELETE FROM postgresql_replica_{} WHERE (value*value + {}) % 2 = 0;".format(i, i))
        cursor.execute(
            "UPDATE postgresql_replica_{} SET value = value - (value % 7) WHERE key > 128 AND key < 512;"
            .format(i))
        cursor.execute("DELETE FROM postgresql_replica_{} WHERE key % 7 = 1;".format(i))
    check_several_tables_are_synchronized(instance, NUM_TABLES)
def test_changing_replica_identity_value(started_cluster):
    conn = get_postgres_conn(ip=started_cluster.postgres_ip,
                             port=started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()
    create_postgres_table(cursor, 'postgresql_replica')
    instance.query(
        "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, number from numbers(50)"
    )

    pg_manager.create_materialized_db(ip=started_cluster.postgres_ip,
                                      port=started_cluster.postgres_port)

    instance.query(
        "INSERT INTO postgres_database.postgresql_replica SELECT 100 + number, number from numbers(50)"
    )
    check_tables_are_synchronized(instance, 'postgresql_replica')
    cursor.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ")
    check_tables_are_synchronized(instance, 'postgresql_replica')
def test_postgresql_fetch_tables(started_cluster):
    conn = get_postgres_conn(started_cluster.postgres_ip,
                             started_cluster.postgres_port,
                             database=True)
    cursor = conn.cursor()

    cursor.execute("DROP SCHEMA IF EXISTS test_schema CASCADE")
    cursor.execute("CREATE SCHEMA test_schema")
    cursor.execute("CREATE TABLE test_schema.table1 (a integer)")
    cursor.execute("CREATE TABLE test_schema.table2 (a integer)")
    cursor.execute("CREATE TABLE table3 (a integer)")

    node1.query(
        "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')"
    )

    assert node1.query("SHOW TABLES FROM postgres_database") == "table3\n"
    assert not node1.contains_in_log("PostgreSQL table table1 does not exist")

    cursor.execute("DROP TABLE table3")
    cursor.execute("DROP SCHEMA IF EXISTS test_schema CASCADE")
def test_user_managed_slots(started_cluster):
    slot_name = "user_slot"
    table_name = "test_table"
    pg_manager.create_and_fill_postgres_table(table_name)

    replication_connection = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
        replication=True,
        auto_commit=True,
    )
    snapshot = create_replication_slot(replication_connection, slot_name=slot_name)

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=[
            f"materialized_postgresql_replication_slot = '{slot_name}'",
            f"materialized_postgresql_snapshot = '{snapshot}'",
        ],
    )
    check_tables_are_synchronized(instance, table_name)

    instance.query(
        "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000, 10000)"
        .format(table_name))
    check_tables_are_synchronized(instance, table_name)

    instance.restart_clickhouse()

    instance.query(
        "INSERT INTO postgres_database.{} SELECT number, number from numbers(20000, 10000)"
        .format(table_name))
    check_tables_are_synchronized(instance, table_name)

    pg_manager.drop_materialized_db()
    drop_replication_slot(replication_connection, slot_name)
    replication_connection.close()
def test_postgres_schema(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()

    cursor.execute("CREATE SCHEMA test_schema")
    cursor.execute("CREATE TABLE test_schema.test_table (id integer, value integer)")
    cursor.execute(
        "INSERT INTO test_schema.test_table SELECT i, i FROM generate_series(0, 99) as t(i)")

    node1.query("""
        DROP DICTIONARY IF EXISTS postgres_dict;
        CREATE DICTIONARY postgres_dict (id UInt32, value UInt32)
        PRIMARY KEY id
        SOURCE(POSTGRESQL(
            port 5432
            host 'postgres1'
            user 'postgres'
            password 'mysecretpassword'
            db 'postgres_database'
            table 'test_schema.test_table'))
        LIFETIME(MIN 1 MAX 2)
        LAYOUT(HASHED());
    """)

    result = node1.query("SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(1))")
    assert int(result.strip()) == 1
    result = node1.query("SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(99))")
    assert int(result.strip()) == 99

    node1.query("DROP DICTIONARY IF EXISTS postgres_dict")
    cursor.execute("DROP TABLE test_schema.test_table")
    cursor.execute("DROP SCHEMA test_schema")
def test_multiple_databases(started_cluster):
    NUM_TABLES = 5
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=False,
    )
    cursor = conn.cursor()
    pg_manager.create_postgres_db(cursor, "postgres_database_1")
    pg_manager.create_postgres_db(cursor, "postgres_database_2")

    conn1 = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
        database_name="postgres_database_1",
    )
    conn2 = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
        database_name="postgres_database_2",
    )
    cursor1 = conn1.cursor()
    cursor2 = conn2.cursor()

    pg_manager.create_clickhouse_postgres_db(
        cluster.postgres_ip,
        cluster.postgres_port,
        "postgres_database_1",
        "postgres_database_1",
    )
    pg_manager.create_clickhouse_postgres_db(
        cluster.postgres_ip,
        cluster.postgres_port,
        "postgres_database_2",
        "postgres_database_2",
    )

    cursors = [cursor1, cursor2]
    for cursor_id in range(len(cursors)):
        for i in range(NUM_TABLES):
            table_name = "postgresql_replica_{}".format(i)
            create_postgres_table(cursors[cursor_id], table_name)
            instance.query(
                "INSERT INTO postgres_database_{}.{} SELECT number, number from numbers(50)"
                .format(cursor_id + 1, table_name))

    print(
        "database 1 tables: ",
        instance.query(
            """SELECT name FROM system.tables WHERE database = 'postgres_database_1';"""),
    )
    print(
        "database 2 tables: ",
        instance.query(
            """SELECT name FROM system.tables WHERE database = 'postgres_database_2';"""),
    )

    pg_manager.create_materialized_db(
        started_cluster.postgres_ip,
        started_cluster.postgres_port,
        "test_database_1",
        "postgres_database_1",
    )
    pg_manager.create_materialized_db(
        started_cluster.postgres_ip,
        started_cluster.postgres_port,
        "test_database_2",
        "postgres_database_2",
    )

    cursors = [cursor1, cursor2]
    for cursor_id in range(len(cursors)):
        for i in range(NUM_TABLES):
            table_name = "postgresql_replica_{}".format(i)
            instance.query(
                "INSERT INTO postgres_database_{}.{} SELECT 50 + number, number from numbers(50)"
                .format(cursor_id + 1, table_name))

    for cursor_id in range(len(cursors)):
        for i in range(NUM_TABLES):
            table_name = "postgresql_replica_{}".format(i)
            check_tables_are_synchronized(
                instance,
                table_name,
                "key",
                "postgres_database_{}".format(cursor_id + 1),
                "test_database_{}".format(cursor_id + 1),
            )
def test_many_concurrent_queries(started_cluster):
    query_pool = [
        "DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;",
        "UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;",
        "DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;",
        "UPDATE postgresql_replica_{} SET value = value*5 WHERE key % 2 = 1;",
        "DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;",
        "UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;",
        "DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;",
        "UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;",
        "DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;",
        "UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;",
        "DELETE FROM postgresql_replica_{} WHERE value%5 = 0;",
    ]
    NUM_TABLES = 5

    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()
    pg_manager.create_and_fill_postgres_tables_from_cursor(cursor, NUM_TABLES, numbers=10000)

    def attack(thread_id):
        print("thread {}".format(thread_id))
        k = 10000
        for i in range(20):
            query_id = random.randrange(0, len(query_pool) - 1)
            table_id = random.randrange(0, 5)  # num tables

            # random update / delete query
            cursor.execute(query_pool[query_id].format(table_id))
            print("table {} query {} ok".format(table_id, query_id))

            # allow some thread to do inserts (not to violate key constraints)
            if thread_id < 5:
                print("try insert table {}".format(thread_id))
                instance.query(
                    "INSERT INTO postgres_database.postgresql_replica_{} SELECT {}*10000*({} + number), number from numbers(1000)"
                    .format(i, thread_id, k))
                k += 1
                print("insert table {} ok".format(thread_id))

                if i == 5:
                    # also change primary key value
                    print("try update primary key {}".format(thread_id))
                    cursor.execute(
                        "UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0"
                        .format(thread_id, i + 1, i + 1))
                    print("update primary key {} ok".format(thread_id))

    n = [10000]

    threads = []
    threads_num = 16
    for i in range(threads_num):
        threads.append(threading.Thread(target=attack, args=(i, )))

    pg_manager.create_materialized_db(ip=started_cluster.postgres_ip,
                                      port=started_cluster.postgres_port)

    for thread in threads:
        time.sleep(random.uniform(0, 1))
        thread.start()

    n[0] = 50000
    for table_id in range(NUM_TABLES):
        n[0] += 1
        instance.query(
            "INSERT INTO postgres_database.postgresql_replica_{} SELECT {} + number, number from numbers(5000)"
            .format(table_id, n[0]))
        # cursor.execute("UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(table_id, table_id+1, table_id+1))

    for thread in threads:
        thread.join()

    for i in range(NUM_TABLES):
        check_tables_are_synchronized(instance, "postgresql_replica_{}".format(i))
        count1 = instance.query(
            "SELECT count() FROM postgres_database.postgresql_replica_{}".format(i))
        count2 = instance.query(
            "SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{})".format(i))
        assert int(count1) == int(count2)
        print(count1, count2)
def test_table_schema_changes(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()
    NUM_TABLES = 5

    for i in range(NUM_TABLES):
        create_postgres_table(
            cursor,
            "postgresql_replica_{}".format(i),
            template=postgres_table_template_2,
        )
        instance.query(
            "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {}, {}, {} from numbers(25)"
            .format(i, i, i, i))

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=["materialized_postgresql_allow_automatic_update = 1"],
    )

    for i in range(NUM_TABLES):
        instance.query(
            "INSERT INTO postgres_database.postgresql_replica_{} SELECT 25 + number, {}, {}, {} from numbers(25)"
            .format(i, i, i, i))

    check_several_tables_are_synchronized(instance, NUM_TABLES)

    expected = instance.query(
        "SELECT key, value1, value3 FROM test_database.postgresql_replica_3 ORDER BY key")

    altered_idx = random.randint(0, 4)
    altered_table = f"postgresql_replica_{altered_idx}"
    cursor.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2")

    for i in range(NUM_TABLES):
        cursor.execute(f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})")
        cursor.execute(f"UPDATE {altered_table} SET value3 = 12 WHERE key%2=0")

    time.sleep(2)
    assert_nested_table_is_created(instance, altered_table)
    assert_number_of_columns(instance, 3, altered_table)
    check_tables_are_synchronized(instance, altered_table)
    print("check1 OK")

    check_several_tables_are_synchronized(instance, NUM_TABLES)

    for i in range(NUM_TABLES):
        if i != altered_idx:
            instance.query(
                "INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {}, {} from numbers(49)"
                .format(i, i, i, i))
        else:
            instance.query(
                "INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {} from numbers(49)"
                .format(i, i, i))

    check_tables_are_synchronized(instance, altered_table)
    print("check2 OK")
    check_several_tables_are_synchronized(instance, NUM_TABLES)
def test_different_data_types(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()
    cursor.execute("drop table if exists test_data_types;")
    cursor.execute("drop table if exists test_array_data_type;")

    cursor.execute("""CREATE TABLE test_data_types (
        id integer PRIMARY KEY, a smallint, b integer, c bigint, d real, e double precision,
        f serial, g bigserial, h timestamp, i date, j decimal(5, 5), k numeric(5, 5))""")

    cursor.execute("""CREATE TABLE test_array_data_type (
        key Integer NOT NULL PRIMARY KEY,
        a Date[] NOT NULL,                     -- Date
        b Timestamp[] NOT NULL,                -- DateTime64(6)
        c real[][] NOT NULL,                   -- Float32
        d double precision[][] NOT NULL,       -- Float64
        e decimal(5, 5)[][][] NOT NULL,        -- Decimal32
        f integer[][][] NOT NULL,              -- Int32
        g Text[][][][][] NOT NULL,             -- String
        h Integer[][][],                       -- Nullable(Int32)
        i Char(2)[][][][],                     -- Nullable(String)
        k Char(2)[]                            -- Nullable(String)
        )""")

    pg_manager.create_materialized_db(ip=started_cluster.postgres_ip,
                                      port=started_cluster.postgres_port)

    for i in range(10):
        instance.query("""
            INSERT INTO postgres_database.test_data_types VALUES
            ({}, -32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890,
            2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12',
            0.2, 0.2)""".format(i))

    check_tables_are_synchronized(instance, "test_data_types", "id")
    result = instance.query("SELECT * FROM test_database.test_data_types ORDER BY id LIMIT 1;")
    assert (
        result ==
        "0\t-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t0.2\t0.2\n"
    )

    for i in range(10):
        col = random.choice(["a", "b", "c"])
        cursor.execute("UPDATE test_data_types SET {} = {};".format(col, i))
        cursor.execute("UPDATE test_data_types SET i = '2020-12-12';")

    check_tables_are_synchronized(instance, "test_data_types", "id")

    instance.query(
        "INSERT INTO postgres_database.test_array_data_type "
        "VALUES ("
        "0, "
        "['2000-05-12', '2000-05-12'], "
        "['2000-05-12 12:12:12.012345', '2000-05-12 12:12:12.012345'], "
        "[[1.12345], [1.12345], [1.12345]], "
        "[[1.1234567891], [1.1234567891], [1.1234567891]], "
        "[[[0.11111, 0.11111]], [[0.22222, 0.22222]], [[0.33333, 0.33333]]], "
        "[[[1, 1], [1, 1]], [[3, 3], [3, 3]], [[4, 4], [5, 5]]], "
        "[[[[['winx', 'winx', 'winx']]]]], "
        "[[[1, NULL], [NULL, 1]], [[NULL, NULL], [NULL, NULL]], [[4, 4], [5, 5]]], "
        "[[[[NULL]]]], "
        "[]"
        ")")

    expected = (
        "0\t"
        "['2000-05-12','2000-05-12']\t"
        "['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']\t"
        "[[1.12345],[1.12345],[1.12345]]\t"
        "[[1.1234567891],[1.1234567891],[1.1234567891]]\t"
        "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t"
        "[[[1,1],[1,1]],[[3,3],[3,3]],[[4,4],[5,5]]]\t"
        "[[[[['winx','winx','winx']]]]]\t"
        "[[[1,NULL],[NULL,1]],[[NULL,NULL],[NULL,NULL]],[[4,4],[5,5]]]\t"
        "[[[[NULL]]]]\t"
        "[]\n")

    check_tables_are_synchronized(instance, "test_array_data_type")
    result = instance.query("SELECT * FROM test_database.test_array_data_type ORDER BY key;")
    assert result == expected

    pg_manager.drop_materialized_db()
    cursor.execute("drop table if exists test_data_types;")
    cursor.execute("drop table if exists test_array_data_type;")