def test_cdc_alter_table_drop_column(scylla_only, cql, test_keyspace):
    """Dropping a regular column from a CDC-enabled base table must not
    break subsequent reads of the CDC log table."""
    base_schema = "pk int primary key, v int"
    cdc_opts = " with cdc = {'enabled': true}"
    with new_test_table(cql, test_keyspace, base_schema, cdc_opts) as table:
        # Write one row with a live value and one with an explicit null,
        # so the CDC log records both cell shapes before the drop.
        cql.execute(f"insert into {table} (pk, v) values (0, 0)")
        cql.execute(f"insert into {table} (pk, v) values (1, null)")
        # Persist both the base table and its CDC log to sstables.
        for t in (table, table + "_scylla_cdc_log"):
            flush(cql, t)
        cql.execute(f"alter table {table} drop v")
        # Reading the log after the drop must still succeed.
        cql.execute(f"select * from {table}_scylla_cdc_log")
def simple_no_clustering_table(cql, keyspace):
    """Create and populate a table with only a partition key.

    Inserts ten rows (pk 0..9, v=0), flushes them to disk, and returns
    the (table_name, create_statement) pair.
    """
    name = util.unique_name()
    create_stmt = f"CREATE TABLE {keyspace}.{name} (pk int PRIMARY KEY , v int)"
    cql.execute(create_stmt)
    for key in range(10):
        cql.execute(f"INSERT INTO {keyspace}.{name} (pk, v) VALUES ({key}, 0)")
    nodetool.flush(cql, f"{keyspace}.{name}")
    return name, create_stmt
def table_with_counters(cql, keyspace):
    """Create and populate a table whose value column is a counter.

    Each of the ten partitions has its counter incremented four times
    (final value 4); data is flushed to sstables before returning.
    Returns the (table_name, create_statement) pair.
    """
    name = util.unique_name()
    create_stmt = f"CREATE TABLE {keyspace}.{name} (pk int PRIMARY KEY, v counter)"
    cql.execute(create_stmt)
    for key in range(10):
        for _ in range(4):
            cql.execute(f"UPDATE {keyspace}.{name} SET v = v + 1 WHERE pk = {key};")
    nodetool.flush(cql, f"{keyspace}.{name}")
    return name, create_stmt
def clustering_table_with_collection(cql, keyspace):
    """Create and populate a clustered table with a map<int, text> column.

    For every (pk, ck) in 10x10, inserts a row whose map literal grows
    with pk and ck. Flushes to sstables and returns the
    (table_name, create_statement) pair.
    """
    name = util.unique_name()
    create_stmt = f"CREATE TABLE {keyspace}.{name} (pk int, ck int, v map<int, text>, PRIMARY KEY (pk, ck))"
    cql.execute(create_stmt)
    for pk in range(10):
        for ck in range(10):
            # NOTE(review): this set comprehension emits duplicate map keys
            # (each p pairs with every c), so which value a key ends up with
            # is whichever the map literal lists last — presumably the test
            # only cares that *some* collection data exists; confirm.
            entries = {f"{p}: '{c}'" for p in range(0, pk) for c in range(0, ck)}
            literal = ", ".join(entries)
            cql.execute(f"INSERT INTO {keyspace}.{name} (pk, ck, v) VALUES ({pk}, {ck}, {{{literal}}})")
    nodetool.flush(cql, f"{keyspace}.{name}")
    return name, create_stmt
def clustering_table_with_udt(cql, keyspace):
    """Create and populate a clustered table whose value column is a UDT.

    Defines type1(f1 int, f2 text), inserts the same UDT literal into all
    100 (pk, ck) rows, flushes, and returns (table_name, combined_schema)
    where combined_schema joins the two DDL statements with "; ".
    """
    name = util.unique_name()
    type_ddl = f"CREATE TYPE {keyspace}.type1 (f1 int, f2 text)"
    # The leading space below is kept verbatim: this exact string is part
    # of the schema text returned to the caller.
    table_ddl = f" CREATE TABLE {keyspace}.{name} (pk int, ck int, v type1, PRIMARY KEY (pk, ck))"
    cql.execute(type_ddl)
    cql.execute(table_ddl)
    for pk in range(10):
        for ck in range(10):
            cql.execute(f"INSERT INTO {keyspace}.{name} (pk, ck, v) VALUES ({pk}, {ck}, {{f1: 100, f2: 'asd'}})")
    nodetool.flush(cql, f"{keyspace}.{name}")
    return name, "; ".join((type_ddl, table_ddl))
def test_partitions_estimate_only_deletions(cassandra_bug, cql, test_keyspace):
    """A table that contains only tombstones should report a partition
    estimate near zero, not near the number of deleted keys."""
    N = 1000
    with new_test_table(cql, test_keyspace, 'k int PRIMARY KEY') as table:
        delete = cql.prepare(f"DELETE FROM {table} WHERE k=?")
        for key in range(N):
            cql.execute(delete, [key])
        nodetool.flush(cql, table)
        # Cassandra only refreshes system.size_estimates on demand.
        nodetool.refreshsizeestimates(cql)
        # Strip the "keyspace." prefix to get the bare table name.
        bare_name = table[len(test_keyspace)+1:]
        rows = cql.execute(
            f"SELECT partitions_count FROM system.size_estimates WHERE keyspace_name = '{test_keyspace}' AND table_name = '{bare_name}'")
        # One row per token range; the total is their sum.
        counts = [row.partitions_count for row in rows]
        count = sum(counts)
        print(counts)
        print(count)
        # Count should be close to 0, not to N
        assert count < N/1.25
def test_create_large_static_cells_and_rows(cql, test_keyspace):
    '''Writing an oversized static cell must only trigger the
    large_data_handler report — not crash the server.

    Regression test for https://github.com/scylladb/scylla/issues/6780'''
    schema = "pk int, ck int, user_ids set<text> static, PRIMARY KEY (pk, ck)"
    with new_test_table(cql, test_keyspace, schema) as table:
        stmt = cql.prepare(
            f"INSERT INTO {table} (pk, ck, user_ids) VALUES (?, ?, ?)")
        # The default reporting thresholds are 1 MB per cell and 10 MB per
        # row, so a single 10 MB value trips both the large-static-cell and
        # large-static-row code paths at once.
        huge_value = {'x' * 1024 * 1024 * 10}
        cql.execute(stmt, [1, 1, huge_value])
        # Large-data reporting happens when the memtable hits disk.
        nodetool.flush(cql, table)
def test_mv_empty_string_partition_key(cql, test_keyspace):
    """A materialized view keyed on a text column must accept the empty
    string as a partition key, and such a view must be flushable.

    Regression test for #9375."""
    schema = 'p int, v text, primary key (p)'
    with new_test_table(cql, test_keyspace, schema) as table:
        with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
            cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, '')")
            # cql-pytest runs against a single node, where view updates are
            # synchronous — the view can be read back immediately. A
            # multi-node setup would need retries here.
            # In #9375 this lookup of the empty-key view row failed:
            assert list(cql.execute(f"SELECT * FROM {mv}")) == [('', 123)]
            # Also make sure an sstable holding just this one empty-string
            # partition can be written (a summary-file sanity check used to
            # reject it).
            nodetool.flush(cql, mv)
def test_mv_empty_string_partition_key_individual(cassandra_bug, cql, test_keyspace):
    """Reading the individual empty-string partition of a materialized
    view must work from memtable, from sstable, and via full scan.

    Exercises the token-ordering problem from #9352 by mixing the empty
    key with several other partitions."""
    schema = 'p int, v text, primary key (p)'
    with new_test_table(cql, test_keyspace, schema) as table:
        with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
            # One of the v values is the empty string under test; the rest
            # spread the data across tokens to shake out ordering issues
            # (see #9352).
            rows = [[123, ''], [1, 'dog'], [2, 'cat'], [700, 'hello'], [3, 'horse']]
            for p, v in rows:
                cql.execute(
                    f"INSERT INTO {table} (p,v) VALUES ({p}, '{v}')")
            # Single-node cql-pytest makes view updates synchronous, so no
            # read retries are needed.
            # Memtable/cache read of the empty-key partition:
            assert list(cql.execute(f"SELECT * FROM {mv} WHERE v=''")) == [
                ('', 123)
            ]
            # The read above is served from cache. Empty partition keys were
            # also special-cased (and buggy) in the sstable read/write paths,
            # so force a flush and then bypass the cache to hit the disk
            # path. Historically the flush itself failed — the sstable layer
            # refused to write an empty partition key:
            nodetool.flush(cql, mv)
            # Full-table scan first, then the targeted single-partition read:
            assert set(cql.execute(f"SELECT * FROM {mv} BYPASS CACHE")) == {
                (v, p) for p, v in rows
            }
            # #9352 made WHERE v='' come back empty even though the data was
            # demonstrably there (the scan above found it) and the query is
            # parsed correctly (it worked from memtables earlier):
            assert list(
                cql.execute(
                    f"SELECT * FROM {mv} WHERE v='' BYPASS CACHE")) == [('', 123)]
def write_table_and_estimate_partitions(cql, test_keyspace, N):
    """Insert N single-column partitions into a fresh table and return
    the server's partition-count estimate for it."""
    with new_test_table(cql, test_keyspace, 'k int PRIMARY KEY') as table:
        insert = cql.prepare(f"INSERT INTO {table} (k) VALUES (?)")
        for key in range(N):
            cql.execute(insert, [key])
        # Neither Cassandra nor Scylla counts memtable data in the
        # estimate, so flush first.
        nodetool.flush(cql, table)
        # Cassandra may not expose estimates until an explicit refresh;
        # Scylla doesn't need it but tolerates it.
        nodetool.refreshsizeestimates(cql)
        # system.size_estimates holds one row per token range for the
        # keyspace/table partition — sum them all.
        bare_name = table[len(test_keyspace)+1:]
        counts = [row.partitions_count for row in cql.execute(
            f"SELECT partitions_count FROM system.size_estimates WHERE keyspace_name = '{test_keyspace}' AND table_name = '{bare_name}'")]
        total = sum(counts)
        print(counts)
        print(total)
        return total
def test_partitions_estimate_full_overlap(cassandra_bug, cql, test_keyspace):
    """Two sstables holding identical data should still yield a partition
    estimate in the vicinity of N, not 2N or 0."""
    N = 500
    with new_test_table(cql, test_keyspace, 'k int PRIMARY KEY') as table:
        insert = cql.prepare(f"INSERT INTO {table} (k) VALUES (?)")
        # First copy of the data, flushed into one sstable...
        for key in range(N):
            cql.execute(insert, [key])
        nodetool.flush(cql, table)
        # ...and an identical second copy, flushed into a second sstable:
        for key in range(N):
            cql.execute(insert, [key])
        nodetool.flush(cql, table)
        # TODO: In Scylla we should use NullCompactionStrategy to avoid the two
        # sstables from immediately being compacted together.
        nodetool.refreshsizeestimates(cql)
        bare_name = table[len(test_keyspace)+1:]
        counts = [row.partitions_count for row in cql.execute(
            f"SELECT partitions_count FROM system.size_estimates WHERE keyspace_name = '{test_keyspace}' AND table_name = '{bare_name}'")]
        total = sum(counts)
        print(counts)
        print(total)
        assert total > N/1.5 and total < N*1.5
def test_twcs_optimal_query_path(cql, test_keyspace, scylla_only):
    """A single-partition read from a TWCS table must return the row both
    from cache and from sstables.

    Regression test for #8138, where the sstable read path returned zero
    rows (and crashed a debug build)."""
    compaction_opts = (
        " WITH COMPACTION = {"
        " 'compaction_window_size': '1',"
        " 'compaction_window_unit': 'MINUTES',"
        " 'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy' }"
    )
    with new_test_table(
            cql, test_keyspace,
            "pk int, ck int, v int, PRIMARY KEY (pk, ck)",
            compaction_opts) as table:
        cql.execute(f"INSERT INTO {table} (pk, ck, v) VALUES (0, 0, 0)")
        # The freshly written row must be visible to a partition scan:
        rows = list(cql.execute(f"SELECT * FROM {table} WHERE pk = 0"))
        assert 1 == len(rows)
        # Flush so the data lives in an sstable, then repeat the query with
        # BYPASS CACHE to force the sstable read path — the one #8138 broke.
        nodetool.flush(cql, table)
        rows = list(cql.execute(
            f"SELECT * FROM {table} WHERE pk = 0 BYPASS CACHE"))
        assert 1 == len(rows)
def flush(cql, table):
    """Convenience shorthand delegating to nodetool.flush()."""
    nodetool.flush(cql, table)