Example #1
0
    def test_gc(self):
        """
        Verify that tombstone purging with gc_grace=0 does not resurrect
        deleted data: write two rows, delete one, and confirm the deleted
        row stays absent from SELECT * both before and after a flush plus
        compaction.
        """
        cluster = self.cluster
        cluster.populate(1).start()
        node1 = cluster.nodelist()[0]

        time.sleep(.5)
        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', gc_grace=0, key_type='int', columns={'c1': 'int'})

        session.execute('insert into cf (key, c1) values (1,1)')
        session.execute('insert into cf (key, c1) values (2,1)')
        node1.flush()

        # Both rows are visible before the delete.
        assert rows_to_list(session.execute('select * from cf;')) == [[1, 1], [2, 1]]

        session.execute('delete from cf where key=1')

        # The deleted row disappears immediately...
        assert rows_to_list(session.execute('select * from cf;')) == [[2, 1]]

        node1.flush()
        time.sleep(.5)
        node1.compact()
        time.sleep(.5)

        # ...and must stay gone after flush + compaction purge the tombstone.
        assert rows_to_list(session.execute('select * from cf;')) == [[2, 1]]
Example #2
0
    def test_gc(self):
        """
        Check that gc_grace=0 tombstone purging never brings back deleted
        data: insert two rows, delete one, then confirm SELECT * omits the
        deleted row before and after a flush and compaction cycle.
        """
        cluster = self.cluster
        cluster.populate(1).start()
        [node1] = cluster.nodelist()

        time.sleep(.5)
        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 1)
        create_cf(session, 'cf', gc_grace=0, key_type='int', columns={'c1': 'int'})

        def table_contents():
            # Snapshot of the table as plain lists.
            return rows_to_list(session.execute('select * from cf;'))

        session.execute('insert into cf (key, c1) values (1,1)')
        session.execute('insert into cf (key, c1) values (2,1)')
        node1.flush()

        assert table_contents() == [[1, 1], [2, 1]]

        session.execute('delete from cf where key=1')

        assert table_contents() == [[2, 1]]

        node1.flush()
        time.sleep(.5)
        node1.compact()
        time.sleep(.5)

        # Tombstone is purgeable by now; the delete must still hold.
        assert table_contents() == [[2, 1]]
    def _token_gen_test(self, nodes, randomPart=None):
        """
        Verify that the tokens generated for the first DC exactly match the
        tokens the cluster reports in system.local and system.peers.

        @param nodes number of nodes in the cluster
        @param randomPart forwarded to self.prepare (partitioner selection)
        """
        generated_tokens, session = self.prepare(randomPart, nodes=nodes)
        dc_tokens = generated_tokens[0]

        tokens = []
        local_tokens = rows_to_list(
            session.execute("SELECT tokens FROM system.local"))[0]
        # len() instead of __len__(): same result, idiomatic Python.
        self.assertEqual(len(local_tokens), 1, "too many tokens for peer")
        for tok in local_tokens:
            tokens += tok

        rows = rows_to_list(session.execute("SELECT tokens FROM system.peers"))
        self.assertEqual(len(rows), nodes - 1)
        for row in rows:
            peer_tokens = row[0]
            self.assertEqual(len(peer_tokens), 1,
                             "too many tokens for peer")
            for tok in peer_tokens:
                tokens.append(tok)

        self.assertEqual(len(tokens), len(dc_tokens))
        for cluster_token in tokens:
            tok = int(cluster_token)
            self.assertGreaterEqual(
                dc_tokens.index(tok), 0,
                "token in cluster does not match generated tokens")
Example #4
0
 def query_user(self, session, userid, age, consistency, check_ret=True):
     """
     Read back a user row at the given consistency level and compare it
     to the expected [[userid, age]] result (a falsy age means the row is
     expected to be absent). Returns whether the rows matched; when
     check_ret is True a mismatch fails the test.
     """
     query = "SELECT userid, age FROM users where userid = {}".format(userid)
     statement = SimpleStatement(query, consistency_level=consistency)
     res = session.execute(statement)
     expected = [[userid, age]] if age else []
     matched = rows_to_list(res) == expected
     if check_ret:
         self.assertTrue(matched, "Got {} from {}, expected {} at {}".format(
             rows_to_list(res), session.cluster.contact_points, expected,
             consistency_value_to_name(consistency)))
     return matched
Example #5
0
 def query_user(self, session, userid, age, consistency, check_ret=True):
     """
     SELECT the given user at the requested consistency and check the
     result against [[userid, age]] (or no rows when age is falsy).

     @return True when the observed rows equal the expected rows
     """
     stmt = SimpleStatement(
         "SELECT userid, age FROM users where userid = {}".format(userid),
         consistency_level=consistency)
     res = session.execute(stmt)
     expected = [[userid, age]] if age else []
     ok = rows_to_list(res) == expected
     if check_ret:
         failure_msg = "Got {} from {}, expected {} at {}".format(
             rows_to_list(res), session.cluster.contact_points, expected,
             consistency_value_to_name(consistency))
         self.assertTrue(ok, failure_msg)
     return ok
    def test_commitlog_replay_on_startup(self):
        """
        Test commit log replay: write data with batch commitlog enabled,
        kill the node before any flush, then verify the commitlog is
        replayed on restart and the data is still queryable.
        """
        node1 = self.node1
        node1.set_batch_commitlog(enabled=True)
        node1.start()

        debug("Insert data")
        session = self.patient_cql_connection(node1)
        create_ks(session, 'Test', 1)
        session.execute("""
            CREATE TABLE users (
                user_name varchar PRIMARY KEY,
                password varchar,
                gender varchar,
                state varchar,
                birth_year bigint
            );
        """)
        session.execute("INSERT INTO Test. users (user_name, password, gender, state, birth_year) "
                        "VALUES('gandalf', 'p@$$', 'male', 'WA', 1955);")

        debug("Verify data is present")
        session = self.patient_cql_connection(node1)
        res = session.execute("SELECT * FROM Test. users")
        self.assertItemsEqual(rows_to_list(res),
                              [[u'gandalf', 1955, u'male', u'p@$$', u'WA']])

        debug("Stop node abruptly")
        node1.stop(gently=False)

        debug("Verify commitlog was written before abrupt stop")
        commitlog_dir = os.path.join(node1.get_path(), 'commitlogs')
        commitlog_files = os.listdir(commitlog_dir)
        self.assertTrue(len(commitlog_files) > 0)

        debug("Verify no SSTables were flushed before abrupt stop")
        self.assertEqual(0, len(node1.get_sstables('test', 'users')))

        debug("Verify commit log was replayed on startup")
        node1.start()
        node1.watch_log_for("Log replay complete")
        # Here we verify from the logs that some mutations were replayed
        # BUG FIX: use a raw string so \d is a regex digit class rather than
        # an invalid string escape (SyntaxWarning/DeprecationWarning on
        # modern Python).
        replays = [match_tuple[0] for match_tuple in node1.grep_log(r" \d+ replayed mutations")]
        debug('The following log lines indicate that mutations were replayed: {msgs}'.format(msgs=replays))
        num_replayed_mutations = [
            parse('{} {num_mutations:d} replayed mutations{}', line).named['num_mutations']
            for line in replays
        ]
        # assert there were some lines where more than zero mutations were replayed
        self.assertNotEqual([m for m in num_replayed_mutations if m > 0], [])

        debug("Make query and ensure data is present")
        session = self.patient_cql_connection(node1)
        res = session.execute("SELECT * FROM Test. users")
        self.assertItemsEqual(rows_to_list(res),
                              [[u'gandalf', 1955, u'male', u'p@$$', u'WA']])
Example #7
0
    def test_commitlog_replay_on_startup(self):
        """
        Test commit log replay: write data with batch commitlog enabled,
        kill the node before any flush, then verify the commitlog is
        replayed on restart and the data is still queryable.
        """
        node1 = self.node1
        node1.set_batch_commitlog(enabled=True)
        node1.start()

        logger.debug("Insert data")
        session = self.patient_cql_connection(node1)
        create_ks(session, 'Test', 1)
        session.execute("""
            CREATE TABLE users (
                user_name varchar PRIMARY KEY,
                password varchar,
                gender varchar,
                state varchar,
                birth_year bigint
            );
        """)
        session.execute("INSERT INTO Test. users (user_name, password, gender, state, birth_year) "
                        "VALUES('gandalf', 'p@$$', 'male', 'WA', 1955);")

        logger.debug("Verify data is present")
        session = self.patient_cql_connection(node1)
        res = session.execute("SELECT * FROM Test. users")
        # CONSISTENCY FIX: compare ignoring order, matching the identical
        # check at the end of this test (the exact == comparison depended on
        # column ordering while the final check did not).
        assert_lists_equal_ignoring_order(rows_to_list(res), [['gandalf', 1955, 'male', 'p@$$', 'WA']])

        logger.debug("Stop node abruptly")
        node1.stop(gently=False)

        logger.debug("Verify commitlog was written before abrupt stop")
        commitlog_dir = os.path.join(node1.get_path(), 'commitlogs')
        commitlog_files = os.listdir(commitlog_dir)
        assert len(commitlog_files) > 0

        logger.debug("Verify no SSTables were flushed before abrupt stop")
        assert 0 == len(node1.get_sstables('test', 'users'))

        logger.debug("Verify commit log was replayed on startup")
        node1.start()
        node1.watch_log_for("Log replay complete")
        # Here we verify from the logs that some mutations were replayed
        replays = [match_tuple[0] for match_tuple in node1.grep_log(r" \d+ replayed mutations")]
        logger.debug('The following log lines indicate that mutations were replayed: {msgs}'.format(msgs=replays))
        num_replayed_mutations = [
            parse('{} {num_mutations:d} replayed mutations{}', line).named['num_mutations']
            for line in replays
        ]
        # assert there were some lines where more than zero mutations were replayed
        assert [m for m in num_replayed_mutations if m > 0] != []

        logger.debug("Make query and ensure data is present")
        session = self.patient_cql_connection(node1)
        res = session.execute("SELECT * FROM Test. users")
        assert_lists_equal_ignoring_order(rows_to_list(res), [['gandalf', 1955, 'male', 'p@$$', 'WA']])
    def distribution_template(self, ratio_spec, expected_ratio, delta):
        """
        Parameterized check of cassandra-stress's `row-population-ratio`.

        @param ratio_spec the string passed to `row-population-ratio` in the call to `cassandra-stress`
        @param expected_ratio the expected ratio of null/non-null values in the values written
        @param delta the acceptable delta between the expected and actual ratios
        """
        self.cluster.populate(1).start(wait_for_binary_proto=True)
        node = self.cluster.nodelist()[0]
        stress_args = [
            'write', 'n=1000', 'no-warmup', '-rate', 'threads=50', '-col',
            'n=FIXED(50)', '-insert',
            'row-population-ratio={ratio_spec}'.format(ratio_spec=ratio_spec)
        ]
        node.stress(stress_args)
        session = self.patient_cql_connection(node)
        written = rows_to_list(
            session.execute('SELECT * FROM keyspace1.standard1;'))

        # Observed fraction of null cells over everything that was written.
        null_count = sum(row.count(None) for row in written)
        cell_count = sum(len(row) for row in written)

        self.assertAlmostEqual(float(null_count) / cell_count,
                               expected_ratio,
                               delta=delta)
Example #9
0
    def test_compact_counter_cluster(self):
        """
        @jira_ticket CASSANDRA-12219
        This test will fail on 3.0.0 - 3.0.8, and 3.1 - 3.8
        """
        cluster = self.cluster
        cluster.populate(3).start()
        node1 = cluster.nodelist()[0]
        session = self.patient_cql_connection(node1)
        create_ks(session, 'counter_tests', 1)

        session.execute("""
            CREATE TABLE IF NOT EXISTS counter_cs (
                key bigint PRIMARY KEY,
                data counter
            ) WITH COMPACT STORAGE
            """)

        # Bump each of the five counter rows five times.
        for _ in range(0, 5):
            for idx in range(0, 5):
                session.execute(
                    "UPDATE counter_cs SET data = data + 1 WHERE key = {k}".
                    format(k=idx))

        # Every counter must have accumulated exactly five increments.
        for idx in range(0, 5):
            fetched = list(
                session.execute(
                    "SELECT data from counter_cs where key = {k}".format(
                        k=idx)))
            assert rows_to_list(fetched)[0][0] == 5
Example #10
0
 def check_data_on_each_replica(self, expect_fully_repaired,
                                initial_replica):
     """
     Perform a SELECT * query at CL.ONE on each replica in turn. If expect_fully_repaired is True, we verify that
     each replica returns the full row being queried. If not, then we only verify that the 'a' column has been
     repaired.
     """
     stmt = SimpleStatement("SELECT * FROM alter_rf_test.t1 WHERE k=1",
                            consistency_level=ConsistencyLevel.ONE)
     logger.debug(
         "Checking all if read repair has completed on all replicas")
     for replica in self.cluster.nodelist():
         # The initial replica always holds the full row.
         wants_full_row = expect_fully_repaired or replica == initial_replica
         logger.debug("Checking {n}, {x}expecting all columns".format(
             n=replica.name, x="" if wants_full_row else "not "))
         session = self.patient_exclusive_cql_connection(replica)
         observed = rows_to_list(session.execute(stmt))
         logger.debug("Actual result: " + str(observed))
         expected = [[1, 1, 1]] if wants_full_row else [[1, 1, None]]
         if observed != expected:
             raise NotRepairedException()
Example #11
0
    def compact_counter_cluster_test(self):
        """
        @jira_ticket CASSANDRA-12219
        This test will fail on 3.0.0 - 3.0.8, and 3.1 - 3.8
        """

        cluster = self.cluster
        cluster.populate(3).start()
        node1 = cluster.nodelist()[0]
        session = self.patient_cql_connection(node1)
        create_ks(session, 'counter_tests', 1)

        session.execute("""
            CREATE TABLE IF NOT EXISTS counter_cs (
                key bigint PRIMARY KEY,
                data counter
            ) WITH COMPACT STORAGE
            """)

        # Apply 25 increments: 5 rounds across each of the 5 counter rows.
        for _round in range(0, 5):
            for key in range(0, 5):
                session.execute("UPDATE counter_cs SET data = data + 1 WHERE key = {k}".format(k=key))

        # Each counter row must now read exactly 5.
        for key in range(0, 5):
            fetched = list(session.execute("SELECT data from counter_cs where key = {k}".format(k=key)))
            self.assertEqual(rows_to_list(fetched)[0][0], 5)
Example #12
0
 def query_counter(self, session, id, val, consistency, check_ret=True):
     """
     Read the counter row with the given id at the requested consistency
     level and, when check_ret is True, assert its value equals *val*.

     @return the counter value read, or 0 when no row was found
     """
     stmt = SimpleStatement(
         "SELECT * from counters WHERE id = {}".format(id),
         consistency_level=consistency)
     rows = rows_to_list(session.execute(stmt))
     if check_ret:
         self.assertEqual(
             rows[0][1], val,
             "Got {} from {}, expected {} at {}".format(
                 rows[0][1], session.cluster.contact_points, val,
                 consistency_value_to_name(consistency)))
     return rows[0][1] if rows else 0
Example #13
0
 def query_counter(self, session, id, val, consistency, check_ret=True):
     """
     Fetch the counter identified by *id* at *consistency*; optionally
     verify that it equals *val*.

     @return the observed counter value (0 when no row exists)
     """
     query = "SELECT * from counters WHERE id = {}".format(id)
     result = rows_to_list(
         session.execute(SimpleStatement(query, consistency_level=consistency)))
     if check_ret:
         observed = result[0][1]
         message = "Got {} from {}, expected {} at {}".format(
             observed, session.cluster.contact_points, val,
             consistency_value_to_name(consistency))
         self.assertEqual(observed, val, message)
     return result[0][1] if result else 0
 def _fetch_initial_data(self,
                         table='keyspace1.standard1',
                         cl=ConsistencyLevel.THREE,
                         limit=10000):
     """
     Read up to *limit* rows from *table* through self.query_node at the
     given consistency level and return them as plain lists.
     """
     debug("Fetching initial data from {} on {} with CL={} and LIMIT={}".
           format(table, self.query_node.name, cl, limit))
     session = self.patient_cql_connection(self.query_node)
     cql = 'select * from {} LIMIT {}'.format(table, limit)
     statement = SimpleStatement(cql, consistency_level=cl)
     return rows_to_list(session.execute(statement))
Example #15
0
def _insert_rows(session, table_name, insert_stmt, values):
    """
    Concurrently execute *insert_stmt* once per entry in *values*, read the
    table back, assert the row count matches the number of values inserted,
    and return the loaded rows as plain lists.
    """
    prepared_insert = session.prepare(insert_stmt)
    values = list(values)  # in case values is a generator
    statements_and_params = ((prepared_insert, v) for v in values)
    execute_concurrent(session, statements_and_params,
                       concurrency=500, raise_on_first_error=True)

    data_loaded = rows_to_list(session.execute('SELECT * FROM ' + table_name))
    logger.debug('{n} rows inserted into {table_name}'.format(n=len(data_loaded), table_name=table_name))
    # use assert_equal over assert_length_equal to avoid printing out
    # potentially large lists
    assert len(values) == len(data_loaded)
    return data_loaded
    def _token_gen_test(self, nodes, randomPart=None):
        """
        Verify that the tokens generated for the first DC exactly match the
        tokens the cluster reports in system.local and system.peers.

        @param nodes number of nodes in the cluster
        @param randomPart forwarded to self.prepare (partitioner selection)
        """
        generated_tokens, session = self.prepare(randomPart, nodes=nodes)
        dc_tokens = generated_tokens[0]

        tokens = []
        local_tokens = rows_to_list(session.execute("SELECT tokens FROM system.local"))[0]
        # len() instead of __len__(): same result, idiomatic Python.
        self.assertEqual(len(local_tokens), 1, "too many tokens for peer")
        for tok in local_tokens:
            tokens += tok

        rows = rows_to_list(session.execute("SELECT tokens FROM system.peers"))
        self.assertEqual(len(rows), nodes - 1)
        for row in rows:
            peer_tokens = row[0]
            self.assertEqual(len(peer_tokens), 1, "too many tokens for peer")
            for tok in peer_tokens:
                tokens.append(tok)

        self.assertEqual(len(tokens), len(dc_tokens))
        for cluster_token in tokens:
            tok = int(cluster_token)
            self.assertGreaterEqual(dc_tokens.index(tok), 0, "token in cluster does not match generated tokens")
    def _token_gen_test(self, nodes, randomPart=None):
        """
        Verify that the tokens generated for the first DC exactly match the
        tokens the cluster reports in system.local and system.peers.

        @param nodes number of nodes in the cluster
        @param randomPart forwarded to self.prepare (partitioner selection)
        """
        generated_tokens, session = self.prepare(randomPart, nodes=nodes)
        dc_tokens = generated_tokens[0]

        tokens = []
        local_tokens = rows_to_list(session.execute("SELECT tokens FROM system.local"))[0]
        # BUG FIX: `assert x.__len__(), 1 == "msg"` only checked that the
        # length was truthy (with a bogus message expression); assert the
        # intended equality instead.
        assert len(local_tokens) == 1, "too many tokens for peer"
        for tok in local_tokens:
            tokens += tok

        rows = rows_to_list(session.execute("SELECT tokens FROM system.peers"))
        assert len(rows) == nodes - 1
        for row in rows:
            peer_tokens = row[0]
            assert len(peer_tokens) == 1, "too many tokens for peer"
            for tok in peer_tokens:
                tokens.append(tok)

        assert len(tokens) == len(dc_tokens)
        for cluster_token in tokens:
            tok = int(cluster_token)
            # BUG FIX: `assert dc_tokens.index(tok), 0 >= "msg"` raised a
            # TypeError (int >= str) whenever index() returned 0.
            assert dc_tokens.index(tok) >= 0, "token in cluster does not match generated tokens"
    def _token_gen_test(self, nodes, randomPart=None):
        """
        Verify that the tokens generated for the first DC exactly match the
        tokens the cluster reports in system.local and system.peers.

        @param nodes number of nodes in the cluster
        @param randomPart forwarded to self.prepare (partitioner selection)
        """
        generated_tokens, session = self.prepare(randomPart, nodes=nodes)
        dc_tokens = generated_tokens[0]

        tokens = []
        local_tokens = rows_to_list(session.execute("SELECT tokens FROM system.local"))[0]
        # BUG FIX: `assert x.__len__(), 1 == "msg"` only checked that the
        # length was truthy (with a bogus message expression); assert the
        # intended equality instead.
        assert len(local_tokens) == 1, "too many tokens for peer"
        for tok in local_tokens:
            tokens += tok

        rows = rows_to_list(session.execute("SELECT tokens FROM system.peers"))
        assert len(rows) == nodes - 1
        for row in rows:
            peer_tokens = row[0]
            assert len(peer_tokens) == 1, "too many tokens for peer"
            for tok in peer_tokens:
                tokens.append(tok)

        assert len(tokens) == len(dc_tokens)
        for cluster_token in tokens:
            tok = int(cluster_token)
            # BUG FIX: `assert dc_tokens.index(tok), 0 >= "msg"` raised a
            # TypeError (int >= str) whenever index() returned 0.
            assert dc_tokens.index(tok) >= 0, "token in cluster does not match generated tokens"
    def test_query_indexes_with_vnodes(self):
        """
        Verifies correct query behaviour in the presence of vnodes
        @jira_ticket CASSANDRA-11104
        """
        cluster = self.cluster
        cluster.populate(2).start()
        node1, node2 = cluster.nodelist()
        session = self.patient_cql_connection(node1)
        session.execute("CREATE KEYSPACE ks WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': '1'};")
        session.execute("CREATE TABLE ks.compact_table (a int PRIMARY KEY, b int) WITH COMPACT STORAGE;")
        session.execute("CREATE INDEX keys_index ON ks.compact_table (b);")
        session.execute("CREATE TABLE ks.regular_table (a int PRIMARY KEY, b int)")
        session.execute("CREATE INDEX composites_index on ks.regular_table (b)")

        # Wait (up to 10s per node) for the secondary index build to finish.
        for node in cluster.nodelist():
            start = time.time()
            while time.time() < start + 10:
                debug("waiting for index to build")
                time.sleep(1)
                if index_is_built(node, session, 'ks', 'regular_table', 'composites_index'):
                    break
            else:
                raise DtestTimeoutError()

        # BUG FIX: xrange is Python 2 only; use range (a NameError on Python 3).
        insert_args = [(i, i % 2) for i in range(100)]
        execute_concurrent_with_args(session,
                                     session.prepare("INSERT INTO ks.compact_table (a, b) VALUES (?, ?)"),
                                     insert_args)
        execute_concurrent_with_args(session,
                                     session.prepare("INSERT INTO ks.regular_table (a, b) VALUES (?, ?)"),
                                     insert_args)

        # Half of the 100 rows have b = 0 in each table.
        res = session.execute("SELECT * FROM ks.compact_table WHERE b = 0")
        self.assertEqual(len(rows_to_list(res)), 50)
        res = session.execute("SELECT * FROM ks.regular_table WHERE b = 0")
        self.assertEqual(len(rows_to_list(res)), 50)
 def check_data_on_each_replica(self, expect_fully_repaired, initial_replica):
     """
     Perform a SELECT * query at CL.ONE on each replica in turn. If expect_fully_repaired is True, we verify that
     each replica returns the full row being queried. If not, then we only verify that the 'a' column has been
     repaired.
     """
     stmt = SimpleStatement("SELECT * FROM alter_rf_test.t1 WHERE k=1", consistency_level=ConsistencyLevel.ONE)
     logger.debug("Checking all if read repair has completed on all replicas")
     for node in self.cluster.nodelist():
         # The initial replica always holds the complete row.
         wants_full_row = expect_fully_repaired or node == initial_replica
         logger.debug("Checking {n}, {x}expecting all columns"
                      .format(n=node.name, x="" if wants_full_row else "not "))
         session = self.patient_exclusive_cql_connection(node)
         observed = rows_to_list(session.execute(stmt))
         logger.debug("Actual result: " + str(observed))
         expected = [[1, 1, 1]] if wants_full_row else [[1, 1, None]]
         if observed != expected:
             raise NotRepairedException()
Example #21
0
    def drop_counter_column_test(self):
        """Test for CASSANDRA-7831"""
        cluster = self.cluster
        cluster.populate(1).start()
        node1, = cluster.nodelist()
        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'counter_tests', 1)

        session.execute("CREATE TABLE counter_bug (t int, c counter, primary key(t))")

        session.execute("UPDATE counter_bug SET c = c + 1 where t = 1")
        result = list(session.execute("SELECT * from counter_bug"))

        # Exactly one row, whose counter was incremented exactly once.
        self.assertEqual(rows_to_list(result)[0], [1, 1])
        self.assertEqual(len(result), 1)

        session.execute("ALTER TABLE counter_bug drop c")

        # Re-adding a previously dropped counter column must be rejected.
        assert_invalid(session, "ALTER TABLE counter_bug add c counter", "Cannot re-add previously dropped counter column c")
Example #22
0
    def drop_counter_column_test(self):
        """Test for CASSANDRA-7831"""
        cluster = self.cluster
        cluster.populate(1).start()
        node1, = cluster.nodelist()
        session = self.patient_cql_connection(node1)
        create_ks(session, 'counter_tests', 1)

        session.execute("CREATE TABLE counter_bug (t int, c counter, primary key(t))")

        session.execute("UPDATE counter_bug SET c = c + 1 where t = 1")
        fetched = list(session.execute("SELECT * from counter_bug"))

        # One increment on one row: the table holds a single [1, 1] row.
        self.assertEqual(rows_to_list(fetched)[0], [1, 1])
        self.assertEqual(len(fetched), 1)

        session.execute("ALTER TABLE counter_bug drop c")

        # A dropped counter column can never be re-added.
        assert_invalid(session, "ALTER TABLE counter_bug add c counter", "Cannot re-add previously dropped counter column c")
Example #23
0
    def distribution_template(self, ratio_spec, expected_ratio, delta):
        """
        Parameterized check of cassandra-stress's `row-population-ratio`.

        @param ratio_spec the string passed to `row-population-ratio` in the call to `cassandra-stress`
        @param expected_ratio the expected ratio of null/non-null values in the values written
        @param delta the acceptable delta between the expected and actual ratios
        """
        self.cluster.populate(1).start(wait_for_binary_proto=True)
        node = self.cluster.nodelist()[0]
        node.stress(['write', 'n=1000', 'no-warmup', '-rate', 'threads=50', '-col', 'n=FIXED(50)',
                     '-insert', 'row-population-ratio={ratio_spec}'.format(ratio_spec=ratio_spec)])
        session = self.patient_cql_connection(node)
        written = rows_to_list(session.execute('SELECT * FROM keyspace1.standard1;'))

        # Fraction of null cells over everything that was written.
        null_count = sum(row.count(None) for row in written)
        cell_count = sum(len(row) for row in written)

        self.assertAlmostEqual(float(null_count) / cell_count, expected_ratio, delta=delta)
Example #24
0
    def test_cdc_data_available_in_cdc_raw(self):
        """
        End-to-end CDC test: generate data into a cdc-enabled table on one
        node, move that node's cdc_raw segments into a fresh node's
        commitlog directory, and verify commitlog replay restores all the
        data; on 4.0+ also verify the cdc index files are carried over with
        sane offsets.
        """
        ks_name = 'ks'
        # First, create a new node just for data generation.
        generation_node, generation_session = self.prepare(ks_name=ks_name)

        cdc_table_info = TableInfo(
            ks_name=ks_name, table_name='cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
            options={
                'cdc': 'true',
                # give table an explicit id so when we create it again it's the
                # same table and we can replay into it
                'id': uuid.uuid4()
            }
        )

        # Write until we get a new CL segment to avoid replaying initialization
        # mutations from this node's startup into system tables in the other
        # node. See CASSANDRA-11811.
        advance_to_next_cl_segment(
            session=generation_session,
            commitlog_dir=os.path.join(generation_node.get_path(), 'commitlogs')
        )

        generation_session.execute(cdc_table_info.create_stmt)

        # insert 10000 rows
        inserted_rows = _insert_rows(generation_session, cdc_table_info.name, cdc_table_info.insert_stmt,
                                     repeat((), 10000))

        # drain the node to guarantee all cl segments will be recycled
        logger.debug('draining')
        generation_node.drain()
        logger.debug('stopping')
        # stop the node and clean up all sessions attached to it
        generation_session.cluster.shutdown()
        generation_node.stop()

        # We can rely on the existing _cdc.idx files to determine which .log files contain cdc data.
        source_path = os.path.join(generation_node.get_path(), 'cdc_raw')
        # BUG FIX: iterate the directory listing, not the path string itself
        # (iterating a str yields single characters, so the set was always
        # empty and the following assertion was meaningless).
        source_cdc_indexes = {ReplayData.load(source_path, name)
                              for name in os.listdir(source_path) if name.endswith('_cdc.idx')}
        # BUG FIX: {} is an empty dict, so `set() != {}` was vacuously true;
        # assert non-emptiness against an empty set.
        assert source_cdc_indexes != set()

        # create a new node to use for cdc_raw cl segment replay
        loading_node = self._init_new_loading_node(ks_name, cdc_table_info.create_stmt, self.cluster.version() < '4')

        # move cdc_raw contents to commitlog directories, then start the
        # node again to trigger commitlog replay, which should replay the
        # cdc_raw files we moved to commitlogs into memtables.
        logger.debug('moving cdc_raw and restarting node')
        _move_commitlog_segments(
            os.path.join(generation_node.get_path(), 'cdc_raw'),
            os.path.join(loading_node.get_path(), 'commitlogs')
        )
        loading_node.start(wait_for_binary_proto=True)
        logger.debug('node successfully started; waiting on log replay')
        loading_node.grep_log('Log replay complete')
        logger.debug('log replay complete')

        # final assertions
        validation_session = self.patient_exclusive_cql_connection(loading_node)
        data_in_cdc_table_after_restart = rows_to_list(
            validation_session.execute('SELECT * FROM ' + cdc_table_info.name)
        )
        logger.debug('found {cdc} values in CDC table'.format(
            cdc=len(data_in_cdc_table_after_restart)
        ))

        # Then we assert that the CDC data that we expect to be there is there.
        # All data that was in CDC tables should have been copied to cdc_raw,
        # then used in commitlog replay, so it should be back in the cluster.
        assert (inserted_rows == data_in_cdc_table_after_restart), 'not all expected data selected'

        if self.cluster.version() >= '4.0':
            # Create ReplayData objects for each index file found in loading cluster
            loading_path = os.path.join(loading_node.get_path(), 'cdc_raw')
            dest_cdc_indexes = [ReplayData.load(loading_path, name)
                                for name in os.listdir(loading_path) if name.endswith('_cdc.idx')]

            # Compare source replay data to dest to ensure replay process created both hard links and index files.
            for srd in source_cdc_indexes:
                # Confirm both log and index are in dest
                assert os.path.isfile(os.path.join(loading_path, srd.idx_name))
                assert os.path.isfile(os.path.join(loading_path, srd.log_name))

                # Find dest ReplayData that corresponds to the source (should be exactly 1)
                corresponding_dest_replay_datae = [x for x in dest_cdc_indexes
                                                   if srd.idx_name == x.idx_name]
                assert_length_equal(corresponding_dest_replay_datae, 1)
                drd = corresponding_dest_replay_datae[0]

                # We can't compare equality on offsets since replay uses the raw file length as the written
                # cdc offset. We *can*, however, confirm that the offset in the replayed file is >=
                # the source file, ensuring clients are signaled to replay at least all the data in the
                # log.
                assert drd.offset >= srd.offset

                # Confirm completed flag is the same in both
                assert srd.completed == drd.completed

            # Confirm that the relationship between index files on the source
            # and destination looks like we expect.
            # First, grab the mapping between the two, make sure it's a 1-1
            # mapping, and transform the dict to reflect that:
            src_to_dest_idx_map = {
                src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                         if dest_rd.idx_name == src_rd.idx_name]
                for src_rd in source_cdc_indexes
            }
            for src_rd, dest_rds in src_to_dest_idx_map.items():
                assert_length_equal(dest_rds, 1)
                src_to_dest_idx_map[src_rd] = dest_rds[0]
            # All offsets in idx files that were copied should be >0 on the
            # destination node.
            assert (
                0 not in {i.offset for i in src_to_dest_idx_map.values()}),\
                ('Found index offsets == 0 in an index file on the '
                 'destination node that corresponds to an index file on the '
                 'source node:\n'
                 '{}').format(pformat(src_to_dest_idx_map))
            # Offsets of all shared indexes should be >= on the destination
            # than on the source.
            for src_rd, dest_rd in src_to_dest_idx_map.items():
                assert dest_rd.offset >= src_rd.offset

            # Re-verify the 1-1 mapping and the offset invariant on a freshly
            # built mapping.
            src_to_dest_idx_map = {
                src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                         if dest_rd.idx_name == src_rd.idx_name]
                for src_rd in source_cdc_indexes
            }
            for k, v in src_to_dest_idx_map.items():
                assert_length_equal(v, 1)
                # BUG FIX: v is a one-element list here, so `v.offset` raised
                # AttributeError; compare against its single ReplayData, with
                # the same dest >= src direction asserted above.
                assert v[0].offset >= k.offset
 def read_as_list(self, query, session=None, node=None):
     """Run *query* at quorum consistency and return the rows as a list.

     When *session* is not supplied, an exclusive connection is opened
     to *node* (falling back to ``self.node1`` if *node* is omitted).
     """
     if not session:
         target = node or self.node1
         session = self.exclusive_cql_connection(target)
     return rows_to_list(self.quorum(session, query))
Пример #26
0
    def test_json_tools(self):
        """
        Round-trip a small ``users`` table through the sstable JSON tools.

        Writes two rows and exports their sstables to a JSON file, rebuilds
        the cluster with one new row, re-imports the JSON file, and verifies
        that all three rows are present afterwards.
        """
        logger.debug("Starting cluster...")
        cluster = self.cluster
        cluster.set_batch_commitlog(enabled=True)
        cluster.populate(1).start()

        logger.debug("Version: " + cluster.version().vstring)

        logger.debug("Getting CQLSH...")
        [node1] = cluster.nodelist()
        session = self.patient_cql_connection(node1)

        logger.debug("Inserting data...")
        create_ks(session, 'Test', 1)

        session.execute("""
            CREATE TABLE users (
                user_name varchar PRIMARY KEY,
                password varchar,
                gender varchar,
                state varchar,
                birth_year bigint
            );
        """)

        session.execute(
            "INSERT INTO Test. users (user_name, password, gender, state, birth_year) "
            "VALUES ('frodo', 'pass@', 'male', 'CA', 1985);")
        session.execute(
            "INSERT INTO Test. users (user_name, password, gender, state, birth_year) "
            "VALUES ('sam', '@pass', 'male', 'NY', 1980);")

        res = session.execute("SELECT * FROM Test. users")

        assert_lists_equal_ignoring_order(
            rows_to_list(res), [['frodo', 1985, 'male', 'pass@', 'CA'],
                                ['sam', 1980, 'male', '@pass', 'NY']])

        logger.debug("Flushing and stopping cluster...")
        node1.flush()
        cluster.stop()

        logger.debug("Exporting to JSON file...")
        # Use mkstemp instead of the deprecated, race-prone tempfile.mktemp:
        # mkstemp creates the file atomically, so no other process can claim
        # the name between generation and open.
        fd, json_path = tempfile.mkstemp(suffix='.schema.json')
        os.close(fd)
        with open(json_path, 'w') as f:
            node1.run_sstable2json(f)

        # sstable2json may prepend a warning line; strip it so the file is
        # valid JSON for the import step below.
        with open(json_path, 'r') as fin:
            data = fin.read().splitlines(True)
        if data[0][0] == 'W':
            with open(json_path, 'w') as fout:
                fout.writelines(data[1:])

        logger.debug("Deleting cluster and creating new...")
        cluster.clear()
        cluster.start()

        logger.debug("Inserting data...")
        session = self.patient_cql_connection(node1)
        create_ks(session, 'Test', 1)

        session.execute("""
            CREATE TABLE users (
                user_name varchar PRIMARY KEY,
                password varchar,
                gender varchar,
                state varchar,
                birth_year bigint
            );
        """)

        session.execute(
            "INSERT INTO Test. users (user_name, password, gender, state, birth_year) "
            "VALUES ('gandalf', 'p@$$', 'male', 'WA', 1955);")
        node1.flush()
        cluster.stop()

        logger.debug("Importing JSON file...")
        with open(json_path) as f:
            node1.run_json2sstable(f, "test", "users")
        os.remove(json_path)

        logger.debug("Verifying import...")
        cluster.start()
        [node1] = cluster.nodelist()
        session = self.patient_cql_connection(node1)

        res = session.execute("SELECT * FROM Test. users")

        logger.debug("data: " + str(res))

        # The two rows restored from JSON plus the newly inserted one.
        assert_lists_equal_ignoring_order(
            rows_to_list(res), [['frodo', 1985, 'male', 'pass@', 'CA'],
                                ['sam', 1980, 'male', '@pass', 'NY'],
                                ['gandalf', 1955, 'male', 'p@$$', 'WA']])
Пример #27
0
    def alter_rf_and_run_read_repair_test(self):
        """
        @jira_ticket CASSANDRA-10655
        @jira_ticket CASSANDRA-10657

        Test that querying only a subset of all the columns in a row doesn't confuse read-repair to avoid
        the problem described in CASSANDRA-10655.
        """

        session = self.patient_cql_connection(self.cluster.nodelist()[0])
        session.execute("""CREATE KEYSPACE alter_rf_test
                           WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};""")
        session.execute("CREATE TABLE alter_rf_test.t1 (k int PRIMARY KEY, a int, b int);")
        session.execute("INSERT INTO alter_rf_test.t1 (k, a, b) VALUES (1, 1, 1);")
        cl_one_stmt = SimpleStatement("SELECT * FROM alter_rf_test.t1 WHERE k=1",
                                      consistency_level=ConsistencyLevel.ONE)

        # identify the initial replica and trigger a flush to ensure reads come from sstables
        initial_replica, non_replicas = self.identify_initial_placement('alter_rf_test', 't1', 1)
        debug("At RF=1 replica for data is " + initial_replica.name)
        initial_replica.flush()

        # At RF=1, it shouldn't matter which node we query, as the actual data should always come from the
        # initial replica when reading at CL ONE
        for n in self.cluster.nodelist():
            debug("Checking " + n.name)
            session = self.patient_exclusive_cql_connection(n)
            assert_one(session, "SELECT * FROM alter_rf_test.t1 WHERE k=1", [1, 1, 1], cl=ConsistencyLevel.ONE)

        # Alter so RF=n but don't repair, then execute a query which selects only a subset of the columns. Run this at
        # CL ALL on one of the nodes which doesn't currently have the data, triggering a read repair.
        # The expectation will be that every replicas will have been repaired for that column (but we make no assumptions
        # on the other columns).
        debug("Changing RF from 1 to 3")
        session.execute("""ALTER KEYSPACE alter_rf_test
                           WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};""")
        debug("Executing SELECT on non-initial replica to trigger read repair " + non_replicas[0].name)
        read_repair_session = self.patient_exclusive_cql_connection(non_replicas[0])
        # result of the CL ALL query contains only the selected column
        assert_one(read_repair_session, "SELECT a FROM alter_rf_test.t1 WHERE k=1", [1], cl=ConsistencyLevel.ALL)

        # Check the results of the read repair by querying each replica again at CL ONE
        debug("Re-running SELECTs at CL ONE to verify read repair")
        for n in self.cluster.nodelist():
            debug("Checking " + n.name)
            session = self.patient_exclusive_cql_connection(n)
            res = rows_to_list(session.execute(cl_one_stmt))
            # Column a must be 1 everywhere, and column b must be either 1 or None everywhere
            self.assertIn(res[0][:2], [[1, 1], [1, None]])

        # Now query at ALL but selecting all columns
        query = "SELECT * FROM alter_rf_test.t1 WHERE k=1"
        debug("Executing SELECT on non-initial replica to trigger read repair " + non_replicas[0].name)
        read_repair_session = self.patient_exclusive_cql_connection(non_replicas[0])
        # Fix: run this on read_repair_session (the non-replica connection just
        # opened) rather than the stale `session` bound in the loop above, so
        # the CL ALL read actually goes through the intended coordinator and
        # triggers the read repair this test is about.
        assert_one(read_repair_session, query, [1, 1, 1], cl=ConsistencyLevel.ALL)

        # Check all replica is fully up to date
        debug("Re-running SELECTs at CL ONE to verify read repair")
        for n in self.cluster.nodelist():
            debug("Checking " + n.name)
            session = self.patient_exclusive_cql_connection(n)
            assert_one(session, query, [1, 1, 1], cl=ConsistencyLevel.ONE)
Пример #28
0
 def _fetch_initial_data(self, table='keyspace1.standard1', cl=ConsistencyLevel.THREE, limit=10000):
     """Read up to *limit* rows from *table* at consistency *cl* via the query node.

     Returns the result as a list of row-lists.
     """
     logger.debug("Fetching initial data from {} on {} with CL={} and LIMIT={}".format(table, self.query_node.name, cl, limit))
     conn = self.patient_cql_connection(self.query_node)
     stmt = SimpleStatement('select * from {} LIMIT {}'.format(table, limit), consistency_level=cl)
     rows = conn.execute(stmt, timeout=20)
     return rows_to_list(rows)
Пример #29
0
    def test_cdc_data_available_in_cdc_raw(self):
        """
        Write rows to a CDC-enabled table, stop the generation node, move
        the contents of its cdc_raw directory into a fresh node's commitlog
        directory, and verify that commitlog replay restores every row that
        was originally inserted.
        """
        ks_name = 'ks'
        # First, create a new node just for data generation.
        generation_node, generation_session = self.prepare(ks_name=ks_name)

        cdc_table_info = TableInfo(
            ks_name=ks_name,
            table_name='cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
            options={
                'cdc': 'true',
                # give table an explicit id so when we create it again it's the
                # same table and we can replay into it
                'id': uuid.uuid4()
            })

        # Write until we get a new CL segment to avoid replaying initialization
        # mutations from this node's startup into system tables in the other
        # node. See CASSANDRA-11811.
        advance_to_next_cl_segment(session=generation_session,
                                   commitlog_dir=os.path.join(
                                       generation_node.get_path(),
                                       'commitlogs'))

        generation_session.execute(cdc_table_info.create_stmt)

        # insert 10000 rows
        inserted_rows = _insert_rows(generation_session, cdc_table_info.name,
                                     cdc_table_info.insert_stmt,
                                     repeat((), 10000))

        # drain the node to guarantee all cl segments will be recycled
        debug('draining')
        generation_node.drain()
        debug('stopping')
        # stop the node and clean up all sessions attached to it
        generation_node.stop()
        generation_session.cluster.shutdown()

        # create a new node to use for cdc_raw cl segment replay
        loading_node = self._init_new_loading_node(
            ks_name, cdc_table_info.create_stmt,
            self.cluster.version() < '4')

        # move cdc_raw contents to commitlog directories, then start the
        # node again to trigger commitlog replay, which should replay the
        # cdc_raw files we moved to commitlogs into memtables.
        debug('moving cdc_raw and restarting node')
        _move_contents(os.path.join(generation_node.get_path(), 'cdc_raw'),
                       os.path.join(loading_node.get_path(), 'commitlogs'))
        loading_node.start(wait_for_binary_proto=True)
        debug('node successfully started; waiting on log replay')
        loading_node.grep_log('Log replay complete')
        debug('log replay complete')

        # final assertions
        validation_session = self.patient_exclusive_cql_connection(
            loading_node)
        data_in_cdc_table_after_restart = rows_to_list(
            validation_session.execute('SELECT * FROM ' + cdc_table_info.name))
        debug('found {cdc} values in CDC table'.format(
            cdc=len(data_in_cdc_table_after_restart)))
        # Then we assert that the CDC data that we expect to be there is there.
        # All data that was in CDC tables should have been copied to cdc_raw,
        # then used in commitlog replay, so it should be back in the cluster.
        self.assertEqual(
            inserted_rows,
            data_in_cdc_table_after_restart,
            # The message on failure is too long, since cdc_data is thousands
            # of items, so we print something else here
            msg='not all expected data selected')
Пример #30
0
    def alter_rf_and_run_read_repair_test(self):
        """
        @jira_ticket CASSANDRA-10655
        @jira_ticket CASSANDRA-10657

        Test that querying only a subset of all the columns in a row doesn't confuse read-repair to avoid
        the problem described in CASSANDRA-10655.
        """

        session = self.patient_cql_connection(self.cluster.nodelist()[0])
        session.execute("""CREATE KEYSPACE alter_rf_test
                           WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};"""
                        )
        session.execute(
            "CREATE TABLE alter_rf_test.t1 (k int PRIMARY KEY, a int, b int);")
        session.execute(
            "INSERT INTO alter_rf_test.t1 (k, a, b) VALUES (1, 1, 1);")
        cl_one_stmt = SimpleStatement(
            "SELECT * FROM alter_rf_test.t1 WHERE k=1",
            consistency_level=ConsistencyLevel.ONE)

        # identify the initial replica and trigger a flush to ensure reads come from sstables
        initial_replica, non_replicas = self.identify_initial_placement(
            'alter_rf_test', 't1', 1)
        debug("At RF=1 replica for data is " + initial_replica.name)
        initial_replica.flush()

        # At RF=1, it shouldn't matter which node we query, as the actual data should always come from the
        # initial replica when reading at CL ONE
        for n in self.cluster.nodelist():
            debug("Checking " + n.name)
            session = self.patient_exclusive_cql_connection(n)
            assert_one(session,
                       "SELECT * FROM alter_rf_test.t1 WHERE k=1", [1, 1, 1],
                       cl=ConsistencyLevel.ONE)

        # Alter so RF=n but don't repair, then execute a query which selects only a subset of the columns. Run this at
        # CL ALL on one of the nodes which doesn't currently have the data, triggering a read repair.
        # The expectation will be that every replicas will have been repaired for that column (but we make no assumptions
        # on the other columns).
        debug("Changing RF from 1 to 3")
        session.execute("""ALTER KEYSPACE alter_rf_test
                           WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"""
                        )
        debug(
            "Executing SELECT on non-initial replica to trigger read repair " +
            non_replicas[0].name)
        read_repair_session = self.patient_exclusive_cql_connection(
            non_replicas[0])
        # result of the CL ALL query contains only the selected column
        assert_one(read_repair_session,
                   "SELECT a FROM alter_rf_test.t1 WHERE k=1", [1],
                   cl=ConsistencyLevel.ALL)

        # Check the results of the read repair by querying each replica again at CL ONE
        debug("Re-running SELECTs at CL ONE to verify read repair")
        for n in self.cluster.nodelist():
            debug("Checking " + n.name)
            session = self.patient_exclusive_cql_connection(n)
            res = rows_to_list(session.execute(cl_one_stmt))
            # Column a must be 1 everywhere, and column b must be either 1 or None everywhere
            self.assertIn(res[0][:2], [[1, 1], [1, None]])

        # Now query at ALL but selecting all columns
        query = "SELECT * FROM alter_rf_test.t1 WHERE k=1"
        debug(
            "Executing SELECT on non-initial replica to trigger read repair " +
            non_replicas[0].name)
        read_repair_session = self.patient_exclusive_cql_connection(
            non_replicas[0])
        # Fix: run this on read_repair_session (the non-replica connection just
        # opened) rather than the stale `session` bound in the loop above, so
        # the CL ALL read actually goes through the intended coordinator and
        # triggers the read repair this test is about.
        assert_one(read_repair_session, query, [1, 1, 1],
                   cl=ConsistencyLevel.ALL)

        # Check all replica is fully up to date
        debug("Re-running SELECTs at CL ONE to verify read repair")
        for n in self.cluster.nodelist():
            debug("Checking " + n.name)
            session = self.patient_exclusive_cql_connection(n)
            assert_one(session, query, [1, 1, 1], cl=ConsistencyLevel.ONE)
Пример #31
0
    def test_cdc_data_available_in_cdc_raw(self):
        """
        Write rows to a CDC-enabled table, stop the generation node, move
        its cdc_raw segments into a fresh node's commitlog directory, and
        verify that commitlog replay restores every inserted row. On 4.0+,
        additionally verify that the replayed node's cdc index files are
        consistent with the source node's (log/idx files hard-linked,
        offsets >= source, completed flags matching).
        """
        ks_name = 'ks'
        # First, create a new node just for data generation.
        generation_node, generation_session = self.prepare(ks_name=ks_name)

        cdc_table_info = TableInfo(
            ks_name=ks_name,
            table_name='cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
            options={
                'cdc': 'true',
                # give table an explicit id so when we create it again it's the
                # same table and we can replay into it
                'id': uuid.uuid4()
            })

        # Write until we get a new CL segment to avoid replaying initialization
        # mutations from this node's startup into system tables in the other
        # node. See CASSANDRA-11811.
        advance_to_next_cl_segment(session=generation_session,
                                   commitlog_dir=os.path.join(
                                       generation_node.get_path(),
                                       'commitlogs'))

        generation_session.execute(cdc_table_info.create_stmt)

        # insert 10000 rows
        inserted_rows = _insert_rows(generation_session, cdc_table_info.name,
                                     cdc_table_info.insert_stmt,
                                     repeat((), 10000))

        # drain the node to guarantee all cl segments will be recycled
        logger.debug('draining')
        generation_node.drain()
        logger.debug('stopping')
        # stop the node and clean up all sessions attached to it
        generation_session.cluster.shutdown()
        generation_node.stop()

        # We can rely on the existing _cdc.idx files to determine which .log files contain cdc data.
        source_path = os.path.join(generation_node.get_path(), 'cdc_raw')
        # Fix: iterate the directory listing, not the path string itself.
        # Iterating the string yields single characters, none of which end
        # with '_cdc.idx', so this set was always empty.
        source_cdc_indexes = {
            ReplayData.load(source_path, name)
            for name in os.listdir(source_path)
            if name.endswith('_cdc.idx')
        }
        # Fix: `!= {}` compared a set with an empty *dict*, which is always
        # True and made the assertion vacuous; assert non-emptiness directly.
        assert source_cdc_indexes, 'no _cdc.idx files found in ' + source_path

        # create a new node to use for cdc_raw cl segment replay
        loading_node = self._init_new_loading_node(
            ks_name, cdc_table_info.create_stmt,
            self.cluster.version() < '4')

        # move cdc_raw contents to commitlog directories, then start the
        # node again to trigger commitlog replay, which should replay the
        # cdc_raw files we moved to commitlogs into memtables.
        logger.debug('moving cdc_raw and restarting node')
        _move_commitlog_segments(
            os.path.join(generation_node.get_path(), 'cdc_raw'),
            os.path.join(loading_node.get_path(), 'commitlogs'))
        loading_node.start(wait_for_binary_proto=True)
        logger.debug('node successfully started; waiting on log replay')
        loading_node.grep_log('Log replay complete')
        logger.debug('log replay complete')

        # final assertions
        validation_session = self.patient_exclusive_cql_connection(
            loading_node)
        data_in_cdc_table_after_restart = rows_to_list(
            validation_session.execute('SELECT * FROM ' + cdc_table_info.name))
        logger.debug('found {cdc} values in CDC table'.format(
            cdc=len(data_in_cdc_table_after_restart)))

        # Then we assert that the CDC data that we expect to be there is there.
        # All data that was in CDC tables should have been copied to cdc_raw,
        # then used in commitlog replay, so it should be back in the cluster.
        assert (inserted_rows == data_in_cdc_table_after_restart
                ), 'not all expected data selected'

        if self.cluster.version() >= '4.0':
            # Create ReplayData objects for each index file found in loading cluster
            loading_path = os.path.join(loading_node.get_path(), 'cdc_raw')
            dest_cdc_indexes = [
                ReplayData.load(loading_path, name)
                for name in os.listdir(loading_path)
                if name.endswith('_cdc.idx')
            ]

            # Compare source replay data to dest to ensure replay process created both hard links and index files.
            for srd in source_cdc_indexes:
                # Confirm both log and index are in dest
                assert os.path.isfile(os.path.join(loading_path, srd.idx_name))
                assert os.path.isfile(os.path.join(loading_path, srd.log_name))

                # Find dest ReplayData that corresponds to the source (should be exactly 1)
                corresponding_dest_replay_datae = [
                    x for x in dest_cdc_indexes if srd.idx_name == x.idx_name
                ]
                assert_length_equal(corresponding_dest_replay_datae, 1)
                drd = corresponding_dest_replay_datae[0]

                # We can't compare equality on offsets since replay uses the raw file length as the written
                # cdc offset. We *can*, however, confirm that the offset in the replayed file is >=
                # the source file, ensuring clients are signaled to replay at least all the data in the
                # log.
                assert drd.offset >= srd.offset

                # Confirm completed flag is the same in both
                assert srd.completed == drd.completed

            # Confirm that the relationship between index files on the source
            # and destination looks like we expect.
            # First, grab the mapping between the two, make sure it's a 1-1
            # mapping, and transform the dict to reflect that:
            src_to_dest_idx_map = {
                src_rd: [
                    dest_rd for dest_rd in dest_cdc_indexes
                    if dest_rd.idx_name == src_rd.idx_name
                ]
                for src_rd in source_cdc_indexes
            }
            for src_rd, dest_rds in src_to_dest_idx_map.items():
                assert_length_equal(dest_rds, 1)
                src_to_dest_idx_map[src_rd] = dest_rds[0]
            # All offsets in idx files that were copied should be >0 on the
            # destination node.
            assert (
                0 not in {i.offset for i in src_to_dest_idx_map.values()}),\
                ('Found index offsets == 0 in an index file on the '
                 'destination node that corresponds to an index file on the '
                 'source node:\n'
                 '{}').format(pformat(src_to_dest_idx_map))
            # Offsets of all shared indexes should be >= on the destination
            # than on the source.
            for src_rd, dest_rd in src_to_dest_idx_map.items():
                assert dest_rd.offset >= src_rd.offset
            # Fix: removed a leftover duplicate of the mapping/offset check
            # above; the duplicate accessed `.offset` on a *list* (always an
            # AttributeError) and compared offsets in the wrong direction.
Пример #32
0
    def test_cdc_data_available_in_cdc_raw(self):
        """
        Write rows to a CDC-enabled table, stop the generation node, move
        the contents of its cdc_raw directory into a fresh node's commitlog
        directory, and verify that commitlog replay restores every row that
        was originally inserted.
        """
        ks_name = 'ks'
        # First, create a new node just for data generation.
        generation_node, generation_session = self.prepare(ks_name=ks_name)

        cdc_table_info = TableInfo(
            ks_name=ks_name, table_name='cdc_tab',
            column_spec=_16_uuid_column_spec,
            insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
            options={
                'cdc': 'true',
                # give table an explicit id so when we create it again it's the
                # same table and we can replay into it
                'id': uuid.uuid4()
            }
        )

        # Write until we get a new CL segment to avoid replaying initialization
        # mutations from this node's startup into system tables in the other
        # node. See CASSANDRA-11811.
        advance_to_next_cl_segment(
            session=generation_session,
            commitlog_dir=os.path.join(generation_node.get_path(), 'commitlogs')
        )

        generation_session.execute(cdc_table_info.create_stmt)

        # insert 10000 rows
        inserted_rows = _insert_rows(generation_session, cdc_table_info.name, cdc_table_info.insert_stmt, repeat((), 10000))

        # drain the node to guarantee all cl segments will be recycled
        debug('draining')
        generation_node.drain()
        debug('stopping')
        # stop the node and clean up all sessions attached to it
        generation_node.stop()
        generation_session.cluster.shutdown()

        # create a new node to use for cdc_raw cl segment replay
        loading_node = self._init_new_loading_node(ks_name, cdc_table_info.create_stmt, self.cluster.version() < '4')

        # move cdc_raw contents to commitlog directories, then start the
        # node again to trigger commitlog replay, which should replay the
        # cdc_raw files we moved to commitlogs into memtables.
        debug('moving cdc_raw and restarting node')
        _move_contents(
            os.path.join(generation_node.get_path(), 'cdc_raw'),
            os.path.join(loading_node.get_path(), 'commitlogs')
        )
        loading_node.start(wait_for_binary_proto=True)
        debug('node successfully started; waiting on log replay')
        loading_node.grep_log('Log replay complete')
        debug('log replay complete')

        # final assertions
        validation_session = self.patient_exclusive_cql_connection(loading_node)
        data_in_cdc_table_after_restart = rows_to_list(
            validation_session.execute('SELECT * FROM ' + cdc_table_info.name)
        )
        debug('found {cdc} values in CDC table'.format(
            cdc=len(data_in_cdc_table_after_restart)
        ))
        # Then we assert that the CDC data that we expect to be there is there.
        # All data that was in CDC tables should have been copied to cdc_raw,
        # then used in commitlog replay, so it should be back in the cluster.
        self.assertEqual(
            inserted_rows,
            data_in_cdc_table_after_restart,
            # The message on failure is too long, since cdc_data is thousands
            # of items, so we print something else here
            msg='not all expected data selected'
        )
Пример #33
0
 def read_as_list(self, query, session=None, node=None):
     """Execute *query* at quorum and return its result rows as a list.

     A missing *session* is replaced by an exclusive connection to
     *node*, defaulting to ``self.node1``.
     """
     conn = session or self.exclusive_cql_connection(node or self.node1)
     result = self.quorum(conn, query)
     return rows_to_list(result)
Пример #34
0
    def json_tools_test(self):
        """
        Round-trip a small ``users`` table through the sstable JSON tools.

        Writes two rows and exports their sstables to a JSON file, rebuilds
        the cluster with one new row, re-imports the JSON file, and verifies
        that all three rows are present afterwards.
        """
        debug("Starting cluster...")
        cluster = self.cluster
        cluster.set_batch_commitlog(enabled=True)
        cluster.populate(1).start()

        debug("Version: " + cluster.version().vstring)

        debug("Getting CQLSH...")
        [node1] = cluster.nodelist()
        session = self.patient_cql_connection(node1)

        debug("Inserting data...")
        create_ks(session, 'Test', 1)

        session.execute("""
            CREATE TABLE users (
                user_name varchar PRIMARY KEY,
                password varchar,
                gender varchar,
                state varchar,
                birth_year bigint
            );
        """)

        session.execute("INSERT INTO Test. users (user_name, password, gender, state, birth_year) VALUES('frodo', 'pass@', 'male', 'CA', 1985);")
        session.execute("INSERT INTO Test. users (user_name, password, gender, state, birth_year) VALUES('sam', '@pass', 'male', 'NY', 1980);")

        res = session.execute("SELECT * FROM Test. users")

        # Fix: order-insensitive comparison via sorted lists instead of
        # assertItemsEqual, which only exists on Python 2 (renamed to
        # assertCountEqual in Python 3); sorting works on both.
        self.assertEqual(sorted(rows_to_list(res)),
                         sorted([[u'frodo', 1985, u'male', u'pass@', u'CA'],
                                 [u'sam', 1980, u'male', u'@pass', u'NY']]))

        debug("Flushing and stopping cluster...")
        node1.flush()
        cluster.stop()

        debug("Exporting to JSON file...")
        # Use mkstemp instead of the deprecated, race-prone tempfile.mktemp:
        # mkstemp creates the file atomically, so no other process can claim
        # the name between generation and open.
        fd, json_path = tempfile.mkstemp(suffix='.schema.json')
        os.close(fd)
        with open(json_path, 'w') as f:
            node1.run_sstable2json(f)

        # sstable2json may prepend a warning line; strip it so the file is
        # valid JSON for the import step below.
        with open(json_path, 'r') as fin:
            data = fin.read().splitlines(True)
        if data[0][0] == 'W':
            with open(json_path, 'w') as fout:
                fout.writelines(data[1:])

        debug("Deleting cluster and creating new...")
        cluster.clear()
        cluster.start()

        debug("Inserting data...")
        session = self.patient_cql_connection(node1)
        create_ks(session, 'Test', 1)

        session.execute("""
            CREATE TABLE users (
                user_name varchar PRIMARY KEY,
                password varchar,
                gender varchar,
                state varchar,
                birth_year bigint
            );
        """)

        session.execute("INSERT INTO Test. users (user_name, password, gender, state, birth_year) VALUES('gandalf', 'p@$$', 'male', 'WA', 1955);")
        node1.flush()
        cluster.stop()

        debug("Importing JSON file...")
        with open(json_path) as f:
            node1.run_json2sstable(f, "test", "users")
        os.remove(json_path)

        debug("Verifying import...")
        cluster.start()
        [node1] = cluster.nodelist()
        session = self.patient_cql_connection(node1)

        res = session.execute("SELECT * FROM Test. users")

        debug("data: " + str(res))

        # The two rows restored from JSON plus the newly inserted one.
        self.assertEqual(sorted(rows_to_list(res)),
                         sorted([[u'frodo', 1985, u'male', u'pass@', u'CA'],
                                 [u'sam', 1980, u'male', u'@pass', u'NY'],
                                 [u'gandalf', 1955, u'male', u'p@$$', u'WA']]))