def readrepair_test(self):
    cluster = self.cluster
    cluster.set_configuration_options(values={"hinted_handoff_enabled": False})

    if DISABLE_VNODES:
        cluster.populate(2).start()
    else:
        tokens = cluster.balanced_tokens(2)
        cluster.populate(2, tokens=tokens).start()
    node1, node2 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, "ks", 2)
    create_c1c2_table(self, session, read_repair=1.0)

    node2.stop(wait_other_notice=True)

    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

    node2.start(wait_other_notice=True)

    # query everything to cause RR
    for n in xrange(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.QUORUM)

    node1.stop(wait_other_notice=True)

    # Check node2 for all the keys that should have been repaired
    session = self.patient_cql_connection(node2, keyspace="ks")
    for n in xrange(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.ONE)
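# NOTE: every test in this collection leans on the same small set of helpers
# (create_c1c2_table, insert_c1c2, query_c1c2) whose definitions are not part
# of this collection. The sketch below is a hypothetical reconstruction based
# only on how they are called here; the exact signatures and assertions are
# assumptions, and the older cursor-based snippets use an earlier single-key
# form (insert_c1c2(cursor, key, "ONE")) rather than the batch form shown.
from cassandra import ConsistencyLevel
from cassandra.query import SimpleStatement


def create_c1c2_table(tester, session, read_repair=None):
    # 'cf' is a simple two-column table keyed by a text partition key.
    tester.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'},
                     read_repair=read_repair)


def insert_c1c2(session, keys=None, n=None, consistency=ConsistencyLevel.QUORUM):
    # Insert rows k0..k(n-1), or an explicit key list, with fixed values.
    if keys is None:
        keys = range(n)
    for k in keys:
        session.execute(SimpleStatement(
            "INSERT INTO cf (key, c1, c2) VALUES ('k%d', 'value1', 'value2')" % k,
            consistency_level=consistency))


def query_c1c2(session, key, consistency=ConsistencyLevel.QUORUM,
               tolerate_missing=False, must_be_missing=False):
    # Read one key back and assert on the presence/absence of the row.
    rows = list(session.execute(SimpleStatement(
        "SELECT c1, c2 FROM cf WHERE key='k%d'" % key,
        consistency_level=consistency)))
    if must_be_missing:
        assert len(rows) == 0, rows
    elif not tolerate_missing:
        assert len(rows) == 1, rows
        assert (rows[0][0], rows[0][1]) == ('value1', 'value2'), rows[0]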
def quorum_available_during_failure_test(self):
    CL = 'QUORUM'
    RF = 3

    debug("Creating a ring")
    cluster = self.cluster
    if ENABLE_VNODES:
        tokens = cluster.balanced_tokens(3)
        cluster.populate(3, tokens=tokens).start()
    else:
        cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()
    cluster.start()

    debug("Set to talk to node 2")
    cursor = self.cql_connection(node2).cursor()
    self.create_ks(cursor, 'ks', RF)
    create_c1c2_table(self, cursor)

    debug("Generating some data")
    for n in xrange(100):
        insert_c1c2(cursor, n, CL)

    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Reading back data.")
    for n in xrange(100):
        query_c1c2(cursor, n, CL)
def _do_hinted_handoff(self, node1, node2, enabled):
    """
    Test that if we stop one node the other one
    will store hints only when hinted handoff is enabled
    """
    session = self.patient_exclusive_cql_connection(node1)
    self.create_ks(session, 'ks', 2)
    create_c1c2_table(self, session)

    node2.stop(wait_other_notice=True)

    insert_c1c2(session, n=100, consistency=ConsistencyLevel.ONE)

    log_mark = node1.mark_log()
    node2.start(wait_other_notice=True)

    if enabled:
        node1.watch_log_for(["Finished hinted"], from_mark=log_mark, timeout=120)

    node1.stop(wait_other_notice=True)

    # Check node2: the keys should have been delivered via HH when it is
    # enabled, and should be missing when it is disabled
    session = self.patient_exclusive_cql_connection(node2, keyspace='ks')
    for n in xrange(0, 100):
        if enabled:
            query_c1c2(session, n, ConsistencyLevel.ONE)
        else:
            query_c1c2(session, n, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
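# The helper above takes the hinted-handoff setting as a parameter, so it is
# presumably driven by thin per-setting wrappers. The two callers below are a
# hypothetical sketch (the wrapper names and the config call are assumptions,
# not part of this collection).
def hintedhandoff_enabled_test(self):
    # Hints on: keys written while node2 was down must turn up on node2.
    self.cluster.set_configuration_options(values={'hinted_handoff_enabled': True})
    self.cluster.populate(2).start(wait_other_notice=True)
    node1, node2 = self.cluster.nodelist()
    self._do_hinted_handoff(node1, node2, True)


def hintedhandoff_disabled_test(self):
    # Hints off: the same keys must be missing on node2 afterwards.
    self.cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
    self.cluster.populate(2).start(wait_other_notice=True)
    node1, node2 = self.cluster.nodelist()
    self._do_hinted_handoff(node1, node2, False)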
def hintedhandoff_test(self):
    cluster = self.cluster

    if DISABLE_VNODES:
        cluster.populate(2).start()
    else:
        tokens = cluster.balanced_tokens(2)
        cluster.populate(2, tokens=tokens).start()
    [node1, node2] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1).cursor()
    self.create_ks(cursor, "ks", 2)
    create_c1c2_table(self, cursor)

    node2.stop(wait_other_notice=True)

    for n in xrange(0, 100):
        insert_c1c2(cursor, n, "ONE")

    log_mark = node1.mark_log()
    node2.start()
    node1.watch_log_for(["Finished hinted"], from_mark=log_mark, timeout=90)

    node1.stop(wait_other_notice=True)

    # Check node2 for all the keys that should have been delivered via HH
    cursor = self.patient_cql_connection(node2, keyspace="ks").cursor()
    for n in xrange(0, 100):
        query_c1c2(cursor, n, "ONE")
def decommission_test(self):
    cluster = self.cluster

    tokens = cluster.balanced_tokens(4)
    cluster.populate(4, tokens=tokens).start()
    node1, node2, node3, node4 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 2)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=30000, consistency=ConsistencyLevel.QUORUM)

    cluster.flush()
    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    init_size = sizes[0]
    assert_almost_equal(*sizes)

    time.sleep(.5)
    node4.decommission()
    node4.stop()
    cluster.cleanup()
    time.sleep(.5)

    # Check we can get all the keys
    for n in xrange(0, 30000):
        query_c1c2(session, n, ConsistencyLevel.QUORUM)

    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    debug(sizes)
    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
    assert_almost_equal(sizes[2], init_size)
def quorum_quorum_test(self):
    cluster = self.cluster
    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()

    cursor1 = self.patient_cql_connection(node1).cursor()
    self.create_ks(cursor1, 'ks', 3)
    create_c1c2_table(self, cursor1)

    cursor2 = self.patient_cql_connection(node2, 'ks').cursor()

    # insert and get at CL.QUORUM
    for n in xrange(0, 100):
        insert_c1c2(cursor1, n, "QUORUM")
        query_c1c2(cursor2, n, "QUORUM")

    # shut down a node and test again
    node3.stop(wait_other_notice=True)
    for n in xrange(100, 200):
        insert_c1c2(cursor1, n, "QUORUM")
        query_c1c2(cursor2, n, "QUORUM")

    # shut down another node and check that we get an unavailable exception
    node2.stop(wait_other_notice=True)
    assert_unavailable(insert_c1c2, cursor1, 200, "QUORUM")
def quorum_available_during_failure_test(self):
    CL = ConsistencyLevel.QUORUM
    RF = 3

    debug("Creating a ring")
    cluster = self.cluster
    if DISABLE_VNODES:
        cluster.populate(3).start()
    else:
        tokens = cluster.balanced_tokens(3)
        cluster.populate(3, tokens=tokens).start()
    node1, node2, node3 = cluster.nodelist()
    cluster.start()

    debug("Set to talk to node 2")
    session = self.patient_cql_connection(node2)
    self.create_ks(session, "ks", RF)
    create_c1c2_table(self, session)

    debug("Generating some data")
    insert_c1c2(session, n=100, consistency=CL)

    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Reading back data.")
    for n in xrange(100):
        query_c1c2(session, n, CL)
def readrepair_test(self):
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False})

    if DISABLE_VNODES:
        cluster.populate(2).start()
    else:
        tokens = cluster.balanced_tokens(2)
        cluster.populate(2, tokens=tokens).start()
    [node1, node2] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1).cursor()
    self.create_ks(cursor, 'ks', 2)
    create_c1c2_table(self, cursor, read_repair=1.0)

    node2.stop(wait_other_notice=True)

    for n in xrange(0, 10000):
        insert_c1c2(cursor, n, "ONE")

    node2.start(wait_other_notice=True)
    time.sleep(5)

    # query everything to cause RR
    for n in xrange(0, 10000):
        query_c1c2(cursor, n, "QUORUM")

    node1.stop(wait_other_notice=True)

    # Check node2 for all the keys that should have been repaired
    cursor = self.patient_cql_connection(node2, keyspace='ks').cursor()
    for n in xrange(0, 10000):
        query_c1c2(cursor, n, "ONE")
def quorum_available_during_failure_test(self):
    CL = ConsistencyLevel.QUORUM
    RF = 3

    debug("Creating a ring")
    cluster = self.cluster
    if DISABLE_VNODES:
        cluster.populate(3).start()
    else:
        tokens = cluster.balanced_tokens(3)
        cluster.populate(3, tokens=tokens).start()
    node1, node2, node3 = cluster.nodelist()

    debug("Set to talk to node 2")
    session = self.patient_cql_connection(node2)
    self.create_ks(session, 'ks', RF)
    create_c1c2_table(self, session)

    debug("Generating some data")
    insert_c1c2(session, n=100, consistency=CL)

    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Reading back data.")
    for n in xrange(100):
        query_c1c2(session, n, CL)
def check_rows_on_node(self, node_to_check, rows, found=None, missings=None, restart=True):
    if found is None:
        found = []
    if missings is None:
        missings = []
    stopped_nodes = []

    for node in self.cluster.nodes.values():
        if node.is_running() and node is not node_to_check:
            stopped_nodes.append(node)
            node.stop(wait_other_notice=True)

    session = self.patient_cql_connection(node_to_check, 'ks')
    result = list(session.execute("SELECT * FROM cf LIMIT %d" % (rows * 2)))
    self.assertEqual(len(result), rows, len(result))

    for k in found:
        query_c1c2(session, k, ConsistencyLevel.ONE)

    for k in missings:
        query = SimpleStatement("SELECT c1, c2 FROM cf WHERE key='k%d'" % k,
                                consistency_level=ConsistencyLevel.ONE)
        res = list(session.execute(query))
        self.assertEqual(len(filter(lambda x: len(x) != 0, res)), 0, res)

    if restart:
        for node in stopped_nodes:
            node.start(wait_other_notice=True)
def move_single_node_test(self):
    """ Test moving a node in a single-node cluster (#4200) """
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(1, tokens=[0]).start()
    node1 = cluster.nodelist()[0]
    time.sleep(0.2)

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

    cluster.flush()

    node1.move(2**25)
    time.sleep(1)

    cluster.cleanup()

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.ONE)
def check_rows_on_node(self, node_to_check, rows, found=None, missings=None, restart=True):
    if found is None:
        found = []
    if missings is None:
        missings = []
    stopped_nodes = []

    for node in self.cluster.nodes.values():
        if node.is_running() and node is not node_to_check:
            stopped_nodes.append(node)
            node.stop(wait_other_notice=True)

    session = self.patient_cql_connection(node_to_check, 'ks')
    result = session.execute("SELECT * FROM cf LIMIT %d" % (rows * 2))
    assert len(result) == rows, len(result)

    for k in found:
        query_c1c2(session, k, ConsistencyLevel.ONE)

    for k in missings:
        query = SimpleStatement("SELECT c1, c2 FROM cf WHERE key='k%d'" % k,
                                consistency_level=ConsistencyLevel.ONE)
        res = session.execute(query)
        assert len(filter(lambda x: len(x) != 0, res)) == 0, res

    if restart:
        for node in stopped_nodes:
            node.start(wait_other_notice=True)
def movement_test(self):
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

    cluster.flush()

    # Move nodes to balance the cluster
    balancing_tokens = cluster.balanced_tokens(3)

    escformat = '%s'
    node1.move(escformat % balancing_tokens[0])  # can't assume 0 is balanced with m3p
    node2.move(escformat % balancing_tokens[1])
    node3.move(escformat % balancing_tokens[2])
    time.sleep(1)

    cluster.cleanup()

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.ONE)

    # Now the load should be basically even
    sizes = [node.data_size() for node in [node1, node2, node3]]

    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal(sizes[0], sizes[2])
    assert_almost_equal(sizes[1], sizes[2])
def quorum_quorum_test(self):
    session, session2 = self.cl_cl_prepare(ConsistencyLevel.QUORUM, ConsistencyLevel.QUORUM)

    # Stop a node and retest
    self.cluster.nodelist()[2].stop()
    for n in xrange(0, 100):
        insert_c1c2(session, n, ConsistencyLevel.QUORUM)
        query_c1c2(session2, n, ConsistencyLevel.QUORUM)

    self.cluster.nodelist()[1].stop()
    assert_unavailable(insert_c1c2, session, 100, ConsistencyLevel.QUORUM)
def consistent_reads_after_bootstrap_test(self):
    debug("Creating a ring")
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                              'write_request_timeout_in_ms': 60000,
                                              'read_request_timeout_in_ms': 60000,
                                              'dynamic_snitch_badness_threshold': 0.0},
                                      batch_commitlog=True)

    cluster.populate(2).start()
    node1, node2 = cluster.nodelist()
    cluster.start(wait_for_binary_proto=True, wait_other_notice=True)

    debug("Set to talk to node 2")
    n2session = self.patient_cql_connection(node2)
    self.create_ks(n2session, 'ks', 2)
    create_c1c2_table(self, n2session)

    debug("Generating some data for all nodes")
    insert_c1c2(n2session, keys=range(10, 20), consistency=ConsistencyLevel.ALL)

    node1.flush()
    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Writing data to only node2")
    insert_c1c2(n2session, keys=range(30, 1000), consistency=ConsistencyLevel.ONE)
    node2.flush()

    debug("Restart node1")
    node1.start(wait_other_notice=True)

    debug("Bootstrapping node3")
    node3 = new_node(cluster)
    node3.start(wait_for_binary_proto=True)

    n3session = self.patient_cql_connection(node3)
    n3session.execute("USE ks")
    debug("Checking that no data was lost")
    for n in xrange(10, 20):
        query_c1c2(n3session, n, ConsistencyLevel.ALL)

    for n in xrange(30, 1000):
        query_c1c2(n3session, n, ConsistencyLevel.ALL)
def all_one_test(self):
    session, session2 = self.cl_cl_prepare(ConsistencyLevel.ALL, ConsistencyLevel.ONE)

    # Stop a node and retest
    self.cluster.nodelist()[2].stop()
    assert_unavailable(insert_c1c2, session, 100, ConsistencyLevel.ALL)
    for n in xrange(0, 100):
        query_c1c2(session2, n, ConsistencyLevel.ONE)

    # Stop a node and retest
    self.cluster.nodelist()[1].stop()
    assert_unavailable(insert_c1c2, session, 100, ConsistencyLevel.ALL)
    for n in xrange(0, 100):
        query_c1c2(session2, n, ConsistencyLevel.ONE)
def consistent_reads_after_move_test(self):
    debug("Creating a ring")
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                              'write_request_timeout_in_ms': 60000,
                                              'read_request_timeout_in_ms': 60000,
                                              'dynamic_snitch_badness_threshold': 0.0},
                                      batch_commitlog=True)

    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    [node1, node2, node3] = cluster.nodelist()
    cluster.start()

    debug("Set to talk to node 2")
    n2cursor = self.patient_cql_connection(node2).cursor()
    self.create_ks(n2cursor, 'ks', 2)
    create_c1c2_table(self, n2cursor)

    debug("Generating some data for all nodes")
    for n in xrange(10, 20):
        insert_c1c2(n2cursor, n, 'ALL')

    node1.flush()
    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Writing data to node2")
    for n in xrange(30, 1000):
        insert_c1c2(n2cursor, n, 'ONE')
    node2.flush()

    debug("Restart node1")
    node1.start(wait_other_notice=True)

    debug("Move token on node3")
    node3.move(2)

    debug("Checking that no data was lost")
    for n in xrange(10, 20):
        query_c1c2(n2cursor, n, 'ALL')

    for n in xrange(30, 1000):
        query_c1c2(n2cursor, n, 'ALL')
def non_local_read_test(self):
    """ This test reads from a coordinator we know has no copy of the data """
    cluster = self.cluster
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 2)
    create_c1c2_table(self, session)

    # insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
    tools.insert_c1c2(session, n=1000, consistency=ConsistencyLevel.QUORUM)
    for n in xrange(0, 1000):
        tools.query_c1c2(session, n, ConsistencyLevel.QUORUM)
def hintedhandoff_decom_test(self):
    self.cluster.populate(4).start(wait_for_binary_proto=True)
    [node1, node2, node3, node4] = self.cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 2)
    create_c1c2_table(self, session)

    node4.stop(wait_other_notice=True)
    insert_c1c2(session, n=100, consistency=ConsistencyLevel.ONE)

    node1.decommission()
    node4.start(wait_for_binary_proto=True)

    node2.decommission()
    node3.decommission()

    time.sleep(5)
    for x in xrange(0, 100):
        query_c1c2(session, x, ConsistencyLevel.ONE)
def non_local_read_test(self):
    """ This test reads from a coordinator we know has no copy of the data """
    cluster = self.cluster
    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()

    cursor = self.cql_connection(node1).cursor()
    self.create_ks(cursor, 'ks', 2)
    self.create_cf(cursor, 'cf')

    # insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
    for n in xrange(0, 1000):
        tools.insert_c1c2(cursor, n, "QUORUM")
        tools.query_c1c2(cursor, n, "QUORUM")
def non_local_read_test(self):
    """ This test reads from a coordinator we know has no copy of the data """
    cluster = self.cluster
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 2)
    create_c1c2_table(self, cursor)

    # insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
    for n in xrange(0, 1000):
        tools.insert_c1c2(cursor, n, ConsistencyLevel.QUORUM)
        tools.query_c1c2(cursor, n, ConsistencyLevel.QUORUM)
def simple_bootstrap_test(self):
    cluster = self.cluster
    tokens = cluster.balanced_tokens(2)
    cluster.set_configuration_options(values={'num_tokens': 1})

    debug("[node1, node2] tokens: %r" % (tokens,))

    keys = 10000

    # Create a single node cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]
    node1.set_configuration_options(values={'initial_token': tokens[0]})
    cluster.start(wait_other_notice=True)

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    # record the size before inserting any of our own data
    empty_size = node1.data_size()
    debug("node1 empty size : %s" % float(empty_size))

    insert_statement = session.prepare("INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
    execute_concurrent_with_args(session, insert_statement, [['k%d' % k] for k in range(keys)])

    node1.flush()
    node1.compact()
    initial_size = node1.data_size()
    debug("node1 size before bootstrapping node2: %s" % float(initial_size))

    # Read the inserted data throughout the bootstrap process; we shouldn't
    # get any errors
    reader = self.go(lambda _: query_c1c2(session, random.randint(0, keys - 1), ConsistencyLevel.ONE))

    # Bootstrapping a new node
    node2 = new_node(cluster)
    node2.set_configuration_options(values={'initial_token': tokens[1]})
    node2.start(wait_for_binary_proto=True)
    node2.compact()

    reader.check()
    node1.cleanup()
    debug("node1 size after cleanup: %s" % float(node1.data_size()))
    node1.compact()
    debug("node1 size after compacting: %s" % float(node1.data_size()))
    time.sleep(.5)
    reader.check()

    debug("node2 size after compacting: %s" % float(node2.data_size()))

    size1 = float(node1.data_size())
    size2 = float(node2.data_size())

    assert_almost_equal(size1, size2, error=0.3)
    assert_almost_equal(float(initial_size - empty_size), 2 * (size1 - float(empty_size)))
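# simple_bootstrap_test above calls self.go(...) and later reader.check(), but
# that helper is not shown in this collection. A minimal stand-in, assuming it
# runs the supplied callable in a loop on a background thread and re-raises the
# first failure when check() is called (the names below are hypothetical):
import threading


class LoopingReader(threading.Thread):
    def __init__(self, func):
        threading.Thread.__init__(self)
        self.daemon = True
        self.func = func
        self.error = None
        self.stopped = False

    def run(self):
        # Keep calling func(i) until stopped or a failure is recorded.
        i = 0
        while not self.stopped and self.error is None:
            try:
                self.func(i)
            except Exception as e:
                self.error = e
            i += 1

    def check(self):
        # Surface any failure observed on the background thread.
        if self.error is not None:
            raise self.error


def go(self, func):
    reader = LoopingReader(func)
    reader.start()
    return reader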
def one_one_test(self):
    session, session2 = self.cl_cl_prepare(ConsistencyLevel.ONE, ConsistencyLevel.ONE, tolerate_missing=True)

    # Stop a node and retest
    self.cluster.nodelist()[2].stop()
    for n in xrange(0, 100):
        insert_c1c2(session, n, ConsistencyLevel.ONE)
        query_c1c2(session2, n, ConsistencyLevel.ONE, tolerate_missing=True)

    # Stop a node and retest
    self.cluster.nodelist()[1].stop()
    for n in xrange(0, 100):
        insert_c1c2(session, n, ConsistencyLevel.ONE)
        query_c1c2(session2, n, ConsistencyLevel.ONE, tolerate_missing=False)
def cl_cl_prepare(self, write_cl, read_cl, tolerate_missing=False):
    cluster = self.cluster
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 3)
    create_c1c2_table(self, session)

    session2 = self.patient_cql_connection(node2, 'ks')

    # insert and read back at the given consistency levels
    for n in xrange(0, 100):
        insert_c1c2(session, n, write_cl)
        query_c1c2(session2, n, read_cl, tolerate_missing)

    return session, session2
def movement_test(self):
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=30000, consistency=ConsistencyLevel.ONE)

    cluster.flush()

    # Move nodes to balance the cluster
    def move_node(node, token, ip):
        mark = node.mark_log()
        node.move(token)  # can't assume 0 is balanced with m3p
        node.watch_log_for('{} state jump to NORMAL'.format(ip), from_mark=mark, timeout=180)
        time.sleep(3)

    balancing_tokens = cluster.balanced_tokens(3)

    move_node(node1, balancing_tokens[0], '127.0.0.1')
    move_node(node2, balancing_tokens[1], '127.0.0.2')
    move_node(node3, balancing_tokens[2], '127.0.0.3')

    time.sleep(1)
    cluster.cleanup()

    # Check we can get all the keys
    for n in xrange(0, 30000):
        query_c1c2(session, n, ConsistencyLevel.ONE)

    # Now the load should be basically even
    sizes = [node.data_size() for node in [node1, node2, node3]]

    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal(sizes[0], sizes[2])
    assert_almost_equal(sizes[1], sizes[2])
def check_rows_on_node(self, node_to_check, rows, found=None, missings=None, restart=True):
    """
    Function to verify the rows on a given node, without interference
    from the other nodes in the cluster
    @param node_to_check The given node to check. Should be the node, not the index
    @param rows The number of rows we expect
    @param found A list of partition keys that we expect to be on the node
    @param missings A list of partition keys we expect NOT to be on the node
    @param restart Whether or not we should restart the nodes we shut down to perform the assertions.
                   Should only be False if the call to check_rows_on_node is the last line in the test.
    """
    if found is None:
        found = []
    if missings is None:
        missings = []
    stopped_nodes = []

    for node in self.cluster.nodes.values():
        if node.is_running() and node is not node_to_check:
            stopped_nodes.append(node)
            node.stop(wait_other_notice=True)

    session = self.patient_cql_connection(node_to_check, 'ks')
    result = list(session.execute("SELECT * FROM cf LIMIT {}".format(rows * 2)))
    self.assertEqual(len(result), rows)

    for k in found:
        query_c1c2(session, k, ConsistencyLevel.ONE)

    for k in missings:
        query = SimpleStatement("SELECT c1, c2 FROM cf WHERE key='k{}'".format(k),
                                consistency_level=ConsistencyLevel.ONE)
        res = list(session.execute(query))
        self.assertEqual(len(filter(lambda x: len(x) != 0, res)), 0, res)

    if restart:
        for node in stopped_nodes:
            node.start(wait_other_notice=True)
def consistent_reads_after_bootstrap_test(self):
    debug("Creating a ring")
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                              'write_request_timeout_in_ms': 60000,
                                              'read_request_timeout_in_ms': 60000,
                                              'dynamic_snitch_badness_threshold': 0.0},
                                      batch_commitlog=True)

    cluster.populate(2).start()
    node1, node2 = cluster.nodelist()
    cluster.start(wait_for_binary_proto=True, wait_other_notice=True)

    debug("Set to talk to node 2")
    n2session = self.patient_cql_connection(node2)
    self.create_ks(n2session, 'ks', 2)
    create_c1c2_table(self, n2session)

    debug("Generating some data for all nodes")
    for n in xrange(10, 20):
        insert_c1c2(n2session, n, ConsistencyLevel.ALL)

    node1.flush()
    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Writing data to only node2")
    for n in xrange(30, 1000):
        insert_c1c2(n2session, n, ConsistencyLevel.ONE)
    node2.flush()

    debug("Restart node1")
    node1.start(wait_other_notice=True)

    debug("Bootstrapping node3")
    node3 = new_node(cluster)
    node3.start(wait_for_binary_proto=True)

    n3session = self.patient_cql_connection(node3)
    n3session.execute("USE ks")
    debug("Checking that no data was lost")
    for n in xrange(10, 20):
        query_c1c2(n3session, n, ConsistencyLevel.ALL)

    for n in xrange(30, 1000):
        query_c1c2(n3session, n, ConsistencyLevel.ALL)
def movement_test(self):
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

    cluster.flush()

    # Move nodes to balance the cluster
    def move_node(node, token, ip):
        mark = node.mark_log()
        node.move(token)  # can't assume 0 is balanced with m3p
        node.watch_log_for('{} state jump to NORMAL'.format(ip), from_mark=mark, timeout=180)
        time.sleep(3)

    balancing_tokens = cluster.balanced_tokens(3)

    move_node(node1, balancing_tokens[0], '127.0.0.1')
    move_node(node2, balancing_tokens[1], '127.0.0.2')
    move_node(node3, balancing_tokens[2], '127.0.0.3')

    time.sleep(1)
    cluster.cleanup()

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.ONE)

    # Now the load should be basically even
    sizes = [node.data_size() for node in [node1, node2, node3]]

    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal(sizes[0], sizes[2])
    assert_almost_equal(sizes[1], sizes[2])
def all_one_test(self):
    cluster = self.cluster
    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()

    cursor1 = self.patient_cql_connection(node1).cursor()
    self.create_ks(cursor1, "ks", 3)
    create_c1c2_table(self, cursor1)

    cursor2 = self.patient_cql_connection(node2, "ks").cursor()

    # insert at CL.ALL and get at CL.ONE
    for n in xrange(0, 100):
        insert_c1c2(cursor1, n, "ALL")
        query_c1c2(cursor2, n, "ONE")

    # shut down a node and test again
    node3.stop(wait_other_notice=True)
    assert_unavailable(insert_c1c2, cursor1, 100, "ALL")
def all_all_test(self):
    cluster = self.cluster
    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()

    cursor1 = self.cql_connection(node1).cursor()
    self.create_ks(cursor1, 'ks', 3)
    create_c1c2_table(self, cursor1)

    cursor2 = self.cql_connection(node2, 'ks').cursor()

    # insert and get at CL.ALL
    for n in xrange(0, 100):
        insert_c1c2(cursor1, n, "ALL")
        query_c1c2(cursor2, n, "ALL")

    # shut down one node and check that we get an unavailable exception
    node3.stop(wait_other_notice=True)
    assert_unavailable(insert_c1c2, cursor1, 100, "ALL")
def blacklisted_directory_test(self):
    cluster = self.cluster
    cluster.set_datadir_count(3)
    cluster.populate(1)
    [node] = cluster.nodelist()
    remove_perf_disable_shared_mem(node)
    cluster.start(wait_for_binary_proto=True)

    session = self.patient_cql_connection(node)
    self.create_ks(session, 'ks', 1)
    create_c1c2_table(self, session)
    insert_c1c2(session, n=10000)
    node.flush()
    for k in xrange(0, 10000):
        query_c1c2(session, k)

    node.compact()
    mbean = make_mbean('db', type='BlacklistedDirectories')
    with JolokiaAgent(node) as jmx:
        jmx.execute_method(mbean, 'markUnwritable', [os.path.join(node.get_path(), 'data0')])

    for k in xrange(0, 10000):
        query_c1c2(session, k)

    node.nodetool('relocatesstables')

    for k in xrange(0, 10000):
        query_c1c2(session, k)
def consistent_reads_after_move_test(self):
    debug("Creating a ring")
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                              'write_request_timeout_in_ms': 60000,
                                              'read_request_timeout_in_ms': 60000,
                                              'dynamic_snitch_badness_threshold': 0.0},
                                      batch_commitlog=True)

    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    [node1, node2, node3] = cluster.nodelist()
    cluster.start()

    debug("Set to talk to node 2")
    n2cursor = self.patient_cql_connection(node2)
    self.create_ks(n2cursor, 'ks', 2)
    create_c1c2_table(self, n2cursor)

    debug("Generating some data for all nodes")
    for n in xrange(10, 20):
        insert_c1c2(n2cursor, n, ConsistencyLevel.ALL)

    node1.flush()
    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Writing data to node2")
    for n in xrange(30, 1000):
        insert_c1c2(n2cursor, n, ConsistencyLevel.ONE)
    node2.flush()

    debug("Restart node1")
    node1.start(wait_other_notice=True)

    debug("Move token on node3")
    node3.move(2)

    debug("Checking that no data was lost")
    for n in xrange(10, 20):
        query_c1c2(n2cursor, n, ConsistencyLevel.ALL)

    for n in xrange(30, 1000):
        query_c1c2(n2cursor, n, ConsistencyLevel.ALL)
def movement_test(self):
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

    cluster.flush()

    # Move nodes to balance the cluster
    balancing_tokens = cluster.balanced_tokens(3)

    escformat = '\\%s'
    if cluster.version() >= '2.1':
        escformat = '%s'
    node1.move(escformat % balancing_tokens[0])  # can't assume 0 is balanced with m3p
    node2.move(escformat % balancing_tokens[1])
    node3.move(escformat % balancing_tokens[2])
    time.sleep(1)

    cluster.cleanup()

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.ONE)

    # Now the load should be basically even
    sizes = [node.data_size() for node in [node1, node2, node3]]

    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal(sizes[0], sizes[2])
    assert_almost_equal(sizes[1], sizes[2])
def simple_rebuild_test(self):
    """
    @jira_ticket CASSANDRA-9119

    Test rebuild from other dc works as expected.
    """
    keys = 1000

    cluster = self.cluster
    cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
    node1 = cluster.create_node('node1', False,
                                ('127.0.0.1', 9160),
                                ('127.0.0.1', 7000),
                                '7100', '2000', None,
                                binary_interface=('127.0.0.1', 9042))
    cluster.add(node1, True, data_center='dc1')

    # start node in dc1
    node1.start(wait_for_binary_proto=True)

    # populate data in dc1
    session = self.patient_exclusive_cql_connection(node1)
    self.create_ks(session, 'ks', {'dc1': 1})
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.LOCAL_ONE)

    # check data
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
    session.shutdown()

    # Bootstrapping a new node in dc2 with auto_bootstrap: false
    node2 = cluster.create_node('node2', False,
                                ('127.0.0.2', 9160),
                                ('127.0.0.2', 7000),
                                '7200', '2001', None,
                                binary_interface=('127.0.0.2', 9042))
    cluster.add(node2, False, data_center='dc2')
    node2.start(wait_other_notice=True, wait_for_binary_proto=True)

    # wait for snitch to reload
    time.sleep(60)
    # alter keyspace to replicate to dc2
    session = self.patient_exclusive_cql_connection(node2)
    session.execute("ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    # alter system_auth -- rebuilding it no longer possible after
    # CASSANDRA-11848 prevented local node from being considered a source
    session.execute("ALTER KEYSPACE system_auth WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    session.execute('USE ks')

    self.rebuild_errors = 0

    # rebuild dc2 from dc1
    def rebuild():
        try:
            node2.nodetool('rebuild dc1')
        except ToolError as e:
            if 'Node is still rebuilding' in e.stdout:
                self.rebuild_errors += 1
            else:
                raise e

    class Runner(Thread):
        def __init__(self, func):
            Thread.__init__(self)
            self.func = func
            self.thread_exc_info = None

        def run(self):
            """
            Closes over self to catch any exceptions raised by func and
            register them at self.thread_exc_info
            Based on http://stackoverflow.com/a/1854263
            """
            try:
                self.func()
            except Exception:
                import sys
                self.thread_exc_info = sys.exc_info()

    cmd1 = Runner(rebuild)
    cmd1.start()

    # concurrent rebuild should not be allowed (CASSANDRA-9119)
    # (following sleep is needed to avoid conflict in 'nodetool()' method setting up env.)
    time.sleep(.1)
    # we don't need to manually raise exceptions here -- already handled
    rebuild()

    cmd1.join()

    # manually raise exception from cmd1 thread
    # see http://stackoverflow.com/a/1854263
    if cmd1.thread_exc_info is not None:
        raise cmd1.thread_exc_info[1], None, cmd1.thread_exc_info[2]

    # exactly 1 of the two nodetool calls should fail
    # usually it will be the one in the main thread,
    # but occasionally it wins the race with the one in the secondary thread,
    # so we check that one succeeded and the other failed
    self.assertEqual(self.rebuild_errors, 1,
                     msg='rebuild errors should be 1, but found {}. Concurrent rebuild should not be allowed, but one rebuild command should have succeeded.'.format(self.rebuild_errors))

    # check data
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
def decomission_test(self):
    cluster = self.cluster

    tokens = cluster.balanced_tokens(4)
    cluster.populate(4, tokens=tokens).start()
    node1, node2, node3, node4 = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 2)
    self.create_cf(cursor, 'cf', columns={'c1': 'text', 'c2': 'text'})

    for n in xrange(0, 10000):
        insert_c1c2(cursor, n, ConsistencyLevel.QUORUM)

    cluster.flush()
    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    init_size = sizes[0]
    assert_almost_equal(*sizes)

    time.sleep(.5)
    node4.decommission()
    node4.stop()
    cluster.cleanup()
    time.sleep(.5)

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(cursor, n, ConsistencyLevel.QUORUM)

    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    three_node_sizes = sizes
    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
    assert_almost_equal(sizes[2], init_size)

    if cluster.version() <= '1.2':
        node3.stop(wait_other_notice=True)
        node1.removeToken(tokens[2])
        time.sleep(.5)
        cluster.cleanup()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(cursor, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        assert_almost_equal(*sizes)
        assert_almost_equal(sizes[0], 2 * init_size)

        node5 = new_node(cluster, token=(tokens[2] + 1)).start()
        time.sleep(.5)
        cluster.cleanup()
        time.sleep(.5)
        cluster.compact()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(cursor, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        # We should be back to the earlier 3 nodes situation
        for i in xrange(0, len(sizes)):
            assert_almost_equal(sizes[i], three_node_sizes[i])
def taketoken_test(self):
    debug("Creating a ring")
    cluster = self.cluster
    cluster.set_configuration_options(values={'initial_token': None,
                                              'num_tokens': 10,
                                              'hinted_handoff_enabled': False,
                                              'write_request_timeout_in_ms': 60000,
                                              'read_request_timeout_in_ms': 60000,
                                              'dynamic_snitch_badness_threshold': 0.0},
                                      batch_commitlog=True)

    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()
    cluster.start()

    debug("Set to talk to node 2")
    n2cursor = self.patient_cql_connection(node2).cursor()
    self.create_ks(n2cursor, 'ks', 2)
    create_c1c2_table(self, n2cursor)

    debug("Generating some data for all nodes")
    for n in xrange(10, 20):
        insert_c1c2(n2cursor, n, 'ALL')

    node1.flush()
    debug("Writing data to node2")
    for n in xrange(30, 1000):
        insert_c1c2(n2cursor, n, 'ONE')
    node2.flush()

    debug("Getting token from node 1")
    n1cursor = self.patient_cql_connection(node1).cursor()
    n1cursor.execute('SELECT tokens FROM system.local')
    n1tokens = n1cursor.fetchone()

    n3cursor = self.patient_cql_connection(node3).cursor()
    n3cursor.execute('SELECT tokens FROM system.local')
    n3tokens = n3cursor.fetchone()

    debug("Relocate tokens from node1 to node3")
    i = 0
    tl = ""
    for t in n1tokens[0]:
        if i == 8:
            break
        t = '\\%s' % t
        tl = "%s %s" % (tl, t)
        i += 1

    cmd = "taketoken %s" % tl
    debug(cmd)
    node3.nodetool(cmd)

    time.sleep(1)

    debug("Check that the tokens were really moved")
    n3cursor.execute('SELECT tokens FROM system.local')
    n3tokens = n3cursor.fetchone()
    n1cursor.execute('SELECT tokens FROM system.local')
    n1tokens = n1cursor.fetchone()

    debug("n1 %s n3 %s" % (n1tokens, n3tokens))
    assert len(n3tokens[0]) == 18
    assert len(n1tokens[0]) == 2

    debug("Checking that no data was lost")
    for n in xrange(10, 20):
        query_c1c2(n2cursor, n, 'ALL')

    for n in xrange(30, 1000):
        query_c1c2(n2cursor, n, 'ALL')
def decommission_test(self):
    cluster = self.cluster

    tokens = cluster.balanced_tokens(4)
    cluster.populate(4, tokens=tokens).start()
    node1, node2, node3, node4 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 2)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.QUORUM)

    cluster.flush()
    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    init_size = sizes[0]
    assert_almost_equal(*sizes)

    time.sleep(.5)
    node4.decommission()
    node4.stop()
    cluster.cleanup()
    time.sleep(.5)

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.QUORUM)

    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    three_node_sizes = sizes
    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
    assert_almost_equal(sizes[2], init_size)

    if cluster.version() <= '1.2':
        node3.stop(wait_other_notice=True)
        node1.removeToken(tokens[2])
        time.sleep(.5)
        cluster.cleanup()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        assert_almost_equal(*sizes)
        assert_almost_equal(sizes[0], 2 * init_size)

        node5 = new_node(cluster, token=(tokens[2] + 1)).start()
        time.sleep(.5)
        cluster.cleanup()
        time.sleep(.5)
        cluster.compact()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        # We should be back to the earlier 3 nodes situation
        for i in xrange(0, len(sizes)):
            assert_almost_equal(sizes[i], three_node_sizes[i])
def rebuild_ranges_test(self):
    """
    @jira_ticket CASSANDRA-10406
    """
    keys = 1000

    cluster = self.cluster
    tokens = cluster.balanced_tokens_across_dcs(['dc1', 'dc2'])
    cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
    cluster.set_configuration_options(values={'num_tokens': 1})
    node1 = cluster.create_node('node1', False,
                                ('127.0.0.1', 9160),
                                ('127.0.0.1', 7000),
                                '7100', '2000', tokens[0],
                                binary_interface=('127.0.0.1', 9042))
    node1.set_configuration_options(values={'initial_token': tokens[0]})
    cluster.add(node1, True, data_center='dc1')
    node1 = cluster.nodelist()[0]

    # start node in dc1
    node1.start(wait_for_binary_proto=True)

    # populate data in dc1
    session = self.patient_exclusive_cql_connection(node1)
    # ks1 will be rebuilt in node2
    self.create_ks(session, 'ks1', {'dc1': 1})
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
    # ks2 will not be rebuilt in node2
    self.create_ks(session, 'ks2', {'dc1': 1})
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
    session.shutdown()

    # Bootstrapping a new node in dc2 with auto_bootstrap: false
    node2 = cluster.create_node('node2', False,
                                ('127.0.0.2', 9160),
                                ('127.0.0.2', 7000),
                                '7200', '2001', tokens[1],
                                binary_interface=('127.0.0.2', 9042))
    node2.set_configuration_options(values={'initial_token': tokens[1]})
    cluster.add(node2, False, data_center='dc2')
    node2.start(wait_other_notice=True, wait_for_binary_proto=True)

    # wait for snitch to reload
    time.sleep(60)
    # alter keyspace to replicate to dc2
    session = self.patient_exclusive_cql_connection(node2)
    session.execute("ALTER KEYSPACE ks1 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    session.execute("ALTER KEYSPACE ks2 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    session.execute('USE ks1')

    # rebuild only ks1 with range that is node1's replica
    node2.nodetool('rebuild -ks ks1 -ts (%s,%s] dc1' % (tokens[1], str(pow(2, 63) - 1)))

    # check data is sent by stopping node1
    node1.stop()
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.ONE)
    # ks2 should not be streamed
    session.execute('USE ks2')
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
def consistent_reads_after_relocate_test(self):
    debug("Creating a ring")
    cluster = self.cluster
    cluster.set_configuration_options(values={'initial_token': None,
                                              'num_tokens': 10,
                                              'hinted_handoff_enabled': False,
                                              'write_request_timeout_in_ms': 60000,
                                              'read_request_timeout_in_ms': 60000,
                                              'dynamic_snitch_badness_threshold': 0.0},
                                      batch_commitlog=True)

    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()
    cluster.start()

    debug("Set to talk to node 2")
    n2cursor = self.patient_cql_connection(node2).cursor()
    self.create_ks(n2cursor, 'ks', 2)
    create_c1c2_table(self, n2cursor)

    debug("Generating some data for all nodes")
    for n in xrange(10, 20):
        insert_c1c2(n2cursor, n, 'ALL')

    node1.flush()
    debug("Taking down node3")
    node3.stop(wait_other_notice=True)

    debug("Writing data to node2")
    for n in xrange(30, 1000):
        insert_c1c2(n2cursor, n, 'ONE')
    node2.flush()

    debug("Restart node3")
    node3.start(wait_other_notice=True)

    debug("Getting token from node 1")
    n1cursor = self.patient_cql_connection(node1).cursor()
    n1cursor.execute('SELECT tokens FROM system.local')
    tokens = n1cursor.fetchone()

    debug("Relocate tokens from node1 to node3")
    tl = " ".join(str(t) for t in list(tokens[0])[:8])
    cmd = "taketoken %s" % tl
    debug(cmd)
    node3.nodetool(cmd)

    n1cursor.execute('SELECT tokens FROM system.local')
    tokens = n1cursor.fetchone()
    debug("%s" % tokens)
    assert len(tokens) == 2

    debug("Checking that no data was lost")
    for n in xrange(10, 20):
        query_c1c2(n2cursor, n, 'ALL')

    for n in xrange(30, 1000):
        query_c1c2(n2cursor, n, 'ALL')
def simple_rebuild_test(self):
    """
    @jira_ticket CASSANDRA-9119

    Test rebuild from other dc works as expected.
    """
    keys = 1000

    cluster = self.cluster
    cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
    node1 = cluster.create_node('node1', False,
                                ('127.0.0.1', 9160),
                                ('127.0.0.1', 7000),
                                '7100', '2000', None,
                                binary_interface=('127.0.0.1', 9042))
    cluster.add(node1, True, data_center='dc1')

    # start node in dc1
    node1.start(wait_for_binary_proto=True)

    # populate data in dc1
    session = self.patient_exclusive_cql_connection(node1)
    self.create_ks(session, 'ks', {'dc1': 1})
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)

    # check data
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.ALL)
    session.shutdown()

    # Bootstrapping a new node in dc2 with auto_bootstrap: false
    node2 = cluster.create_node('node2', False,
                                ('127.0.0.2', 9160),
                                ('127.0.0.2', 7000),
                                '7200', '2001', None,
                                binary_interface=('127.0.0.2', 9042))
    cluster.add(node2, False, data_center='dc2')
    node2.start(wait_other_notice=True, wait_for_binary_proto=True)

    # wait for snitch to reload
    time.sleep(60)
    # alter keyspace to replicate to dc2
    session = self.patient_exclusive_cql_connection(node2)
    session.execute("ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    session.execute('USE ks')

    self.rebuild_errors = 0

    # rebuild dc2 from dc1
    def rebuild():
        try:
            node2.nodetool('rebuild dc1')
        except NodetoolError as e:
            if 'Node is still rebuilding' in e.message:
                self.rebuild_errors += 1

    cmd1 = Thread(target=rebuild)
    cmd1.start()

    # concurrent rebuild should not be allowed (CASSANDRA-9119)
    # (following sleep is needed to avoid conflict in 'nodetool()' method setting up env.)
    time.sleep(.1)
    try:
        node2.nodetool('rebuild dc1')
    except NodetoolError:
        self.rebuild_errors += 1

    cmd1.join()

    # exactly 1 of the two nodetool calls should fail
    # usually it will be the one in the main thread,
    # but occasionally it wins the race with the one in the secondary thread,
    # so we check that one succeeded and the other failed
    self.assertEqual(self.rebuild_errors, 1,
                     msg='concurrent rebuild should not be allowed, but one rebuild command should have succeeded.')

    # check data
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.ALL)