def test_driver_recovers_network_isolation(self):
    start_and_prime_singledc()

    idle_heartbeat_timeout = 3
    idle_heartbeat_interval = 1

    listener = TrackDownListener()

    cluster = Cluster(['127.0.0.1'],
                      load_balancing_policy=RoundRobinPolicy(),
                      idle_heartbeat_timeout=idle_heartbeat_timeout,
                      idle_heartbeat_interval=idle_heartbeat_interval,
                      executor_threads=16)
    session = cluster.connect(wait_for_all_pools=True)
    self.addCleanup(cluster.shutdown)

    cluster.register_listener(listener)

    # Isolate the driver: stop responding to requests and reject new connections
    prime_request(PrimeOptions(then=NO_THEN))
    prime_request(RejectConnections(RejectType.REJECT_STARTUP))

    time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2)

    for host in cluster.metadata.all_hosts():
        self.assertIn(host, listener.hosts_marked_down)

    self.assertRaises(NoHostAvailable, session.execute, "SELECT * from system.local")

    # Lift the isolation and verify the driver recovers
    clear_queries()
    prime_request(AcceptConnections())

    time.sleep(idle_heartbeat_timeout + idle_heartbeat_interval + 2)

    self.assertIsNotNone(session.execute("SELECT * from system.local"))
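# `TrackDownListener` is a shared test helper used throughout these tests; a
# minimal sketch of what it is assumed to look like, given that the tests read
# `listener.hosts_marked_down` after registering it on the cluster (the real
# helper lives in the test utilities):
from cassandra.policies import HostStateListener

class TrackDownListener(HostStateListener):
    def __init__(self):
        self.hosts_marked_down = []

    def on_down(self, host):
        # Record every host the driver marks down so tests can assert on it
        self.hosts_marked_down.append(host)

    def on_up(self, host):
        pass

    def on_add(self, host):
        pass

    def on_remove(self, host):
        pass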
def test_idle_connection_is_not_closed(self):
    """
    Test to ensure that idle connections aren't closed.

    @since 3.12
    @jira_ticket PYTHON-573
    @expected_result the connections aren't closed and the hosts aren't set to down while the connection is idle

    @test_category connection
    """
    start_and_prime_singledc()

    idle_heartbeat_timeout = 1
    idle_heartbeat_interval = 1

    listener = TrackDownListener()

    cluster = Cluster(compression=False,
                      idle_heartbeat_interval=idle_heartbeat_interval,
                      idle_heartbeat_timeout=idle_heartbeat_timeout)
    session = cluster.connect(wait_for_all_pools=True)

    cluster.register_listener(listener)
    self.addCleanup(cluster.shutdown)

    time.sleep(20)

    self.assertEqual(listener.hosts_marked_down, [])
def test_host_is_not_set_to_down_after_query_oto(self):
    """
    Test to ensure that the connections aren't closed if there's an
    OperationTimedOut in a normal query. With the default configuration,
    a host should only be marked down from the heartbeat thread (in the
    case of an OperationTimedOut), never from a regular query.

    @since 3.12
    @expected_result the connections aren't closed nor the hosts set to down

    @test_category connection
    """
    start_and_prime_singledc()

    query_to_prime = "SELECT * FROM madeup_keyspace.madeup_table"
    prime_query(query_to_prime, then=NO_THEN)

    listener = TrackDownListener()

    cluster = Cluster(compression=False)
    session = cluster.connect(wait_for_all_pools=True)
    cluster.register_listener(listener)

    futures = []
    for _ in range(10):
        future = session.execute_async(query_to_prime)
        futures.append(future)

    # Every query should time out, but no host should be marked down
    for f in futures:
        f._event.wait()
        self.assertIsInstance(f._final_exception, OperationTimedOut)

    self.assertEqual(listener.hosts_marked_down, [])
    assert_quiescent_pool_state(self, cluster)
def test_removed_node_stops_reconnecting(self):
    """ Ensure we stop reconnecting after a node is removed. PYTHON-1181 """
    use_cluster("test_down_then_removed", [3], start=True)

    state_listener = StateListener()
    cluster = Cluster(protocol_version=PROTOCOL_VERSION)
    self.addCleanup(cluster.shutdown)
    cluster.register_listener(state_listener)
    session = cluster.connect(wait_for_all_pools=True)

    get_node(3).nodetool("disablebinary")

    wait_until(condition=lambda: state_listener.downed_host is not None, delay=2, max_attempts=50)
    self.assertTrue(state_listener.downed_host.is_currently_reconnecting())

    decommission(3)

    wait_until(condition=lambda: state_listener.removed_host is not None, delay=2, max_attempts=50)
    self.assertIs(state_listener.downed_host, state_listener.removed_host)  # Just a sanity check
    self.assertFalse(state_listener.removed_host.is_currently_reconnecting())
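# `StateListener` and `wait_until` are test helpers; minimal sketches based on
# their usage above. The attribute names and the polling signature are taken
# from the calls in the test; the bodies are assumptions:
import time
from cassandra.policies import HostStateListener

class StateListener(HostStateListener):
    def __init__(self):
        self.downed_host = None
        self.removed_host = None

    def on_down(self, host):
        self.downed_host = host

    def on_remove(self, host):
        self.removed_host = host

    def on_up(self, host):
        pass

    def on_add(self, host):
        pass

def wait_until(condition, delay, max_attempts):
    # Poll `condition` every `delay` seconds; raise if it never becomes true
    for _ in range(max_attempts):
        if condition():
            return
        time.sleep(delay)
    raise Exception("Condition never became true after %s attempts" % max_attempts)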
def test_heart_beat_timeout(self):
    """
    Test to ensure the hosts are marked as down after an OTO is received,
    and that this happens within the expected timeout.

    @since 3.10
    @jira_ticket PYTHON-762
    @expected_result all the hosts have been marked as down at some point

    @test_category metadata
    """
    number_of_dcs = 3
    nodes_per_dc = 20

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 5
    idle_heartbeat_interval = 1

    start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

    listener = TrackDownListener()
    executor = ThreadTracker(max_workers=8)

    # We need to disable compression since it's not supported in simulacron
    cluster = Cluster(compression=False,
                      idle_heartbeat_interval=idle_heartbeat_interval,
                      idle_heartbeat_timeout=idle_heartbeat_timeout,
                      executor_threads=8,
                      execution_profiles={
                          EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=RoundRobinPolicy())})
    self.addCleanup(cluster.shutdown)

    # Swap in the tracking executor so we can inspect which functions were scheduled
    cluster.scheduler.shutdown()
    cluster.executor = executor
    cluster.scheduler = _Scheduler(executor)

    session = cluster.connect(wait_for_all_pools=True)
    cluster.register_listener(listener)

    log = logging.getLogger()
    log.setLevel('CRITICAL')
    self.addCleanup(log.setLevel, "DEBUG")

    prime_query(query_to_prime, then=NO_THEN)

    futures = []
    for _ in range(number_of_dcs * nodes_per_dc):
        future = session.execute_async(query_to_prime)
        futures.append(future)

    for f in futures:
        f._event.wait()
        self.assertIsInstance(f._final_exception, OperationTimedOut)

    prime_request(PrimeOptions(then=NO_THEN))

    # We allow some extra time for on_down to be called on all the hosts; the
    # callbacks should start happening after idle_heartbeat_timeout + idle_heartbeat_interval
    time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2.5)

    for host in cluster.metadata.all_hosts():
        self.assertIn(host, listener.hosts_marked_down)

    # In this case HostConnection._replace shouldn't be called
    self.assertNotIn("_replace", executor.called_functions)
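# `ThreadTracker` is assumed to be a ThreadPoolExecutor that records the name
# of every callable submitted to it, which is what lets the test above assert
# that HostConnection._replace was never scheduled. A minimal sketch under
# that assumption:
from concurrent.futures import ThreadPoolExecutor

class ThreadTracker(ThreadPoolExecutor):
    called_functions = set()

    def submit(self, fn, *args, **kwargs):
        # Record the bare callable name, then delegate to the real executor
        self.called_functions.add(getattr(fn, '__name__', repr(fn)))
        return super(ThreadTracker, self).submit(fn, *args, **kwargs)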
class HeartbeatTest(unittest.TestCase):
    """
    Test to validate that failing a heartbeat check doesn't mark a host as down.

    @since 3.3
    @jira_ticket PYTHON-286
    @expected_result host should not be marked down when heartbeat fails

    @test_category connection heartbeat
    """

    def setUp(self):
        self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=1)
        self.session = self.cluster.connect(wait_for_all_pools=True)

    def tearDown(self):
        self.cluster.shutdown()

    def test_heart_beat_timeout(self):
        # Set up a host listener to ensure the nodes don't go down
        test_listener = TestHostListener()
        host = "127.0.0.1"
        node = get_node(1)
        initial_connections = self.fetch_connections(host, self.cluster)
        self.assertNotEqual(len(initial_connections), 0)
        self.cluster.register_listener(test_listener)

        # Pause the node
        node.pause()
        # Wait for connections associated with this host to go away
        self.wait_for_no_connections(host, self.cluster)
        # Resume the paused node
        node.resume()

        # Run queries until we reconnect to the first node, to ensure
        # connections are re-established
        current_host = ""
        count = 0
        while current_host != host and count < 100:
            rs = self.session.execute_async("SELECT * FROM system.local", trace=False)
            rs.result()
            current_host = str(rs._current_host)
            count += 1
            time.sleep(.1)
        self.assertLess(count, 100, "Never connected to the first node")

        new_connections = self.wait_for_connections(host, self.cluster)
        self.assertIsNone(test_listener.host_down)
        # Make sure the new underlying connections don't match the previous ones
        for connection in initial_connections:
            self.assertFalse(connection in new_connections)

    def fetch_connections(self, host, cluster):
        # Given a cluster object and a host, grab all connections associated with that host
        connections = []
        holders = cluster.get_connection_holders()
        for conn in holders:
            if host == str(getattr(conn, 'host', '')):
                if isinstance(conn, HostConnectionPool):
                    if conn._connections is not None:
                        connections.append(conn._connections)
                else:
                    if conn._connection is not None:
                        connections.append(conn._connection)
        return connections

    def wait_for_connections(self, host, cluster):
        retry = 0
        while retry < 300:
            retry += 1
            connections = self.fetch_connections(host, cluster)
            if len(connections) != 0:
                return connections
            time.sleep(.1)
        self.fail("No new connections found")

    def wait_for_no_connections(self, host, cluster):
        retry = 0
        while retry < 100:
            retry += 1
            connections = self.fetch_connections(host, cluster)
            if len(connections) == 0:
                return
            time.sleep(.5)
        self.fail("Connections never cleared")
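# `TestHostListener` is assumed to be a small helper that simply remembers
# whether on_down was ever invoked (the test asserts `test_listener.host_down`
# stays None). A minimal sketch under that assumption:
from cassandra.policies import HostStateListener

class TestHostListener(HostStateListener):
    def __init__(self):
        self.host_down = None

    def on_down(self, host):
        # Remember the host so the test can assert nothing was marked down
        self.host_down = host

    def on_up(self, host):
        pass

    def on_add(self, host):
        pass

    def on_remove(self, host):
        pass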