def add_heketi_node_to_cluster(self, cluster_id):
    """Add new node to a cluster"""
    storage_host_info = g.config.get("additional_gluster_servers")
    if not storage_host_info:
        self.skipTest(
            "Skip test case as 'additional_gluster_servers' option is "
            "not provided in config file")

    storage_host_info = list(storage_host_info.values())[0]
    try:
        storage_hostname = storage_host_info["manage"]
        storage_ip = storage_host_info["storage"]
    except KeyError:
        msg = ("Config options 'additional_gluster_servers.manage' "
               "and 'additional_gluster_servers.storage' must be set.")
        g.log.error(msg)
        raise exceptions.ConfigError(msg)

    h_client, h_server = self.heketi_client_node, self.heketi_server_url
    storage_zone = 1

    self.configure_node_to_run_gluster(storage_hostname)

    heketi_node_info = heketi_ops.heketi_node_add(
        h_client, h_server, storage_zone, cluster_id,
        storage_hostname, storage_ip, json=True)
    heketi_node_id = heketi_node_info["id"]
    self.addCleanup(
        heketi_ops.heketi_node_delete, h_client, h_server, heketi_node_id)
    self.addCleanup(
        heketi_ops.heketi_node_remove, h_client, h_server, heketi_node_id)
    self.addCleanup(
        heketi_ops.heketi_node_disable, h_client, h_server, heketi_node_id)
    self.assertEqual(
        heketi_node_info["cluster"], cluster_id,
        "Node got added in unexpected cluster exp: %s, act: %s" % (
            cluster_id, heketi_node_info["cluster"]))

    return storage_hostname, storage_ip
def test_heketi_metrics_validation_after_node(self, condition):
    """Validate heketi metrics after adding and removing a node"""
    # Get additional node
    additional_host_info = g.config.get("additional_gluster_servers")
    if not additional_host_info:
        self.skipTest(
            "Skipping this test case as additional gluster server is "
            "not provided in config file")

    additional_host_info = list(additional_host_info.values())[0]
    storage_hostname = additional_host_info.get("manage")
    storage_ip = additional_host_info.get("storage")
    if not (storage_hostname and storage_ip):
        self.skipTest(
            "Config options 'additional_gluster_servers.manage' "
            "and 'additional_gluster_servers.storage' must be set.")

    h_client, h_server = self.heketi_client_node, self.heketi_server_url
    initial_node_count, final_node_count = 0, 0

    # Get initial node count from prometheus metrics
    metric_result = self._fetch_metric_from_promtheus_pod(
        metric='heketi_nodes_count')
    initial_node_count = reduce(
        lambda x, y: x + y,
        [result.get('value')[1] for result in metric_result])

    # Switch to storage project
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)

    # Configure node before adding it to the cluster
    self.configure_node_to_run_gluster(storage_hostname)

    # Get cluster list
    cluster_info = heketi_ops.heketi_cluster_list(
        h_client, h_server, json=True)

    # Add node to the cluster
    heketi_node_info = heketi_ops.heketi_node_add(
        h_client, h_server,
        len(self.gluster_servers), cluster_info.get('clusters')[0],
        storage_hostname, storage_ip, json=True)
    heketi_node_id = heketi_node_info.get("id")
    self.addCleanup(
        heketi_ops.heketi_node_delete, h_client, h_server,
        heketi_node_id, raise_on_error=False)
    self.addCleanup(
        heketi_ops.heketi_node_remove, h_client, h_server,
        heketi_node_id, raise_on_error=False)
    self.addCleanup(
        heketi_ops.heketi_node_disable, h_client, h_server,
        heketi_node_id, raise_on_error=False)
    self.addCleanup(
        openshift_ops.switch_oc_project,
        self._master, self.storage_project_name)

    if condition == 'delete':
        # Switch to openshift-monitoring project
        openshift_ops.switch_oc_project(
            self.ocp_master_node[0], self._prometheus_project_name)

        # Wait for prometheus to report the updated node count
        for w in waiter.Waiter(timeout=60, interval=10):
            metric_result = self._fetch_metric_from_promtheus_pod(
                metric='heketi_nodes_count')
            node_count = reduce(
                lambda x, y: x + y,
                [result.get('value')[1] for result in metric_result])
            if node_count != initial_node_count:
                break

        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to get updated node details from prometheus")

        # Remove node from cluster
        heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
        heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
        for device in heketi_node_info.get('devices'):
            heketi_ops.heketi_device_delete(
                h_client, h_server, device.get('id'))
        heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)

    # Switch to openshift-monitoring project
    openshift_ops.switch_oc_project(
        self.ocp_master_node[0], self._prometheus_project_name)

    # Get final node count from prometheus metrics
    for w in waiter.Waiter(timeout=60, interval=10):
        metric_result = self._fetch_metric_from_promtheus_pod(
            metric='heketi_nodes_count')
        final_node_count = reduce(
            lambda x, y: x + y,
            [result.get('value')[1] for result in metric_result])

        if condition == 'delete':
            if final_node_count < node_count:
                break
        else:
            if final_node_count > initial_node_count:
                break

    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to update node details in prometheus")
def test_create_heketi_cluster_and_add_node(self):
    """Test heketi node add to a newly created cluster"""
    storage_host_info = g.config.get("additional_gluster_servers")
    if not storage_host_info:
        self.skipTest(
            "Skip test case as 'additional_gluster_servers' option is "
            "not provided in config file")

    storage_host_info = list(storage_host_info.values())[0]
    try:
        storage_hostname = storage_host_info["manage"]
        storage_ip = storage_host_info["storage"]
        storage_device = storage_host_info["devices"][0]
    except KeyError:
        msg = ("Config options 'additional_gluster_servers.manage' "
               "'additional_gluster_servers.storage' and "
               "'additional_gluster_servers.devices' "
               "must be set.")
        g.log.error(msg)
        raise exceptions.ConfigError(msg)

    h_client, h_server = self.heketi_client_node, self.heketi_server_url
    storage_zone = 1

    cluster_id = heketi_ops.heketi_cluster_create(
        self.heketi_client_node, self.heketi_server_url, json=True)["id"]
    self.addCleanup(
        heketi_ops.heketi_cluster_delete, self.heketi_client_node,
        self.heketi_server_url, cluster_id)

    self.configure_node_to_run_gluster(storage_hostname)

    heketi_node_info = heketi_ops.heketi_node_add(
        h_client, h_server, storage_zone, cluster_id,
        storage_hostname, storage_ip, json=True)
    heketi_node_id = heketi_node_info["id"]
    self.addCleanup(
        heketi_ops.heketi_node_delete, h_client, h_server, heketi_node_id)
    self.addCleanup(
        heketi_ops.heketi_node_remove, h_client, h_server, heketi_node_id)
    self.addCleanup(
        heketi_ops.heketi_node_disable, h_client, h_server, heketi_node_id)
    self.assertEqual(
        heketi_node_info["cluster"], cluster_id,
        "Node got added in unexpected cluster exp: %s, act: %s" % (
            cluster_id, heketi_node_info["cluster"]))

    heketi_ops.heketi_device_add(
        h_client, h_server, storage_device, heketi_node_id)
    heketi_node_info = heketi_ops.heketi_node_info(
        h_client, h_server, heketi_node_id, json=True)
    device_id = None
    for device in heketi_node_info["devices"]:
        if device["name"] == storage_device:
            device_id = device["id"]
            break

    err_msg = ("Failed to add device %s on node %s" % (
        storage_device, heketi_node_id))
    self.assertTrue(device_id, err_msg)

    self.addCleanup(
        heketi_ops.heketi_device_delete, h_client, h_server, device_id)
    self.addCleanup(
        heketi_ops.heketi_device_remove, h_client, h_server, device_id)
    self.addCleanup(
        heketi_ops.heketi_device_disable, h_client, h_server, device_id)

    cluster_info = heketi_ops.heketi_cluster_info(
        h_client, h_server, cluster_id, json=True)
    self.assertIn(
        heketi_node_info["id"], cluster_info["nodes"],
        "Newly added node %s not found in cluster %s, cluster info %s" % (
            heketi_node_info["id"], cluster_id, cluster_info))

    topology_info = heketi_ops.heketi_topology_info(
        h_client, h_server, json=True)
    cluster_details = [
        cluster for cluster in topology_info["clusters"]
        if cluster["id"] == cluster_id]
    err_msg = "Cluster details for id '%s' not found" % cluster_id
    self.assertTrue(cluster_details, err_msg)
    err_msg = ("Multiple clusters with same id '%s' found %s" % (
        cluster_id, cluster_details))
    self.assertEqual(len(cluster_details), 1, err_msg)

    node_details = [
        node for node in cluster_details[0]["nodes"]
        if node["id"] == heketi_node_id]
    err_msg = "Node details for id '%s' not found" % heketi_node_id
    self.assertTrue(node_details, err_msg)
    err_msg = ("Multiple nodes with same id '%s' found %s" % (
        heketi_node_id, node_details))
    self.assertEqual(len(node_details), 1, err_msg)

    err_msg = "Unexpected %s found '%s', expected '%s'"
    exp_storage_hostname = node_details[0]["hostnames"]["manage"][0]
    self.assertEqual(
        exp_storage_hostname, storage_hostname,
        err_msg % ("hostname", exp_storage_hostname, storage_hostname))
    exp_storage_ip = node_details[0]["hostnames"]["storage"][0]
    self.assertEqual(
        exp_storage_ip, storage_ip,
        err_msg % ("IP address", exp_storage_ip, storage_ip))
    zone = node_details[0]["zone"]
    self.assertEqual(
        zone, storage_zone, err_msg % ("zone", zone, storage_zone))
def test_dev_path_mapping_heketi_node_delete(self):
    """Validate dev path mapping for heketi node deletion lifecycle"""
    h_client, h_url = self.heketi_client_node, self.heketi_server_url
    node_ids = heketi_ops.heketi_node_list(h_client, h_url)
    self.assertTrue(node_ids, "Failed to get heketi node list")

    # Fetch the 4th node for the operations
    h_disable_node = node_ids[3]

    # Fetch bricks on the devices before volume create
    h_node_details_before, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node before pvc creation
    brick_count_before = [count[1] for count in h_node_details_before]

    # Create file volume with app pod and verify IO's
    # and compare path, UUID, vg_name
    pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

    # Check if IO's are running
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {}".format(pod_name))

    # Fetch bricks on the devices after volume create
    h_node_details_after, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node after pvc creation
    brick_count_after = [count[1] for count in h_node_details_after]

    self.assertGreater(
        sum(brick_count_after), sum(brick_count_before),
        "Failed to add bricks on the node {}".format(h_node))
    self.addCleanup(
        heketi_ops.heketi_node_disable, h_client, h_url, h_disable_node)

    # Enable the 4th node
    heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_disable_node, json=True)
    h_node_id = node_info['id']
    self.assertEqual(
        node_info['state'], "online",
        "Failed to enable node {}".format(h_disable_node))

    # Disable the node and check for brick migrations
    self.addCleanup(
        heketi_ops.heketi_node_enable, h_client, h_url, h_node,
        raise_on_error=False)
    heketi_ops.heketi_node_disable(h_client, h_url, h_node)
    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    self.assertEqual(
        node_info['state'], "offline",
        "Failed to disable node {}".format(h_node))

    # Before bricks migration
    h_node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)

    # Bricks before migration on the node which is to be deleted
    bricks_counts_before = 0
    for device in h_node_info['devices']:
        bricks_counts_before += (len(device['bricks']))

    # Remove the node
    heketi_ops.heketi_node_remove(h_client, h_url, h_node)

    # After bricks migration
    h_node_info_after = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)

    # Bricks after migration on the node which is to be deleted
    bricks_counts = 0
    for device in h_node_info_after['devices']:
        bricks_counts += (len(device['bricks']))
    self.assertFalse(
        bricks_counts,
        "Failed to remove all the bricks from node {}".format(h_node))

    # Old node which is to be deleted, new node where bricks reside
    old_node, new_node = h_node, h_node_id

    # Node info for the new node where bricks reside after migration
    h_node_info_new = heketi_ops.heketi_node_info(
        h_client, h_url, new_node, json=True)

    bricks_counts_after = 0
    for device in h_node_info_new['devices']:
        bricks_counts_after += (len(device['bricks']))
    self.assertEqual(
        bricks_counts_before, bricks_counts_after,
        "Failed to migrate bricks from {} node to {}".format(
            old_node, new_node))

    # Fetch device list of the node which is to be deleted
    h_node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    devices_list = [
        [device['id'], device['name']]
        for device in h_node_info['devices']]

    for device in devices_list:
        device_id = device[0]
        device_name = device[1]
        self.addCleanup(
            heketi_ops.heketi_device_add, h_client, h_url,
            device_name, h_node, raise_on_error=False)

        # Device deletion from heketi node
        device_delete = heketi_ops.heketi_device_delete(
            h_client, h_url, device_id)
        self.assertTrue(
            device_delete,
            "Failed to delete the device {}".format(device_id))

    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    cluster_id = node_info['cluster']
    zone = node_info['zone']
    storage_hostname = node_info['hostnames']['manage'][0]
    storage_ip = node_info['hostnames']['storage'][0]

    # Delete the node
    self.addCleanup(
        heketi_ops.heketi_node_add, h_client, h_url,
        zone, cluster_id, storage_hostname, storage_ip,
        raise_on_error=False)
    heketi_ops.heketi_node_delete(h_client, h_url, h_node)

    # Verify if the node is deleted
    node_ids = heketi_ops.heketi_node_list(h_client, h_url)
    self.assertNotIn(
        old_node, node_ids,
        "Failed to delete the node {}".format(old_node))

    # Check if IO's are running
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after node "
        "deletion".format(pod_name))

    # Adding node back
    h_node_info = heketi_ops.heketi_node_add(
        h_client, h_url, zone, cluster_id,
        storage_hostname, storage_ip, json=True)
    self.assertTrue(
        h_node_info,
        "Failed to add the node in the cluster {}".format(cluster_id))
    h_node_id = h_node_info["id"]

    # Adding devices to the new node
    for device in devices_list:
        storage_device = device[1]

        # Add device to the new heketi node
        heketi_ops.heketi_device_add(
            h_client, h_url, storage_device, h_node_id)
        heketi_node_info = heketi_ops.heketi_node_info(
            h_client, h_url, h_node_id, json=True)
        device_id = None
        for device in heketi_node_info["devices"]:
            if device["name"] == storage_device:
                device_id = device["id"]
                break

        self.assertTrue(
            device_id,
            "Failed to add device {} on node {}".format(
                storage_device, h_node_id))

    # Create n pvc in order to verify if the bricks reside on the new node
    pvc_amount, pvc_size = 5, 1

    # Fetch bricks on the devices before volume create
    h_node_details_before, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node before pvc creation
    brick_count_before = [count[1] for count in h_node_details_before]

    # Create file volumes
    pvc_name = self.create_and_wait_for_pvcs(
        pvc_size=pvc_size, pvc_amount=pvc_amount)
    self.assertEqual(
        len(pvc_name), pvc_amount,
        "Failed to create {} pvc".format(pvc_amount))

    # Fetch bricks on the devices after volume create
    h_node_details_after, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node after pvc creation
    brick_count_after = [count[1] for count in h_node_details_after]

    self.assertGreater(
        sum(brick_count_after), sum(brick_count_before),
        "Failed to add bricks on the new node {}".format(new_node))

    # Check if IO's are running after new node is added
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after adding the "
        "node back".format(pod_name))