def delete_node_and_devices_on_it(self, node_id):
    """Delete a heketi node together with every device registered on it.

    The calls are issued in this order: disable the node, remove the node,
    read the node info, delete each device listed in that info, and finally
    delete the node itself.

    Args:
        node_id: heketi ID of the node to delete.
    """
    client, server = self.h_node, self.h_url

    heketi_ops.heketi_node_disable(client, server, node_id)
    heketi_ops.heketi_node_remove(client, server, node_id)

    # The device list is read only after the node has been removed.
    node_details = heketi_ops.heketi_node_info(
        client, server, node_id, json=True)
    for node_device in node_details['devices']:
        heketi_ops.heketi_device_delete(client, server, node_device['id'])

    heketi_ops.heketi_node_delete(client, server, node_id)
def test_heketi_metrics_validation_after_node(self, condition):
    """Validate heketi metrics after adding and removing a node

    The additional gluster server from the config is added to the first
    heketi cluster. When ``condition`` is ``'delete'`` the test waits for
    the ``heketi_nodes_count`` metric to differ from its initial value,
    deletes the node again and then expects the metric to drop below the
    value seen after the add. For any other ``condition`` the metric is
    expected to rise above its initial value.

    Args:
        condition: ``'delete'`` to validate node deletion; any other value
            validates the node addition only.

    Raises:
        exceptions.ExecutionError: if the metric does not reach the
            expected value before the waiter expires.
    """

    def fetch_node_count():
        """Return the numeric sum of all 'heketi_nodes_count' samples."""
        samples = self._fetch_metric_from_promtheus_pod(
            metric='heketi_nodes_count')
        # Sample values are converted to numbers before being added and
        # compared: adding/comparing them as strings would concatenate
        # ("3" + "2" -> "32") and order lexicographically ("10" < "9").
        # An empty result counts as 0.
        return sum(float(sample.get('value')[1]) for sample in samples)

    # Get additional node
    additional_host_info = g.config.get("additional_gluster_servers")
    if not additional_host_info:
        self.skipTest(
            "Skipping this test case as additional gluster server is "
            "not provied in config file")

    additional_host_info = list(additional_host_info.values())[0]
    storage_hostname = additional_host_info.get("manage")
    storage_ip = additional_host_info.get("storage")
    if not (storage_hostname and storage_ip):
        self.skipTest(
            "Config options 'additional_gluster_servers.manage' "
            "and 'additional_gluster_servers.storage' must be set.")

    h_client, h_server = self.heketi_client_node, self.heketi_server_url

    # Get initial node count from prometheus metrics
    initial_node_count = fetch_node_count()

    # Switch to storage project
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)

    # Configure node before adding node
    self.configure_node_to_run_gluster(storage_hostname)

    # Get cluster list
    cluster_info = heketi_ops.heketi_cluster_list(
        h_client, h_server, json=True)

    # Add node to the cluster
    heketi_node_info = heketi_ops.heketi_node_add(
        h_client, h_server,
        len(self.gluster_servers), cluster_info.get('clusters')[0],
        storage_hostname, storage_ip, json=True)
    heketi_node_id = heketi_node_info.get("id")

    # Cleanups run in reverse order of registration: switch project,
    # disable, remove and finally delete the node.
    self.addCleanup(
        heketi_ops.heketi_node_delete,
        h_client, h_server, heketi_node_id, raise_on_error=False)
    self.addCleanup(
        heketi_ops.heketi_node_remove,
        h_client, h_server, heketi_node_id, raise_on_error=False)
    self.addCleanup(
        heketi_ops.heketi_node_disable,
        h_client, h_server, heketi_node_id, raise_on_error=False)
    self.addCleanup(
        openshift_ops.switch_oc_project,
        self._master, self.storage_project_name)

    if condition == 'delete':
        # Switch to openshift-monitoring project
        openshift_ops.switch_oc_project(
            self.ocp_master_node[0], self._prometheus_project_name)

        # Wait until the node count in prometheus differs from the
        # initial one, i.e. the node addition is reflected in the metric
        for w in waiter.Waiter(timeout=60, interval=10):
            node_count = fetch_node_count()
            if node_count != initial_node_count:
                break

        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to get updated node details from prometheus")

        # Remove node from cluster
        heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
        heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
        for device in heketi_node_info.get('devices'):
            heketi_ops.heketi_device_delete(
                h_client, h_server, device.get('id'))
        heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)

    # Switch to openshift-monitoring project
    openshift_ops.switch_oc_project(
        self.ocp_master_node[0], self._prometheus_project_name)

    # Get final node count from prometheus metrics
    for w in waiter.Waiter(timeout=60, interval=10):
        final_node_count = fetch_node_count()
        if condition == 'delete':
            # After deletion the count must drop below the post-add value
            if final_node_count < node_count:
                break
        else:
            # After addition the count must exceed the initial value
            if final_node_count > initial_node_count:
                break

    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to update node details in prometheus")
def test_dev_path_mapping_heketi_node_delete(self):
    """Validate dev path mapping for heketi node deletion lifecycle

    Steps performed by the test:
      1. Create a file volume with an app pod and check that IO changes
         the used space and that the brick count grew.
      2. Enable the 4th heketi node, then disable and remove the node
         returned by ``_get_bricks_and_device_details`` and check that it
         holds no bricks afterwards and that the 4th node holds as many
         bricks as the removed node had.
      3. Delete the devices and the node, check it is gone from the node
         list and that IO still changes the used space.
      4. Add the node and its devices back, create PVCs and check that
         the brick count grew and IO still changes the used space.
    """

    def count_bricks(node_info):
        """Return the total number of bricks over all devices of a node."""
        return sum(len(device['bricks']) for device in node_info['devices'])

    h_client, h_url = self.heketi_client_node, self.heketi_server_url

    node_ids = heketi_ops.heketi_node_list(h_client, h_url)
    self.assertTrue(node_ids, "Failed to get heketi node list")

    # The test operates on the 4th node, so fail with a clear message
    # instead of an IndexError when fewer nodes are present.
    self.assertGreaterEqual(
        len(node_ids), 4,
        "At least 4 heketi nodes are required, found {}".format(
            len(node_ids)))

    # Fetch #4th node for the operations
    h_disable_node = node_ids[3]

    # Fetch bricks on the devices before volume create
    h_node_details_before, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node before pvc creation
    brick_count_before = [count[1] for count in h_node_details_before]

    # Create file volume with app pod and verify IO's
    # and compare path, UUID, vg_name
    pod_name, _, use_percent = self._create_app_pod_and_verify_pvs()

    # Check if IO's are running
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))

    # Fetch bricks on the devices after volume create
    h_node_details_after, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node after pvc creation
    brick_count_after = [count[1] for count in h_node_details_after]

    self.assertGreater(
        sum(brick_count_after), sum(brick_count_before),
        "Failed to add bricks on the node {}".format(h_node))

    # The cleanup disables the #4th node again once the test is over
    self.addCleanup(
        heketi_ops.heketi_node_disable, h_client, h_url, h_disable_node)

    # Enable the #4th node
    heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_disable_node, json=True)
    h_node_id = node_info['id']
    self.assertEqual(
        node_info['state'], "online",
        "Failed to enable node {}".format(h_disable_node))

    # Disable the node and check for brick migrations
    self.addCleanup(
        heketi_ops.heketi_node_enable, h_client, h_url, h_node,
        raise_on_error=False)
    heketi_ops.heketi_node_disable(h_client, h_url, h_node)
    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    self.assertEqual(
        node_info['state'], "offline",
        "Failed to disable node {}".format(h_node))

    # Before bricks migration
    h_node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)

    # Bricks before migration on the node i.e to be deleted
    bricks_counts_before = count_bricks(h_node_info)

    # Remove the node
    heketi_ops.heketi_node_remove(h_client, h_url, h_node)

    # After bricks migration
    h_node_info_after = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)

    # Bricks after migration on the node i.e to be deleted
    bricks_counts = count_bricks(h_node_info_after)

    self.assertFalse(
        bricks_counts,
        "Failed to remove all the bricks from node {}".format(h_node))

    # Old node which is to be deleted, new node where the bricks reside
    old_node, new_node = h_node, h_node_id

    # Node info for the new node where bricks reside after migration
    h_node_info_new = heketi_ops.heketi_node_info(
        h_client, h_url, new_node, json=True)

    bricks_counts_after = count_bricks(h_node_info_new)

    self.assertEqual(
        bricks_counts_before, bricks_counts_after,
        "Failed to migrated bricks from {} node to {}".format(
            old_node, new_node))

    # Fetch device list i.e to be deleted
    h_node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    devices_list = [
        [device['id'], device['name']]
        for device in h_node_info['devices']]

    for device_id, device_name in devices_list:
        # Register the re-add before deleting so that a failure midway
        # still attempts to restore the device
        self.addCleanup(
            heketi_ops.heketi_device_add, h_client, h_url,
            device_name, h_node, raise_on_error=False)

        # Device deletion from heketi node
        device_delete = heketi_ops.heketi_device_delete(
            h_client, h_url, device_id)
        self.assertTrue(
            device_delete,
            "Failed to delete the device {}".format(device_id))

    # Remember how the node was registered so it can be added back
    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    cluster_id = node_info['cluster']
    zone = node_info['zone']
    storage_hostname = node_info['hostnames']['manage'][0]
    storage_ip = node_info['hostnames']['storage'][0]

    # Delete the node
    self.addCleanup(
        heketi_ops.heketi_node_add, h_client, h_url,
        zone, cluster_id, storage_hostname, storage_ip,
        raise_on_error=False)
    heketi_ops.heketi_node_delete(h_client, h_url, h_node)

    # Verify if the node is deleted
    node_ids = heketi_ops.heketi_node_list(h_client, h_url)
    self.assertNotIn(
        old_node, node_ids,
        "Failed to delete the node {}".format(old_node))

    # Check if IO's are running
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))

    # Adding node back
    h_node_info = heketi_ops.heketi_node_add(
        h_client, h_url, zone, cluster_id,
        storage_hostname, storage_ip, json=True)
    self.assertTrue(
        h_node_info,
        "Failed to add the node in the cluster {}".format(cluster_id))
    h_node_id = h_node_info["id"]

    # Adding devices to the new node
    for _, storage_device in devices_list:

        # Add device to the new heketi node
        heketi_ops.heketi_device_add(
            h_client, h_url, storage_device, h_node_id)
        heketi_node_info = heketi_ops.heketi_node_info(
            h_client, h_url, h_node_id, json=True)

        # ID of the first device on the node carrying the added name
        device_id = next(
            (node_device["id"]
             for node_device in heketi_node_info["devices"]
             if node_device["name"] == storage_device),
            None)

        self.assertTrue(
            device_id, "Failed to add device {} on node {}".format(
                storage_device, h_node_id))

    # Create n pvc in order to verify if the bricks reside on the new node
    pvc_amount, pvc_size = 5, 1

    # Fetch bricks on the devices before volume create
    h_node_details_before, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node before pvc creation
    brick_count_before = [count[1] for count in h_node_details_before]

    # Create file volumes
    pvc_name = self.create_and_wait_for_pvcs(
        pvc_size=pvc_size, pvc_amount=pvc_amount)
    self.assertEqual(
        len(pvc_name), pvc_amount,
        "Failed to create {} pvc".format(pvc_amount))

    # Fetch bricks on the devices after volume create
    h_node_details_after, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node after pvc creation
    brick_count_after = [count[1] for count in h_node_details_after]

    # NOTE(review): 'new_node' still holds the ID of the #4th node, not
    # the ID of the node re-added above (h_node_id) - confirm which one
    # the message is meant to report.
    self.assertGreater(
        sum(brick_count_after), sum(brick_count_before),
        "Failed to add bricks on the new node {}".format(new_node))

    # Check if IO's are running after new node is added
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))