def test_heketi_and_prometheus_device_count(self):
    """Check that heketi and Prometheus report the same device count.

    Collects the ``cluster`` and ``hostname`` labels and the summed value
    of ``heketi_device_count`` from the heketi metrics and from the
    Prometheus pod, then asserts that all three agree.
    """
    heketi_clusters, heketi_hosts, heketi_total = [], [], 0
    prom_clusters, prom_hosts, prom_total = [], [], 0

    # Values as exposed by heketi itself.
    heketi_metrics = heketi_ops.get_heketi_metrics(
        self.heketi_client_node, self.heketi_server_url)
    for sample in heketi_metrics.get('heketi_device_count'):
        heketi_clusters.append(sample.get('cluster'))
        heketi_hosts.append(sample.get('hostname'))
        heketi_total += int(sample.get('value'))

    # Values as fetched from the Prometheus pod; here the labels live in
    # the 'metric' mapping and the number is the second item of 'value'.
    prom_samples = self._fetch_metric_from_promtheus_pod(
        metric='heketi_device_count')
    for sample in prom_samples:
        prom_total += int(sample.get('value')[1])
        prom_clusters.append(sample.get('metric')['cluster'])
        prom_hosts.append(sample.get('metric')['hostname'])

    self.assertEqual(
        heketi_clusters, prom_clusters, "Cluster ID's are not same")
    self.assertEqual(
        heketi_hosts, prom_hosts, "Hostnames are not same")
    self.assertEqual(
        heketi_total, prom_total, "Total device counts are not same")
def test_heketi_metrics_heketipod_failure(self):
    """Validate heketi metrics after the heketi pod was taken down.

    Scales the heketi deployment config to 0 pods, expects the metrics
    call to fail, scales back to 1 pod, creates three volumes and finally
    compares the metrics with the topology.
    """
    master = self.ocp_master_node[0]
    dc_name = self.heketi_dc_name
    h_client, h_url = self.heketi_client_node, self.heketi_server_url

    scale_dc_pod_amount_and_wait(master, dc_name, pod_amount=0)
    # Make sure heketi is brought back even if the test fails below.
    self.addCleanup(
        scale_dc_pod_amount_and_wait, master, dc_name, pod_amount=1)

    # Metrics must not be reachable while no heketi pod is running.
    with self.assertRaises(AssertionError):
        get_heketi_metrics(h_client, h_url, prometheus_format=True)

    scale_dc_pod_amount_and_wait(master, dc_name, pod_amount=1)
    heketi_pod = get_pod_name_from_dc(master, dc_name, dc_name)
    wait_for_pod_be_ready(master, heketi_pod, wait_step=5)

    for _ in range(3):
        volume = heketi_volume_create(h_client, h_url, 1, json=True)
        self.assertTrue(volume)
        self.addCleanup(
            heketi_volume_delete, h_client, h_url, volume['id'],
            raise_on_error=False)
        listing = heketi_volume_list(h_client, h_url)
        self.assertIn(volume['id'], listing)

    self.verify_heketi_metrics_with_topology_info()
def test_heketi_metrics_heketipod_failure(self):
    """Validate heketi metrics once the heketi pod has been restarted.

    With the heketi deployment config scaled to 0 pods the metrics call
    is expected to raise ``exceptions.ExecutionError``. After scaling back
    to 1 pod, three volumes are created and the metrics are compared with
    the topology.
    """
    ocp_node = self.ocp_master_node[0]
    heketi_dc = self.heketi_dc_name
    client, server = self.heketi_client_node, self.heketi_server_url

    scale_dc_pod_amount_and_wait(ocp_node, heketi_dc, pod_amount=0)
    # Restore the pod on teardown regardless of the test outcome.
    self.addCleanup(
        scale_dc_pod_amount_and_wait, ocp_node, heketi_dc, pod_amount=1)

    # While heketi is down the metrics endpoint must not answer.
    with self.assertRaises(exceptions.ExecutionError):
        get_heketi_metrics(client, server, prometheus_format=True)

    scale_dc_pod_amount_and_wait(ocp_node, heketi_dc, pod_amount=1)
    new_pod = get_pod_name_from_dc(ocp_node, heketi_dc, heketi_dc)
    wait_for_pod_be_ready(ocp_node, new_pod, wait_step=5)

    for _ in range(3):
        created = heketi_volume_create(client, server, 1, json=True)
        self.assertTrue(created)
        self.addCleanup(
            heketi_volume_delete, client, server, created['id'],
            raise_on_error=False)
        volumes = heketi_volume_list(client, server)
        self.assertIn(created['id'], volumes)

    self.verify_heketi_metrics_with_topology_info()
def verify_volume_count(self):
    """Assert the per-cluster volume count in the heketi metrics.

    For every entry of ``heketi_volumes_count`` the reported value must
    equal the number of volumes listed in that cluster's info.
    """
    h_client, h_url = self.heketi_client_node, self.heketi_server_url

    volume_counts = get_heketi_metrics(h_client, h_url)[
        'heketi_volumes_count']
    self.assertTrue(volume_counts)

    for entry in volume_counts:
        cluster_id = entry['cluster']
        self.assertTrue(cluster_id)
        info = heketi_cluster_info(
            h_client, h_url, cluster_id, json=True)
        self.assertEqual(entry['value'], len(info['volumes']))
def verify_volume_count(self):
    """Compare ``heketi_volumes_count`` with the cluster info.

    Each metric entry must name a cluster, and its value must match the
    length of the ``volumes`` list returned for that cluster.
    """
    client = self.heketi_client_node
    server = self.heketi_server_url

    per_cluster = get_heketi_metrics(client, server)['heketi_volumes_count']
    self.assertTrue(per_cluster)

    for sample in per_cluster:
        self.assertTrue(sample['cluster'])
        details = heketi_cluster_info(
            client, server, sample['cluster'], json=True)
        known_volumes = details['volumes']
        self.assertEqual(sample['value'], len(known_volumes))
def test_heketi_metrics_validating_cluster_count(self):
    """Validate that 'heketi_cluster_count' equals the listed clusters."""
    h_client, h_url = self.heketi_client_node, self.heketi_server_url

    listing = heketi_cluster_list(h_client, h_url, json=True)
    self.assertTrue(listing)
    self.assertTrue(listing.get('clusters'))

    metrics = get_heketi_metrics(h_client, h_url)
    self.assertTrue(metrics)
    self.assertTrue(metrics.get('heketi_cluster_count'))

    listed_clusters = len(listing['clusters'])
    self.assertEqual(listed_clusters, metrics['heketi_cluster_count'])
def test_heketi_metrics_validating_cluster_count(self):
    """Check the cluster count metric against the heketi cluster list.

    Both the cluster list and the metrics must be non-empty, and the
    number of listed clusters must equal ``heketi_cluster_count``.
    """
    client = self.heketi_client_node
    server = self.heketi_server_url

    clusters_info = heketi_cluster_list(client, server, json=True)
    self.assertTrue(clusters_info)
    self.assertTrue(clusters_info.get('clusters'))

    reported = get_heketi_metrics(client, server)
    self.assertTrue(reported)
    self.assertTrue(reported.get('heketi_cluster_count'))

    self.assertEqual(
        len(clusters_info['clusters']), reported['heketi_cluster_count'])
def test_heketi_metrics_validating_existing_node_count(self):
    """Validate that 'heketi_nodes_count' matches each cluster's nodes."""
    h_client, h_url = self.heketi_client_node, self.heketi_server_url

    metrics = get_heketi_metrics(h_client, h_url)
    self.assertTrue(metrics)
    self.assertTrue(metrics.get('heketi_nodes_count'))

    for entry in metrics['heketi_nodes_count']:
        info = heketi_cluster_info(
            h_client, h_url, entry['cluster'], json=True)
        self.assertTrue(info)
        self.assertTrue(info.get('nodes'))
        node_total = len(info['nodes'])
        self.assertEqual(node_total, entry['value'])
def test_heketi_metrics_validating_existing_node_count(self):
    """Check the node count metric of every cluster.

    For each ``heketi_nodes_count`` entry the cluster info is fetched and
    the number of its nodes must equal the metric value.
    """
    client = self.heketi_client_node
    server = self.heketi_server_url

    reported = get_heketi_metrics(client, server)
    self.assertTrue(reported)
    self.assertTrue(reported.get('heketi_nodes_count'))

    for sample in reported['heketi_nodes_count']:
        details = heketi_cluster_info(
            client, server, sample['cluster'], json=True)
        self.assertTrue(details)
        self.assertTrue(details.get('nodes'))
        self.assertEqual(len(details['nodes']), sample['value'])
def verify_heketi_metrics_with_topology_info(self):
    """Cross-check the heketi metrics against the heketi topology.

    Topology and metrics are fetched once. The cluster count, the
    per-cluster node and volume counts, the per-node device count and the
    per-device brick count, size, free and used values found in the
    metrics must equal what the topology contains.
    """
    topology = heketi_topology_info(
        self.heketi_client_node, self.heketi_server_url, json=True)
    metrics = get_heketi_metrics(
        self.heketi_client_node, self.heketi_server_url)

    def assert_labels(metric_name, wanted):
        # Every (label, value) pair must occur somewhere in the metric.
        # Pairs are checked in the given order, one assertion per pair.
        for label, value in wanted:
            present = [sample[label] for sample in metrics[metric_name]]
            self.assertIn(value, present)

    def matching(metric_name, wanted):
        # Lazily yield the samples whose labels equal all wanted pairs.
        for sample in metrics[metric_name]:
            if all(sample[label] == value for label, value in wanted):
                yield sample

    self.assertTrue(topology)
    self.assertIn('clusters', list(topology.keys()))
    self.assertGreater(len(topology['clusters']), 0)

    self.assertTrue(metrics)
    self.assertGreater(len(metrics), 0)

    self.assertEqual(
        len(topology['clusters']), metrics['heketi_cluster_count'])

    for cluster in topology['clusters']:
        self.assertIn('nodes', list(cluster.keys()))
        self.assertGreater(len(cluster['nodes']), 0)

        cluster_id = cluster['id']
        by_cluster = (('cluster', cluster_id),)

        assert_labels('heketi_nodes_count', by_cluster)
        for sample in matching('heketi_nodes_count', by_cluster):
            self.assertEqual(len(cluster['nodes']), sample['value'])

        assert_labels('heketi_volumes_count', by_cluster)
        for sample in matching('heketi_volumes_count', by_cluster):
            self.assertEqual(len(cluster['volumes']), sample['value'])

        for node in cluster['nodes']:
            self.assertIn('devices', list(node.keys()))
            self.assertGreater(len(node['devices']), 0)

            hostname = node['hostnames']['manage'][0]
            by_node = by_cluster + (('hostname', hostname),)

            assert_labels('heketi_device_count', by_node)
            for sample in matching('heketi_device_count', by_node):
                self.assertEqual(len(node['devices']), sample['value'])

            for device in node['devices']:
                device_name = device['name']
                storage = device['storage']
                size_total = storage['total']
                size_free = storage['free']
                size_used = storage['used']
                by_device = by_node + (('device', device_name),)

                assert_labels('heketi_device_brick_count', by_device)
                for sample in matching(
                        'heketi_device_brick_count', by_device):
                    self.assertEqual(
                        len(device['bricks']), sample['value'])

                assert_labels('heketi_device_size', by_device)
                for sample in matching('heketi_device_size', by_device):
                    self.assertEqual(size_total, sample['value'])

                assert_labels('heketi_device_free', by_device)
                for sample in matching('heketi_device_free', by_device):
                    self.assertEqual(size_free, sample['value'])

                assert_labels('heketi_device_used', by_device)
                for sample in matching('heketi_device_used', by_device):
                    self.assertEqual(size_used, sample['value'])
def test_heketi_metrics_validation_with_node_reboot(self):
    """Validate heketi and Prometheus metrics across a node reboot.

    Records the heketi metrics and the Prometheus data for
    ``self.metrics``, reboots the node hosting the heketi pod, waits for
    the node and the pods to recover and then asserts that both sets of
    values are unchanged.
    """
    master = self._master
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    def heketi_snapshot():
        # First sample of every metric named in self.metrics.
        return tuple(
            heketi_ops.get_heketi_metrics(h_node, h_server).get(name)[0]
            for name in self.metrics)

    # Values before the reboot: heketi first, then Prometheus.
    openshift_ops.switch_oc_project(master, self.storage_project_name)
    initial_metrics = heketi_snapshot()

    openshift_ops.switch_oc_project(master, self._prometheus_project_name)
    initial_prometheus = self._get_and_manipulate_metric_data(self.metrics)

    # Find the node on which the heketi pod is scheduled.
    openshift_ops.switch_oc_project(master, self.storage_project_name)
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        master, self.heketi_dc_name)
    heketi_node = openshift_ops.oc_get_custom_resource(
        master, 'pod', '.:spec.nodeName', heketi_pod)[0]

    # Reboot that node; the cleanup waits for the pods again on teardown.
    self.addCleanup(
        self._check_heketi_and_gluster_pod_after_node_reboot, heketi_node)
    node_ops.node_reboot_by_command(heketi_node)

    # Poll until the node's Ready condition reports 'False'.
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 10):
        ready_state = openshift_ops.oc_get_custom_resource(
            master, 'node', custom, heketi_node)
        if ready_state[0] == 'False':
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(heketi_node))

    # Wait for the node, then for the heketi and glusterfs pods.
    openshift_ops.wait_for_ocp_node_be_ready(master, heketi_node)
    self._check_heketi_and_gluster_pod_after_node_reboot(heketi_node)

    # Values after the reboot.
    openshift_ops.switch_oc_project(master, self._prometheus_project_name)
    final_metrics = heketi_snapshot()
    final_prometheus = self._get_and_manipulate_metric_data(self.metrics)

    err_msg = "Initial value {} is not same as final value {}"
    self.assertEqual(
        initial_metrics, final_metrics,
        err_msg.format(initial_metrics, final_metrics))
    self.assertEqual(
        initial_prometheus, final_prometheus,
        err_msg.format(initial_prometheus, final_prometheus))
def test_heketi_prometheus_usedbytes_brickcount_on_device_delete(
        self, operation):
    """Validate used bytes or brick count after a heketi device delete.

    Adds the first additional device of the first Gluster server to
    heketi, checks it carries no bricks, disables, removes and deletes it
    again and then waits until heketi and Prometheus agree.

    Args:
        operation: "usedbytes" compares ``heketi_device_used_bytes`` for
            the deleted device; "brickcount" compares the summed
            ``heketi_device_brick_count``. Any other value skips the
            comparison.

    Raises:
        exceptions.ExecutionError: if both sources still disagree when
            the waiter expires.
    """
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Get list of additional devices for one of the Gluster nodes
    gluster_server_0 = list(self.gluster_servers_info.values())[0]
    manage_hostname = gluster_server_0.get("manage")
    self.assertTrue(
        manage_hostname, "IP Address is not specified for "
        "node {}".format(gluster_server_0))
    # Check the list before indexing so that a missing or empty entry
    # fails with the assertion message instead of a TypeError/IndexError.
    additional_devices = gluster_server_0.get("additional_devices")
    self.assertTrue(
        additional_devices, "Additional devices are not specified for "
        "node {}".format(gluster_server_0))
    device_name = additional_devices[0]
    self.assertTrue(
        device_name, "Additional devices are not specified for "
        "node {}".format(gluster_server_0))

    # Get node ID of the Gluster hostname
    node_list = heketi_ops.heketi_topology_info(
        h_node, h_server, json=True).get("clusters")[0].get("nodes")
    self.assertTrue(
        node_list, "Cluster info command returned empty list of nodes")
    matching_node_ids = [
        node.get("id") for node in node_list
        if manage_hostname == node.get("hostnames").get("manage")[0]]
    self.assertTrue(
        matching_node_ids,
        "Failed to get node_id for {}".format(manage_hostname))
    node_id = matching_node_ids[0]

    # Adding heketi device
    heketi_ops.heketi_device_add(h_node, h_server, device_name, node_id)
    node_info_after_addition = heketi_ops.heketi_node_info(
        h_node, h_server, node_id, json=True)
    device_id, bricks = None, None
    for device in node_info_after_addition.get("devices"):
        if device.get("name") == device_name:
            device_id, bricks = (
                device.get("id"), len(device.get("bricks")))
            break
    self.assertTrue(
        device_id,
        "Device {} not found in node info of the node {}".format(
            device_name, node_id))

    # Register the cleanups before any further assertion so the device
    # does not leak if one fails. Cleanups run in reverse order: disable,
    # remove, delete.
    self.addCleanup(
        heketi_ops.heketi_device_delete, h_node, h_server, device_id,
        raise_on_error=False)
    self.addCleanup(
        heketi_ops.heketi_device_remove, h_node, h_server, device_id,
        raise_on_error=False)
    self.addCleanup(
        heketi_ops.heketi_device_disable, h_node, h_server, device_id,
        raise_on_error=False)

    # Verify zero bricks on the device
    msg = (
        "Number of bricks on the device {} of the nodes should be "
        "zero".format(device_name))
    self.assertFalse(bricks, msg)

    # Disable,Remove and Delete heketi device
    heketi_ops.heketi_device_disable(h_node, h_server, device_id)
    heketi_ops.heketi_device_remove(h_node, h_server, device_id)
    heketi_ops.heketi_device_delete(h_node, h_server, device_id)

    # Verify device deletion
    node_info_after_deletion = (
        heketi_ops.heketi_node_info(h_node, h_server, node_id))
    msg = ("Device {} should not be shown in node info of the node {} "
           "after the device deletion".format(device_id, node_id))
    self.assertNotIn(device_id, node_info_after_deletion, msg)

    # NOTE(review): the project names below are hard-coded; confirm
    # whether they should come from the test configuration instead.
    if operation == "usedbytes":
        # Validate heketi and prometheus device used bytes
        for w in waiter.Waiter(timeout=60, interval=10):
            device_used_bytes_prometheus = 0
            device_used_bytes_metrics = 0

            openshift_ops.switch_oc_project(
                self.ocp_master_node[0], 'openshift-monitoring')
            metric_result = self._fetch_metric_from_promtheus_pod(
                metric='heketi_device_used_bytes')
            for result in metric_result:
                # Prometheus keeps the labels in the 'metric' mapping.
                # The device is identified by hostname and device name;
                # a node id never equals the 'cluster' label.
                labels = result.get('metric') or {}
                if (manage_hostname == labels.get('hostname')
                        and device_name == labels.get('device')):
                    device_used_bytes_prometheus += (
                        int(result.get('value')[1]))

            openshift_ops.switch_oc_project(
                self.ocp_master_node[0], 'glusterfs')
            metrics = heketi_ops.get_heketi_metrics(h_node, h_server)
            heketi_device_used_metric = (
                metrics.get('heketi_device_used_bytes'))
            for result in heketi_device_used_metric:
                if (manage_hostname == result.get('hostname')
                        and device_name == result.get('device')):
                    device_used_bytes_metrics += int(result.get('value'))

            if device_used_bytes_prometheus == device_used_bytes_metrics:
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to update device details in prometheus")

    elif operation == "brickcount":
        # Validate heketi and prometheus device brick count
        for w in waiter.Waiter(timeout=60, interval=10):
            device_brick_count_prometheus = 0
            device_brick_count_metrics = 0

            metrics = heketi_ops.get_heketi_metrics(h_node, h_server)
            heketi_device_count_metric = metrics.get(
                'heketi_device_brick_count')
            for result in heketi_device_count_metric:
                device_brick_count_metrics += int(result.get('value'))

            openshift_ops.switch_oc_project(
                self.ocp_master_node[0], 'openshift-monitoring')
            metric_result = self._fetch_metric_from_promtheus_pod(
                metric='heketi_device_brick_count')
            for result in metric_result:
                device_brick_count_prometheus += (
                    int(result.get('value')[1]))

            if device_brick_count_prometheus == device_brick_count_metrics:
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to update device details in prometheus")
def verify_heketi_metrics_with_topology_info(self):
    """Assert that the heketi metrics agree with the heketi topology.

    Walks clusters, nodes and devices of the topology. At every level the
    identifying labels must be present in the corresponding metric, and
    the metric value of the matching entry must equal the topology value.
    """
    topology = heketi_topology_info(
        self.heketi_client_node, self.heketi_server_url, json=True)
    metrics = get_heketi_metrics(
        self.heketi_client_node, self.heketi_server_url)

    def check_metric(name, label_pairs, expected):
        # label_pairs is an ordered tuple of (label, value) pairs. Each
        # pair must appear in the metric. 'expected' is a zero-argument
        # callable, so the topology value is only read once an entry
        # matching all pairs has been found.
        for label, value in label_pairs:
            self.assertIn(value, [obj[label] for obj in metrics[name]])
        for obj in metrics[name]:
            if all(obj[label] == value for label, value in label_pairs):
                self.assertEqual(expected(), obj['value'])

    self.assertTrue(topology)
    self.assertIn('clusters', list(topology.keys()))
    self.assertGreater(len(topology['clusters']), 0)

    self.assertTrue(metrics)
    self.assertGreater(len(metrics), 0)

    cluster_total = len(topology['clusters'])
    self.assertEqual(cluster_total, metrics['heketi_cluster_count'])

    for cluster in topology['clusters']:
        self.assertIn('nodes', list(cluster.keys()))
        self.assertGreater(len(cluster['nodes']), 0)

        cluster_id = cluster['id']
        cluster_key = (('cluster', cluster_id),)

        check_metric(
            'heketi_nodes_count', cluster_key,
            lambda: len(cluster['nodes']))
        check_metric(
            'heketi_volumes_count', cluster_key,
            lambda: len(cluster['volumes']))

        for node in cluster['nodes']:
            self.assertIn('devices', list(node.keys()))
            self.assertGreater(len(node['devices']), 0)

            hostname = node['hostnames']['manage'][0]
            node_key = cluster_key + (('hostname', hostname),)

            check_metric(
                'heketi_device_count', node_key,
                lambda: len(node['devices']))

            for device in node['devices']:
                device_name = device['name']
                device_size_t = device['storage']['total']
                device_free_t = device['storage']['free']
                device_used_t = device['storage']['used']
                device_key = node_key + (('device', device_name),)

                check_metric(
                    'heketi_device_brick_count', device_key,
                    lambda: len(device['bricks']))
                check_metric(
                    'heketi_device_size', device_key,
                    lambda: device_size_t)
                check_metric(
                    'heketi_device_free', device_key,
                    lambda: device_free_t)
                check_metric(
                    'heketi_device_used', device_key,
                    lambda: device_used_t)