def test_glusterblock_logs_presence_verification(self):
        """Validate presence of glusterblock provisioner POD and it's status"""

        # Get glusterblock provisioner dc name
        cmd = ("oc get dc | awk '{ print $1 }' | "
               "grep -e glusterblock -e provisioner")
        dc_name = cmd_run(cmd, self.ocp_master_node[0], True)

        # Get glusterblock provisioner pod name and its status
        gb_prov_name, gb_prov_status = oc_get_custom_resource(
            self.node, 'pod', custom=':.metadata.name,:.status.phase',
            selector='deploymentconfig=%s' % dc_name)[0]
        self.assertEqual(gb_prov_status, 'Running')

        # Create Secret, SC and PVC
        self.create_storage_class()
        self.create_and_wait_for_pvc()

        # Get list of Gluster nodes
        g_hosts = list(g.config.get("gluster_servers", {}).keys())
        self.assertGreater(
            len(g_hosts), 0,
            "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

        # Perform checks on Gluster nodes/PODs
        logs = ("gluster-block-configshell", "gluster-blockd")

        gluster_pods = oc_get_pods(
            self.ocp_client[0], selector="glusterfs-node=pod")
        if gluster_pods:
            cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
        else:
            cmd = "tail -n 5 /var/log/gluster-block/%s.log"
        for g_host in g_hosts:
            for log in logs:
                out = cmd_run_on_gluster_pod_or_node(
                    self.ocp_client[0], cmd % log, gluster_node=g_host)
                self.assertTrue(
                    out, "Command '%s' output is empty." % (cmd % log))
    def verify_pods_are_running(self):
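        """Assert that every pod matching the 'scale=scale' selector is in
        'Running' state and reports readiness '1/1'."""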
        pods = oc_get_pods(self.ocp_master_node[0], selector='scale=scale')

        faulty_pods = {}
        for pod in pods.keys():
            if not (pods[pod]['ready'] == '1/1'
                    and pods[pod]['status'] == 'Running'):
                faulty_pods[pod] = pods[pod]

        msg = "Out of {} pods {} pods are not running. Pods are {}".format(
            len(pods), len(faulty_pods), faulty_pods)
        self.assertFalse(faulty_pods, msg)
    def test_prometheus_volume_metrics_on_pod_restart(self):
        """Validate volume metrics using prometheus before and after pod
        restart"""

        # Create PVC and wait for it to be in 'Bound' state
        pvc_name = self.create_and_wait_for_pvc()
        pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
            self._master, pvc_name, "autotest-volume",
            image=self.io_container_image_cirros)
        self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name,
                        raise_on_absence=False)

        # Wait for the POD to be up and running
        openshift_ops.wait_for_pod_be_ready(
            self._master, pod_name, timeout=60, wait_step=2)

        # Write data on the volume; the 2 minute sleep is required for
        # prometheus to capture the exact values of the metrics
        self._run_io_on_the_pod(pod_name, 30)
        time.sleep(120)

        # Fetch the metrics and store them in initial_metrics as a dict
        initial_metrics = self._get_and_manipulate_metric_data(
            self.metrics, pvc_name)

        # Mark the current node unschedulable on which app pod is running
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)
        pod_info = openshift_ops.oc_get_pods(self._master, name=pod_name)
        openshift_ops.oc_adm_manage_node(
            self._master, '--schedulable=false',
            nodes=[pod_info[pod_name]["node"]])
        self.addCleanup(
            openshift_ops.oc_adm_manage_node, self._master,
            '--schedulable=true', nodes=[pod_info[pod_name]["node"]])

        # Delete the existing pod and create a new pod
        openshift_ops.oc_delete(self._master, 'pod', pod_name)
        pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
            self._master, pvc_name, "autotest-volume")
        self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name)

        # Wait for the POD to be running and for prometheus to refresh data
        openshift_ops.wait_for_pod_be_ready(
            self._master, pod_name, timeout=60, wait_step=2)
        time.sleep(120)

        # Fetch the metrics, store them in final_metrics as a dict and
        # validate against initial_metrics
        final_metrics = self._get_and_manipulate_metric_data(
            self.metrics, pvc_name)
        self.assertEqual(dict(initial_metrics), dict(final_metrics),
                         "Metrics are different post pod restart")
    def verify_if_more_than_n_percentage_pod_restarted(
            self, pods_old, selector='scale=scale', percentage=33):
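        """Assert that no more than 'percentage' percent of the pods matching
        'selector' have been restarted compared to the 'pods_old' snapshot.

        Args:
            pods_old (dict): pods info captured earlier via oc_get_pods.
            selector (str): label selector of the pods to re-check.
            percentage (int): maximum allowed percentage of restarted pods.
        """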
        # Find pods whose restart count changed since the snapshot
        pods_new = oc_get_pods(self.ocp_master_node[0], selector=selector)
        pods_restart = []
        for pod in pods_new.keys():
            if pods_new[pod]['restarts'] != pods_old[pod]['restarts']:
                pods_restart += [pod]

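        # Allow at most 'percentage' percent of pods to have restarted,
        # e.g. with 10 pods and percentage=33 the threshold below is
        # int(10 / (100 / 33)) = 3 restarted pods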
        if len(pods_restart) > int(len(pods_new.keys()) / (100 / percentage)):
            msg = "Out of {} pods {} pods restarted {}".format(
                len(pods_new), len(pods_restart), pods_restart)
            raise AssertionError(msg)
    def _fetch_metric_from_promtheus_pod(self, metric):
        """Fetch metric from prometheus pod using api call"""
        prometheus_pods = list(openshift_ops.oc_get_pods(
            self._master,
            selector=self._prometheus_resources_selector).keys())
        fetch_metric_cmd = ("curl 'http://localhost:9090/api/v1/query"
                            "?query={}'".format(metric))
        ret, metric_data, _ = openshift_ops.oc_rsh(
            self._master, prometheus_pods[0], fetch_metric_cmd)
        # The prometheus HTTP API wraps query output as
        # {"status": ..., "data": {"result": [{"metric": {...},
        #  "value": [<timestamp>, "<value>"]}, ...]}}; only the result
        # list is of interest here
        metric_result = json.loads(metric_data)["data"]["result"]
        if (not metric_result) or ret:
            raise exceptions.ExecutionError(
                "Failed to fetch data for metric {}, output {}".format(
                    metric, metric_result))
        return metric_result
    def validate_multipath_info(self, hacount):
        """validates multipath command on the pod node

        Args:
            hacount (int): hacount for which multipath to be checked
        """
        # create pod using pvc created
        dc_name = oc_create_app_dc_with_io(
            self.ocp_master_node[0],
            self.pvc_name,
            image=self.io_container_image_cirros)
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
        self.addCleanup(oc_delete, self.ocp_master_node[0], "dc", dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, self.ocp_master_node[0],
                        dc_name, 0)

        wait_for_pod_be_ready(self.ocp_master_node[0],
                              pod_name,
                              timeout=120,
                              wait_step=3)

        # Get pod info
        pod_info = oc_get_pods(self.ocp_master_node[0],
                               selector='deploymentconfig=%s' % dc_name)
        node = pod_info[pod_name]['node']

        # Find iqn from volume info
        pv_name = get_pv_name_from_pvc(self.ocp_master_node[0], self.pvc_name)
        custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
        vol_id = oc_get_custom_resource(self.ocp_master_node[0], 'pv', custom,
                                        pv_name)[0]
        vol_info = heketi_blockvolume_info(self.heketi_client_node,
                                           self.heketi_server_url,
                                           vol_id,
                                           json=True)
        iqn = vol_info['blockvolume']['iqn']

        # Get the paths info from the node
        devices = get_iscsi_block_devices_by_path(node, iqn).keys()
        self.assertEqual(hacount, len(devices))

        # Validate mpath
        mpaths = set()
        for device in devices:
            mpaths.add(get_mpath_name_from_device_name(node, device))
        self.assertEqual(1, len(mpaths))
        validate_multipath_pod(self.ocp_master_node[0], pod_name, hacount,
                               list(mpaths)[0])
    def validate_multipath_info(self, hacount):
        """validates multipath command on the pod node

        Args:
            hacount (int): hacount for which multipath to be checked
        """
        # create pod using pvc created
        dc_name = oc_create_app_dc_with_io(
            self.ocp_master_node[0], self.pvc_name
        )
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
        self.addCleanup(oc_delete, self.ocp_master_node[0], "dc", dc_name)
        self.addCleanup(
            scale_dc_pod_amount_and_wait, self.ocp_master_node[0], dc_name, 0
        )

        wait_for_pod_be_ready(
            self.ocp_master_node[0], pod_name, timeout=120, wait_step=3
        )

        # Get pod info
        pod_info = oc_get_pods(
            self.ocp_master_node[0], selector='deploymentconfig=%s' % dc_name)
        node = pod_info[pod_name]['node']

        # Find iqn from volume info
        pv_name = get_pv_name_from_pvc(self.ocp_master_node[0], self.pvc_name)
        custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
        vol_id = oc_get_custom_resource(
            self.ocp_master_node[0], 'pv', custom, pv_name)[0]
        vol_info = heketi_blockvolume_info(
            self.heketi_client_node, self.heketi_server_url, vol_id, json=True)
        iqn = vol_info['blockvolume']['iqn']

        # Get the paths info from the node
        devices = get_iscsi_block_devices_by_path(node, iqn).keys()
        self.assertEqual(hacount, len(devices))

        # Validate mpath
        mpaths = set()
        for device in devices:
            mpaths.add(get_mpath_name_from_device_name(node, device))
        self.assertEqual(1, len(mpaths))
        validate_multipath_pod(
            self.ocp_master_node[0], pod_name, hacount, list(mpaths)[0])
    def test_glusterblock_logs_presence_verification(self):
        """Validate presence of glusterblock provisioner POD and it's status"""

        # Get glusterblock provisioner dc name
        cmd = ("oc get dc | awk '{ print $1 }' | "
               "grep -e glusterblock -e provisioner")
        dc_name = cmd_run(cmd, self.ocp_master_node[0], True)

        # Get glusterblock provisioner pod name and its status
        gb_prov_name, gb_prov_status = oc_get_custom_resource(
            self.node, 'pod', custom=':.metadata.name,:.status.phase',
            selector='deploymentconfig=%s' % dc_name)[0]
        self.assertEqual(gb_prov_status, 'Running')

        # Create Secret, SC and PVC
        self.create_storage_class()
        self.create_and_wait_for_pvc()

        # Get list of Gluster nodes
        g_hosts = list(g.config.get("gluster_servers", {}).keys())
        self.assertGreater(
            len(g_hosts), 0,
            "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

        # Perform checks on Gluster nodes/PODs
        logs = ("gluster-block-configshell", "gluster-blockd")

        gluster_pods = oc_get_pods(
            self.ocp_client[0], selector="glusterfs-node=pod")
        if gluster_pods:
            cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
        else:
            cmd = "tail -n 5 /var/log/gluster-block/%s.log"
        for g_host in g_hosts:
            for log in logs:
                out = cmd_run_on_gluster_pod_or_node(
                    self.ocp_client[0], cmd % log, gluster_node=g_host)
                self.assertTrue(out, "Command '%s' output is empty." % cmd)
    def test_glusterblock_logs_presence_verification(self):
        """Validate presence of glusterblock provisioner POD and it's status"""
        gb_prov_cmd = ("oc get pods --all-namespaces "
                       "-l glusterfs=block-%s-provisioner-pod "
                       "-o=custom-columns=:.metadata.name,:.status.phase" %
                       (self.storage_project_name))
        ret, out, err = g.run(self.ocp_client[0], gb_prov_cmd, "root")

        self.assertEqual(ret, 0, "Failed to get Glusterblock provisioner POD.")
        gb_prov_name, gb_prov_status = out.split()
        self.assertEqual(gb_prov_status, 'Running')

        # Create Secret, SC and PVC
        self.create_storage_class()
        self.create_and_wait_for_pvc()

        # Get list of Gluster nodes
        g_hosts = list(g.config.get("gluster_servers", {}).keys())
        self.assertGreater(
            len(g_hosts), 0,
            "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

        # Perform checks on Gluster nodes/PODs
        logs = ("gluster-block-configshell", "gluster-blockd")

        gluster_pods = oc_get_pods(self.ocp_client[0],
                                   selector="glusterfs-node=pod")
        if gluster_pods:
            cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
        else:
            cmd = "tail -n 5 /var/log/gluster-block/%s.log"
        for g_host in g_hosts:
            for log in logs:
                out = cmd_run_on_gluster_pod_or_node(self.ocp_client[0],
                                                     cmd % log,
                                                     gluster_node=g_host)
                self.assertTrue(out, "Command '%s' output is empty." % cmd)
    def test_glusterblock_logs_presence_verification(self):
        """Validate presence of glusterblock provisioner POD and it's status"""
        gb_prov_cmd = ("oc get pods --all-namespaces "
                       "-l glusterfs=block-%s-provisioner-pod "
                       "-o=custom-columns=:.metadata.name,:.status.phase" % (
                           self.storage_project_name))
        ret, out, err = g.run(self.ocp_client[0], gb_prov_cmd, "root")

        self.assertEqual(ret, 0, "Failed to get Glusterblock provisioner POD.")
        gb_prov_name, gb_prov_status = out.split()
        self.assertEqual(gb_prov_status, 'Running')

        # Create Secret, SC and PVC
        self.create_storage_class()
        self.create_and_wait_for_pvc()

        # Get list of Gluster nodes
        g_hosts = list(g.config.get("gluster_servers", {}).keys())
        self.assertGreater(
            len(g_hosts), 0,
            "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

        # Perform checks on Gluster nodes/PODs
        logs = ("gluster-block-configshell", "gluster-blockd")

        gluster_pods = oc_get_pods(
            self.ocp_client[0], selector="glusterfs-node=pod")
        if gluster_pods:
            cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
        else:
            cmd = "tail -n 5 /var/log/gluster-block/%s.log"
        for g_host in g_hosts:
            for log in logs:
                out = cmd_run_on_gluster_pod_or_node(
                    self.ocp_client[0], cmd % log, gluster_node=g_host)
                self.assertTrue(out, "Command '%s' output is empty." % cmd)
    def verify_iscsi_sessions_and_multipath(
            self, pvc_name, rname, rtype='dc', heketi_server_url=None,
            is_registry_gluster=False):
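        """Verify iscsi sessions and multipath for a block-backed PVC.

        Args:
            pvc_name (str): name of the PVC backed by a heketi block volume.
            rname (str): name of the dc, rc or pod consuming the PVC.
            rtype (str): resource type of 'rname', one of 'dc', 'rc' or 'pod'.
            heketi_server_url (str): heketi server url; defaults to
                self.heketi_server_url.
            is_registry_gluster (bool): True when the volume is served by the
                registry gluster cluster.

        Returns:
            tuple: (iqn, hacount, node) of the verified block volume.
        """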
        if not heketi_server_url:
            heketi_server_url = self.heketi_server_url

        # Get storage ips of glusterfs pods
        keys = (list(g.config['gluster_registry_servers'].keys()) if
                is_registry_gluster else self.gluster_servers)
        servers_info = (g.config['gluster_registry_servers'] if
                        is_registry_gluster else self.gluster_servers_info)
        gluster_ips = []
        for key in keys:
            gluster_ips.append(servers_info[key]['storage'])
        gluster_ips.sort()

        # Find iqn and hacount from volume info
        pv_name = get_pv_name_from_pvc(self.ocp_client[0], pvc_name)
        custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
        vol_id = oc_get_custom_resource(
            self.ocp_client[0], 'pv', custom, pv_name)[0]
        vol_info = heketi_blockvolume_info(
            self.heketi_client_node, heketi_server_url, vol_id, json=True)
        iqn = vol_info['blockvolume']['iqn']
        hacount = int(vol_info['hacount'])

        # Find node on which pod is running
        if rtype == 'dc':
            pod_name = get_pod_name_from_dc(self.ocp_client[0], rname)
            pod_info = oc_get_pods(
                self.ocp_client[0], selector='deploymentconfig=%s' % rname)
        elif rtype == 'pod':
            pod_info = oc_get_pods(self.ocp_client[0], name=rname)
            pod_name = rname
        elif rtype == 'rc':
            pod_name = get_pod_name_from_rc(self.ocp_client[0], rname)
            pod_info = oc_get_pods(
                self.ocp_client[0], selector='name=%s' % rname)
        else:
            raise NameError(
                "Value of rtype should be one of 'dc', 'rc' or 'pod'")

        node = pod_info[pod_name]['node']

        # Get the iscsi sessions info from the node
        iscsi = get_iscsi_session(node, iqn)
        msg = ('Only %s iscsi sessions are present on node %s, expected %s.'
               % (len(iscsi), node, hacount))
        self.assertEqual(hacount, len(iscsi), msg)
        iscsi.sort()
        msg = ("Only gluster Nodes %s were expected in iscsi sessions, "
               "but got other Nodes %s on Node %s" % (
                   gluster_ips, iscsi, node))
        self.assertEqual(set(iscsi), (set(gluster_ips) & set(iscsi)), msg)

        # Get the paths info from the node
        devices = get_iscsi_block_devices_by_path(node, iqn)
        msg = ("Only %s devices are present on Node %s, expected %s" % (
            devices, node, hacount,))
        self.assertEqual(hacount, len(devices), msg)

        # Get mpath names and verify that only one mpath is there
        mpaths = set()
        for device in devices.keys():
            mpaths.add(get_mpath_name_from_device_name(node, device))
        msg = ("Only one mpath was expected on Node %s, but got %s" % (
            node, mpaths))
        self.assertEqual(1, len(mpaths), msg)

        validate_multipath_pod(
            self.ocp_client[0], pod_name, hacount, mpath=list(mpaths)[0])

        return iqn, hacount, node
    def initiator_side_failures(self):
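        """Create a block PVC and an app pod, verify iscsi sessions, devices
        and multipath on the node hosting the pod, then respin the pod on
        another node and verify the iscsi logout from the old node."""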

        # get storage ips of glusterfs pods
        keys = self.gluster_servers
        gluster_ips = []
        for key in keys:
            gluster_ips.append(self.gluster_servers_info[key]['storage'])
        gluster_ips.sort()

        self.create_storage_class()
        self.create_and_wait_for_pvc()

        # find iqn and hacount from volume info
        pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
        custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
        vol_id = oc_get_custom_resource(self.node, 'pv', custom, pv_name)[0]
        vol_info = heketi_blockvolume_info(self.heketi_client_node,
                                           self.heketi_server_url,
                                           vol_id,
                                           json=True)
        iqn = vol_info['blockvolume']['iqn']
        hacount = int(self.sc['hacount'])

        # create app pod
        dc_name, pod_name = self.create_dc_with_pvc(self.pvc_name)

        # Verify iscsi login, devices & mpaths twice: before and after
        # the pod is respun on another node
        for i in range(2):

            # get node hostname from pod info
            pod_info = oc_get_pods(self.node,
                                   selector='deploymentconfig=%s' % dc_name)
            node = pod_info[pod_name]['node']

            # get the iscsi sessions info from the node
            iscsi = get_iscsi_session(node, iqn)
            self.assertEqual(hacount, len(iscsi))
            iscsi.sort()
            self.assertEqual(set(iscsi), (set(gluster_ips) & set(iscsi)))

            # get the paths info from the node
            devices = get_iscsi_block_devices_by_path(node, iqn).keys()
            self.assertEqual(hacount, len(devices))

            # get mpath names and verify that only one mpath is there
            mpaths = set()
            for device in devices:
                mpaths.add(get_mpath_name_from_device_name(node, device))
            self.assertEqual(1, len(mpaths))

            validate_multipath_pod(self.node,
                                   pod_name,
                                   hacount,
                                   mpath=list(mpaths)[0])

            # The node drain, pod respin and logout check below are
            # performed only once
            if i == 1:
                break

            # make the node where the pod is running unschedulable
            oc_adm_manage_node(self.node, '--schedulable=false', nodes=[node])

            # make the node schedulable again on cleanup
            self.addCleanup(oc_adm_manage_node,
                            self.node,
                            '--schedulable=true',
                            nodes=[node])

            # delete the pod so it gets respun on another node
            oc_delete(self.node, 'pod', pod_name)
            wait_for_resource_absence(self.node, 'pod', pod_name)

            # wait for pod to come up
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)

            # get the iscsi session from the previous node to verify logout
            iscsi = get_iscsi_session(node, iqn, raise_on_error=False)
            self.assertFalse(iscsi)
    def test_metrics_workload_on_prometheus(self):
        """Validate metrics workload on prometheus"""

        # Skip test if the prometheus pods are not present
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        prometheus_pods = openshift_ops.oc_get_pods(
            self._master, selector=self._prometheus_resources_selector)
        if not prometheus_pods:
            self.skipTest(
                "Skipping test as prometheus pod is not present")

        if not self.registry_sc:
            self.skipTest(
                "Skipping test as registry storage details are not provided")
        self._registry_project = self.registry_sc.get(
            'restsecretnamespace')
        self.prefix = "autotest-{}".format(utils.get_random_str())

        # Get one of the prometheus pod names and its respective pvc name
        prometheus_pod = list(prometheus_pods.keys())[0]
        pvc_custom = ":.spec.volumes[*].persistentVolumeClaim.claimName"
        pvc_name = openshift_ops.oc_get_custom_resource(
            self._master, "pod", pvc_custom, prometheus_pod)[0]
        self.assertTrue(
            pvc_name, "Failed to get PVC name for prometheus"
            " pod {}".format(prometheus_pod))
        self.verify_iscsi_sessions_and_multipath(
            pvc_name, prometheus_pod, rtype='pod',
            heketi_server_url=self._registry_heketi_server_url,
            is_registry_gluster=True)

        # Try to fetch metric from the prometheus pod
        self._fetch_metric_from_promtheus_pod(
            metric='kube_persistentvolumeclaim_info')

        # Create storage class
        openshift_ops.switch_oc_project(
            self._master, self._registry_project)
        self.sc_name = self.create_storage_class(
            vol_name_prefix=self.prefix, glusterfs_registry=True)
        self.addCleanup(openshift_ops.switch_oc_project,
                        self._master, self._registry_project)

        # Create PVCs and app pods
        pvc_size, pvc_count, batch_count = 1, 5, 5
        for _ in range(batch_count):
            test_pvc_names = self.create_and_wait_for_pvcs(
                pvc_size, pvc_name_prefix=self.prefix,
                pvc_amount=pvc_count, sc_name=self.sc_name, timeout=600,
                wait_step=10)
            self.create_dcs_with_pvc(
                test_pvc_names, timeout=600, wait_step=5,
                dc_name_prefix="autotests-dc-with-app-io",
                space_to_use=1048576)

        # Check from the prometheus pod for the PVC space usage
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        mount_path = "/prometheus"
        cmd = "oc exec {0} -- df -PT {1} | grep {1}".format(
            prometheus_pod, mount_path)
        out = self.cmd_run(cmd)
        self.assertTrue(out, "Failed to get info about mounted volume. "
                             "Output is empty.")

        # Try to fetch metric from prometheus pod
        self._fetch_metric_from_promtheus_pod(
            metric='kube_persistentvolumeclaim_info')
        self._fetch_metric_from_promtheus_pod(
            metric='kube_pod_spec_volumes_persistentvolumeclaims_info')
        self.addCleanup(openshift_ops.switch_oc_project,
                        self._master, self._registry_project)
    def test_restart_prometheus_glusterfs_pod(self):
        """Validate restarting glusterfs pod"""

        # Add check for CRS version
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        if not self.is_containerized_gluster():
            self.skipTest(
                "Skipping this test case as CRS version check "
                "can not be implemented")

        # Get one of the prometheus pod names and its respective pvc name
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        prometheus_pods = openshift_ops.oc_get_pods(
            self._master, selector=self._prometheus_resources_selector)
        if not prometheus_pods:
            self.skipTest(
                "Skipping test as prometheus pod is not present")
        prometheus_pod = list(prometheus_pods.keys())[0]
        pvc_name = openshift_ops.oc_get_custom_resource(
            self._master, "pod",
            ":.spec.volumes[*].persistentVolumeClaim.claimName",
            prometheus_pod)[0]
        self.assertTrue(
            pvc_name,
            "Failed to get pvc name from {} pod".format(prometheus_pod))
        iqn, _, node = self.verify_iscsi_sessions_and_multipath(
            pvc_name, prometheus_pod, rtype='pod',
            heketi_server_url=self._registry_heketi_server_url,
            is_registry_gluster=True)

        # Get the ip of active path
        devices = openshift_storage_libs.get_iscsi_block_devices_by_path(
            node, iqn)
        mpath = openshift_storage_libs.get_mpath_name_from_device_name(
            node, list(devices.keys())[0])
        mpath_dev = (
            openshift_storage_libs.get_active_and_enabled_devices_from_mpath(
                node, mpath))
        node_ip = devices[mpath_dev['active'][0]]

        # Get the name of gluster pod from the ip
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        gluster_pods = openshift_ops.get_ocp_gluster_pod_details(
            self._master)
        active_pod_name = list(
            filter(lambda pod: (pod["pod_host_ip"] == node_ip), gluster_pods)
        )[0]["pod_name"]
        err_msg = "Failed to get the gluster pod name {} with active path"
        self.assertTrue(active_pod_name, err_msg.format(active_pod_name))
        g_pods = [pod['pod_name'] for pod in gluster_pods]
        g_pods.remove(active_pod_name)
        pod_list = [active_pod_name, g_pods[0]]
        for pod_name in pod_list:

            # Delete the glusterfs pods
            openshift_ops.switch_oc_project(
                self._master, self._prometheus_project_name)
            self._fetch_metric_from_promtheus_pod(
                metric='heketi_device_brick_count')

            openshift_ops.switch_oc_project(
                self._master, self._registry_project_name)
            g_pod_list_before = [
                pod["pod_name"]
                for pod in openshift_ops.get_ocp_gluster_pod_details(
                    self._master)]

            openshift_ops.oc_delete(self._master, 'pod', pod_name)
            self.addCleanup(
                self._guster_pod_delete, g_pod_list_before)

            # Wait for gluster pod to be absent
            openshift_ops.wait_for_resource_absence(
                self._master, 'pod', pod_name)

            # Try to fetch metric from prometheus pod
            openshift_ops.switch_oc_project(
                self._master, self._prometheus_project_name)
            self._fetch_metric_from_promtheus_pod(
                metric='heketi_device_brick_count')

            # Wait for new pod to come up
            openshift_ops.switch_oc_project(
                self._master, self._registry_project_name)
            self.assertTrue(self._get_newly_deployed_gluster_pod(
                g_pod_list_before), "Failed to get new pod")
            self._wait_for_gluster_pod_be_ready(g_pod_list_before)

            # Validate iscsi and multipath
            openshift_ops.switch_oc_project(
                self._master, self._prometheus_project_name)
            self.verify_iscsi_sessions_and_multipath(
                pvc_name, prometheus_pod, rtype='pod',
                heketi_server_url=self._registry_heketi_server_url,
                is_registry_gluster=True)

            # Try to fetch metric from prometheus pod
            self._fetch_metric_from_promtheus_pod(
                metric='heketi_device_brick_count')
    def test_prometheous_kill_bhv_brick_process(self):
        """Validate kill brick process of block hosting
        volume with prometheus workload running"""

        # Add check for CRS version
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        if not self.is_containerized_gluster():
            self.skipTest("Skipping this test case as CRS"
                          " version check can not be implemented")

        # Get one of the prometheus pod names and its respective pvc name
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        prometheus_pods = openshift_ops.oc_get_pods(
            self._master, selector=self._prometheus_resources_selector)
        if not prometheus_pods:
            self.skipTest(
                "Skipping test as prometheus pod is not present")

        # Validate iscsi and multipath
        prometheus_pod = list(prometheus_pods.keys())[0]
        pvc_name = openshift_ops.oc_get_custom_resource(
            self._master, "pod",
            ":.spec.volumes[*].persistentVolumeClaim.claimName",
            prometheus_pod)
        self.assertTrue(pvc_name, "Failed to get PVC name")
        pvc_name = pvc_name[0]
        self.verify_iscsi_sessions_and_multipath(
            pvc_name, prometheus_pod, rtype='pod',
            heketi_server_url=self._registry_heketi_server_url,
            is_registry_gluster=True)

        # Try to fetch metric from prometheus pod
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

        # Kill the brick process of a BHV
        gluster_node = list(self._registry_servers_info.keys())[0]
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        bhv_name = self.get_block_hosting_volume_by_pvc_name(
            pvc_name, heketi_server_url=self._registry_heketi_server_url,
            gluster_node=gluster_node, ocp_client_node=self._master)
        vol_status = gluster_ops.get_gluster_vol_status(bhv_name)
        gluster_node_ip, brick_pid = None, None
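        # In the gluster volume status output, brick processes are keyed by
        # their brick path (starting with "/var"), unlike entries such as
        # the Self-heal Daemon; grab the pid of the first brick found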
        for g_node, g_node_data in vol_status.items():
            for process_name, process_data in g_node_data.items():
                if process_name.startswith("/var"):
                    gluster_node_ip = g_node
                    brick_pid = process_data["pid"]
                    break
            if gluster_node_ip and brick_pid:
                break
        self.assertIsNotNone(brick_pid, "Could not find pid for brick")
        cmd = "kill -9 {}".format(brick_pid)
        openshift_ops.cmd_run_on_gluster_pod_or_node(
            self._master, cmd, gluster_node_ip)
        self.addCleanup(self._guster_volume_cleanup, bhv_name)

        # Check if the brick-process has been killed
        killed_pid_cmd = (
            "ps -p {} -o pid --no-headers".format(brick_pid))
        try:
            openshift_ops.cmd_run_on_gluster_pod_or_node(
                self._master, killed_pid_cmd, gluster_node_ip)
        except exceptions.ExecutionError:
            g.log.info("Brick process {} was killed"
                       "successfully".format(brick_pid))

        # Try to fetch metric from prometheus pod
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

        # Start the bhv using force
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        start_vol, _, _ = volume_ops.volume_start(
            gluster_node_ip, bhv_name, force=True)
        self.assertFalse(
            start_vol, "Failed to start volume {}"
            " using force".format(bhv_name))

        # Validate iscsi and multipath
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self.verify_iscsi_sessions_and_multipath(
            pvc_name, prometheus_pod, rtype='pod',
            heketi_server_url=self._registry_heketi_server_url,
            is_registry_gluster=True)

        # Try to fetch metric from prometheus pod
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')
    def initiator_side_failures(self):
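        """Verify iscsi sessions, devices and multipath for an app pod, then
        respin the pod on another node and verify the iscsi logout."""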

        # get storage ips of glusterfs pods
        keys = self.gluster_servers
        gluster_ips = []
        for key in keys:
            gluster_ips.append(self.gluster_servers_info[key]['storage'])
        gluster_ips.sort()

        self.create_storage_class()
        self.create_and_wait_for_pvc()

        # find iqn and hacount from volume info
        pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
        custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
        vol_id = oc_get_custom_resource(self.node, 'pv', custom, pv_name)[0]
        vol_info = heketi_blockvolume_info(
            self.heketi_client_node, self.heketi_server_url, vol_id, json=True)
        iqn = vol_info['blockvolume']['iqn']
        hacount = int(self.sc['hacount'])

        # create app pod
        dc_name, pod_name = self.create_dc_with_pvc(self.pvc_name)

        # Verify iscsi login, devices & mpaths twice: before and after
        # the pod is respun on another node
        for i in range(2):

            # get node hostname from pod info
            pod_info = oc_get_pods(
                self.node, selector='deploymentconfig=%s' % dc_name)
            node = pod_info[pod_name]['node']

            # get the iscsi sessions info from the node
            iscsi = get_iscsi_session(node, iqn)
            self.assertEqual(hacount, len(iscsi))
            iscsi.sort()
            self.assertEqual(set(iscsi), (set(gluster_ips) & set(iscsi)))

            # get the paths info from the node
            devices = get_iscsi_block_devices_by_path(node, iqn).keys()
            self.assertEqual(hacount, len(devices))

            # get mpath names and verify that only one mpath is there
            mpaths = set()
            for device in devices:
                mpaths.add(get_mpath_name_from_device_name(node, device))
            self.assertEqual(1, len(mpaths))

            validate_multipath_pod(
                self.node, pod_name, hacount, mpath=list(mpaths)[0])

            # The node drain, pod respin and logout check below are
            # performed only once
            if i == 1:
                break

            # make the node where the pod is running unschedulable
            oc_adm_manage_node(
                self.node, '--schedulable=false', nodes=[node])

            # make the node schedulable again on cleanup
            self.addCleanup(
                oc_adm_manage_node, self.node, '--schedulable=true',
                nodes=[node])

            # delete the pod so it gets respun on another node
            oc_delete(self.node, 'pod', pod_name)
            wait_for_resource_absence(self.node, 'pod', pod_name)

            # wait for pod to come up
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)

            # get the iscsi session from the previous node to verify logout
            iscsi = get_iscsi_session(node, iqn, raise_on_error=False)
            self.assertFalse(iscsi)
    def test_prometheus_volume_metrics_on_node_reboot(self):
        """Validate volume metrics using prometheus before and after node
        reboot"""

        # Name prefix used for the resources created in this test
        prefix = "autotest-{}".format(utils.get_random_str())

        # Create I/O pod with PVC
        pvc_name = self.create_and_wait_for_pvc()
        pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
            self._master, pvc_name, prefix,
            image=self.io_container_image_cirros)
        self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name,
                        raise_on_absence=False)
        openshift_ops.wait_for_pod_be_ready(
            self._master, pod_name, timeout=60, wait_step=5)

        # Write data on the volume; the 2 minute sleep is required for
        # prometheus to capture the exact values of the metrics
        ret, _, err = openshift_ops.oc_rsh(
            self._master, pod_name, "touch /mnt/file{1..1000}")
        self.assertEqual(
            ret, 0, "Failed to create files in the app pod "
                    "with {}".format(err))
        time.sleep(120)

        # Fetch the metrics and store them in initial_metrics as a dict
        initial_metrics = self._get_and_manipulate_metric_data(
            self.metrics, pvc_name)
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)

        # Get the hostname to reboot where the pod is running
        pod_info = openshift_ops.oc_get_pods(self._master, name=pod_name)
        node_for_reboot = pod_info[pod_name]['node']

        # Get the vm name by the hostname
        vm_name = node_ops.find_vm_name_by_ip_or_hostname(node_for_reboot)

        # power off and on the vm, based on the vm type (gluster node or not)
        if node_for_reboot in self.gluster_servers:
            self.power_off_gluster_node_vm(vm_name, node_for_reboot)
            self.power_on_gluster_node_vm(vm_name, node_for_reboot)
        else:
            self.power_off_vm(vm_name)
            self.power_on_vm(vm_name)
            openshift_ops.wait_for_ocp_node_be_ready(
                self._master, node_for_reboot)

        # Create the new pod and validate the prometheus metrics
        pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
            self._master, pvc_name, prefix)
        self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name)

        # Wait for the POD to be running and for prometheus to refresh data
        openshift_ops.wait_for_pod_be_ready(
            self._master, pod_name, timeout=60, wait_step=5)
        time.sleep(120)

        # Fetch the metrics, store them in final_metrics as a dict and
        # validate against initial_metrics
        final_metrics = self._get_and_manipulate_metric_data(
            self.metrics, pvc_name)
        self.assertEqual(dict(initial_metrics), dict(final_metrics),
                         "Metrics are different post node reboot")