def test_restart_gluster_block_provisioner_pod(self):
        """Restart gluster-block provisioner pod."""

        # Get glusterblock provisioner dc name
        cmd = ("oc get dc | awk '{ print $1 }' | "
               "grep -e glusterblock -e provisioner")
        dc_name = command.cmd_run(cmd, self.ocp_master_node[0], True)

        # create heketi block volume
        vol_info = heketi_blockvolume_create(self.heketi_client_node,
                                             self.heketi_server_url,
                                             size=5, json=True)
        self.assertTrue(vol_info, "Failed to create heketi block"
                        "volume of size 5")
        self.addCleanup(heketi_blockvolume_delete, self.heketi_client_node,
                        self.heketi_server_url, vol_info['id'])

        # restart gluster-block-provisioner-pod
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
        oc_delete(self.ocp_master_node[0], 'pod', pod_name)
        wait_for_resource_absence(self.ocp_master_node[0], 'pod', pod_name)

        # new gluster-pod name
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
        wait_for_pod_be_ready(self.ocp_master_node[0], pod_name)

        # create new heketi block volume
        vol_info = heketi_blockvolume_create(self.heketi_client_node,
                                             self.heketi_server_url,
                                             size=2, json=True)
        self.assertTrue(vol_info, "Failed to create heketi block"
                        "volume of size 2")
        heketi_blockvolume_delete(self.heketi_client_node,
                                  self.heketi_server_url,
                                  vol_info['id'])
    def test_dynamic_provisioning_glusterfile_glusterpod_failure(self):
        """Create glusterblock PVC when gluster pod is down."""

        # Check that we work with containerized Gluster
        if not self.is_containerized_gluster():
            self.skipTest("Only containerized Gluster clusters are supported.")

        mount_path = "/mnt"
        datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id())

        # Create secret and storage class
        self.create_storage_class()

        # Create PVC
        pvc_name = self.create_and_wait_for_pvc()

        # Create app POD with attached volume
        pod_name = oc_create_tiny_pod_with_volume(
            self.node, pvc_name, "test-pvc-mount-on-app-pod",
            mount_path=mount_path)
        self.addCleanup(
            wait_for_resource_absence, self.node, 'pod', pod_name)
        self.addCleanup(oc_delete, self.node, 'pod', pod_name)

        # Wait for app POD be up and running
        wait_for_pod_be_ready(
            self.node, pod_name, timeout=60, wait_step=2)

        # Run IO in background
        io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % (
            pod_name, datafile_path)
        async_io = g.run_async(self.node, io_cmd, "root")

        # Pick up one of the hosts which stores PV brick (4+ nodes case)
        gluster_pod_data = get_gluster_pod_names_by_pvc_name(
            self.node, pvc_name)[0]

        # Delete glusterfs POD from chosen host and wait for spawn of new one
        oc_delete(self.node, 'pod', gluster_pod_data["pod_name"])
        cmd = ("oc get pods -o wide | grep glusterfs | grep %s | "
               "grep -v Terminating | awk '{print $1}'") % (
                   gluster_pod_data["host_name"])
        for w in Waiter(600, 15):
            out = self.cmd_run(cmd)
            new_gluster_pod_name = out.strip().split("\n")[0].strip()
            if not new_gluster_pod_name:
                continue
            else:
                break
        if w.expired:
            error_msg = "exceeded timeout, new gluster pod not created"
            g.log.error(error_msg)
            raise ExecutionError(error_msg)
        new_gluster_pod_name = out.strip().split("\n")[0].strip()
        g.log.info("new gluster pod name is %s" % new_gluster_pod_name)
        wait_for_pod_be_ready(self.node, new_gluster_pod_name)

        # Check that async IO was not interrupted
        ret, out, err = async_io.async_communicate()
        self.assertEqual(ret, 0, "IO %s failed on %s" % (io_cmd, self.node))
Exemplo n.º 3
0
    def test_pv_resize_with_prefix_for_name_and_size(
            self, create_vol_name_prefix=False, valid_size=True):
        """Validate PV resize with and without name prefix"""
        dir_path = "/mnt/"
        node = self.ocp_client[0]

        # Create PVC
        self.create_storage_class(
            allow_volume_expansion=True,
            create_vol_name_prefix=create_vol_name_prefix)
        pvc_name = self.create_and_wait_for_pvc()

        # Create DC with POD and attached PVC to it.
        dc_name = oc_create_app_dc_with_io(node, pvc_name)
        self.addCleanup(oc_delete, node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, node, dc_name, 0)

        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)
        if create_vol_name_prefix:
            ret = heketi_ops.verify_volume_name_prefix(
                node, self.sc['volumenameprefix'], self.sc['secretnamespace'],
                pvc_name, self.heketi_server_url)
            self.assertTrue(ret, "verify volnameprefix failed")
        cmd = ("dd if=/dev/urandom of=%sfile " "bs=100K count=1000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0,
                         "Failed to execute command %s on %s" % (cmd, node))
        pv_name = get_pv_name_from_pvc(node, pvc_name)

        # If resize size is invalid then size should not change
        if valid_size:
            cmd = ("dd if=/dev/urandom of=%sfile2 "
                   "bs=100K count=10000") % dir_path
            with self.assertRaises(AssertionError):
                ret, out, err = oc_rsh(node, pod_name, cmd)
                msg = ("Command '%s' was expected to fail on '%s' node. "
                       "But it returned following: ret is '%s', err is '%s' "
                       "and out is '%s'" % (cmd, node, ret, err, out))
                raise ExecutionError(msg)
            pvc_size = 2
            resize_pvc(node, pvc_name, pvc_size)
            verify_pvc_size(node, pvc_name, pvc_size)
            verify_pv_size(node, pv_name, pvc_size)
        else:
            invalid_pvc_size = 'ten'
            with self.assertRaises(AssertionError):
                resize_pvc(node, pvc_name, invalid_pvc_size)
            verify_pvc_size(node, pvc_name, 1)
            verify_pv_size(node, pv_name, 1)

        oc_delete(node, 'pod', pod_name)
        wait_for_resource_absence(node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)
        cmd = ("dd if=/dev/urandom of=%sfile_new "
               "bs=50K count=10000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0,
                         "Failed to execute command %s on %s" % (cmd, node))
    def test_100gb_block_pvc_create_and_delete_twice(self):
        """Validate creation and deletion of blockvoume of size 100GB"""
        # Define required space, bhv size required for on 100GB block PVC
        size, bhv_size, required_space = 100, 103, 309
        h_node, h_url = self.heketi_client_node, self.heketi_server_url
        prefix = 'autotest-pvc-{}'.format(utils.get_random_str(size=5))

        # Skip test if required free space is not available
        free_space = get_total_free_space(self.heketi_client_node,
                                          self.heketi_server_url)[0]
        if free_space < required_space:
            self.skipTest("Available free space {} is less than the required "
                          "free space {}".format(free_space, required_space))

        # Create block hosting volume of 103GB required for 100GB block PVC
        bhv = heketi_volume_create(h_node,
                                   h_url,
                                   bhv_size,
                                   block=True,
                                   json=True)['id']
        self.addCleanup(heketi_volume_delete, h_node, h_url, bhv)

        for _ in range(2):
            # Create PVC of size 100GB
            pvc_name = self.create_and_wait_for_pvc(pvc_size=size,
                                                    pvc_name_prefix=prefix)
            match_pvc_and_pv(self.node, prefix)

            # Delete the PVC
            oc_delete(self.node, 'pvc', pvc_name)
            wait_for_resource_absence(self.node, 'pvc', pvc_name)
Exemplo n.º 5
0
    def test_dev_path_mapping_gluster_pod_reboot(self):
        """Validate dev path mapping for app pods with file volume after reboot
        """
        # Skip the tc for independent mode
        if not self.is_containerized_gluster():
            self.skipTest("Skip TC as it is not supported in independent mode")

        # Create file volume with app pod and verify IO's
        # and Compare path, uuid, vg_name
        pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

        # Fetch the gluster pod name from node
        g_pod = self._get_gluster_pod()

        # Respin a gluster pod
        openshift_ops.oc_delete(self.node, "pod", g_pod)
        self.addCleanup(self._guster_pod_delete_cleanup)

        # Wait for pod to get absent
        openshift_ops.wait_for_resource_absence(self.node, "pod", g_pod)

        # Fetch gluster pod after delete
        g_pod = self._get_gluster_pod()
        openshift_ops.wait_for_pod_be_ready(self.node, g_pod)

        # Check if IO's are running after respin of gluster pod
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))
    def test_pvc_deletion_while_pod_is_running(self):
        """Validate PVC deletion while pod is running"""
        if get_openshift_version() <= "3.9":
            self.skipTest("PVC deletion while pod is running is not supported"
                          " in OCP older than 3.9")

        # Create DC with POD and attached PVC to it
        sc_name = self.create_storage_class()
        pvc_name = self.create_and_wait_for_pvc(sc_name=sc_name)
        dc_name, pod_name = self.create_dc_with_pvc(pvc_name)

        # Delete PVC
        oc_delete(self.node, 'pvc', self.pvc_name)

        with self.assertRaises(ExecutionError):
            wait_for_resource_absence(self.node,
                                      'pvc',
                                      self.pvc_name,
                                      interval=3,
                                      timeout=30)

        # Make sure we are able to work with files on the mounted volume
        # after deleting pvc.
        filepath = "/mnt/file_for_testing_volume.log"
        cmd = "dd if=/dev/urandom of=%s bs=1K count=100" % filepath
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0, "Failed to execute command %s on %s" % (cmd, self.node))
    def test_restart_gluster_block_provisioner_pod(self):
        """Restart gluster-block provisioner pod
        """

        # create heketi block volume
        vol_info = heketi_blockvolume_create(self.heketi_client_node,
                                             self.heketi_server_url,
                                             size=5, json=True)
        self.assertTrue(vol_info, "Failed to create heketi block"
                        "volume of size 5")
        self.addCleanup(heketi_blockvolume_delete, self.heketi_client_node,
                        self.heketi_server_url, vol_info['id'])

        # restart gluster-block-provisioner-pod
        dc_name = "glusterblock-%s-provisioner-dc" % self.storage_project_name
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
        oc_delete(self.ocp_master_node[0], 'pod', pod_name)
        wait_for_resource_absence(self.ocp_master_node[0], 'pod', pod_name)

        # new gluster-pod name
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
        wait_for_pod_be_ready(self.ocp_master_node[0], pod_name)

        # create new heketi block volume
        vol_info = heketi_blockvolume_create(self.heketi_client_node,
                                             self.heketi_server_url,
                                             size=2, json=True)
        self.assertTrue(vol_info, "Failed to create heketi block"
                        "volume of size 2")
        heketi_blockvolume_delete(self.heketi_client_node,
                                  self.heketi_server_url,
                                  vol_info['id'])
Exemplo n.º 8
0
    def test_restart_gluster_block_provisioner_pod(self):
        """Restart gluster-block provisioner pod
        """

        # create heketi block volume
        vol_info = heketi_blockvolume_create(self.heketi_client_node,
                                             self.heketi_server_url,
                                             size=5,
                                             json=True)
        self.assertTrue(vol_info, "Failed to create heketi block"
                        "volume of size 5")
        self.addCleanup(heketi_blockvolume_delete, self.heketi_client_node,
                        self.heketi_server_url, vol_info['id'])

        # restart gluster-block-provisioner-pod
        dc_name = "glusterblock-%s-provisioner-dc" % self.storage_project_name
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
        oc_delete(self.ocp_master_node[0], 'pod', pod_name)
        wait_for_resource_absence(self.ocp_master_node[0], 'pod', pod_name)

        # new gluster-pod name
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
        wait_for_pod_be_ready(self.ocp_master_node[0], pod_name)

        # create new heketi block volume
        vol_info = heketi_blockvolume_create(self.heketi_client_node,
                                             self.heketi_server_url,
                                             size=2,
                                             json=True)
        self.assertTrue(vol_info, "Failed to create heketi block"
                        "volume of size 2")
        heketi_blockvolume_delete(self.heketi_client_node,
                                  self.heketi_server_url, vol_info['id'])
    def test_node_failure_pv_mounted(self):
        """Test node failure when PV is mounted with app pods running"""
        filepath = "/mnt/file_for_testing_volume.log"
        pvc_name = self.create_and_wait_for_pvc()

        dc_and_pod_names = self.create_dcs_with_pvc(pvc_name)
        dc_name, pod_name = dc_and_pod_names[pvc_name]

        mount_point = "df -kh /mnt -P | tail -1 | awk '{{print $1}}'"
        pod_cmd = "oc exec {} -- {}".format(pod_name, mount_point)
        hostname = command.cmd_run(pod_cmd, hostname=self.node)
        hostname = hostname.split(":")[0]

        vm_name = find_vm_name_by_ip_or_hostname(hostname)
        self.addCleanup(power_on_vm_by_name, vm_name)
        power_off_vm_by_name(vm_name)

        cmd = "dd if=/dev/urandom of={} bs=1K count=100".format(filepath)
        ret, _, err = oc_rsh(self.node, pod_name, cmd)
        self.assertFalse(
            ret, "Failed to execute command {} on {} with error {}".format(
                cmd, self.node, err))

        oc_delete(self.node, 'pod', pod_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        ret, _, err = oc_rsh(self.node, pod_name, cmd)
        self.assertFalse(
            ret, "Failed to execute command {} on {} with error {}".format(
                cmd, self.node, err))
Exemplo n.º 10
0
    def test_heketi_logs_after_heketi_pod_restart(self):

        h_node, h_server = self.heketi_client_node, self.heketi_server_url
        find_string_in_log = r"Started background pending operations cleaner"
        ocp_node = self.ocp_master_node[0]

        # Restart heketi pod
        heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
        oc_delete(ocp_node,
                  'pod',
                  heketi_pod_name,
                  collect_logs=self.heketi_logs_before_delete)
        self.addCleanup(self._heketi_pod_delete_cleanup)
        wait_for_resource_absence(ocp_node, 'pod', heketi_pod_name)
        heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
        wait_for_pod_be_ready(ocp_node, heketi_pod_name)
        self.assertTrue(hello_heketi(h_node, h_server),
                        "Heketi server {} is not alive".format(h_server))

        # Collect logs after heketi pod restart
        cmd = "oc logs {}".format(heketi_pod_name)
        out = cmd_run(cmd, hostname=ocp_node)

        # Validate string is present in heketi logs
        pending_check = re.compile(find_string_in_log)
        entry_list = pending_check.findall(out)
        self.assertIsNotNone(entry_list,
                             "Failed to find entries in heketi logs")

        for entry in entry_list:
            self.assertEqual(
                entry, find_string_in_log,
                "Failed to validate, Expected {}; Actual {}".format(
                    find_string_in_log, entry))
    def test_pvc_deletion_while_pod_is_running(self):
        """Validate PVC deletion while pod is running"""
        if get_openshift_version() <= "3.9":
            self.skipTest(
                "PVC deletion while pod is running is not supported"
                " in OCP older than 3.9")

        # Create DC with POD and attached PVC to it
        sc_name = self.create_storage_class()
        pvc_name = self.create_and_wait_for_pvc(sc_name=sc_name)
        dc_name, pod_name = self.create_dc_with_pvc(pvc_name)

        # Delete PVC
        oc_delete(self.node, 'pvc', self.pvc_name)

        with self.assertRaises(ExecutionError):
            wait_for_resource_absence(
                self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

        # Make sure we are able to work with files on the mounted volume
        # after deleting pvc.
        filepath = "/mnt/file_for_testing_volume.log"
        cmd = "dd if=/dev/urandom of=%s bs=1K count=100" % filepath
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0, "Failed to execute command %s on %s" % (cmd, self.node))
Exemplo n.º 12
0
    def _guster_pod_delete_cleanup(self, g_pod_list_before):
        """Cleanup for deletion of gluster pod using force delete"""
        # Switch to gluster project
        openshift_ops.switch_oc_project(self._master,
                                        self._registry_project_name)
        try:
            # Fetch gluster pod after delete
            pod_name = self._get_newly_deployed_gluster_pod(g_pod_list_before)

            # Check if pod name is empty i.e no new pod come up so use old pod
            openshift_ops.wait_for_pod_be_ready(
                self._master,
                pod_name[0] if pod_name else g_pod_list_before[0],
                timeout=1)
        except exceptions.ExecutionError:
            # Force delete and wait for new pod to come up
            openshift_ops.oc_delete(self._master,
                                    'pod',
                                    g_pod_list_before[0],
                                    is_force=True)
            openshift_ops.wait_for_resource_absence(self._master, 'pod',
                                                    g_pod_list_before[0])

            # Fetch gluster pod after force delete
            g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
            openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])
    def test_dynamic_provisioning_glusterfile_glusterpod_failure(self):
        """Create glusterblock PVC when gluster pod is down."""

        # Check that we work with containerized Gluster
        if not self.is_containerized_gluster():
            self.skipTest("Only containerized Gluster clusters are supported.")

        mount_path = "/mnt"
        datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id())

        # Create secret and storage class
        self.create_storage_class()

        # Create PVC
        pvc_name = self.create_and_wait_for_pvc()

        # Create app POD with attached volume
        pod_name = oc_create_tiny_pod_with_volume(self.node,
                                                  pvc_name,
                                                  "test-pvc-mount-on-app-pod",
                                                  mount_path=mount_path)
        self.addCleanup(wait_for_resource_absence, self.node, 'pod', pod_name)
        self.addCleanup(oc_delete, self.node, 'pod', pod_name)

        # Wait for app POD be up and running
        wait_for_pod_be_ready(self.node, pod_name, timeout=60, wait_step=2)

        # Run IO in background
        io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % (
            pod_name, datafile_path)
        async_io = g.run_async(self.node, io_cmd, "root")

        # Pick up one of the hosts which stores PV brick (4+ nodes case)
        gluster_pod_data = get_gluster_pod_names_by_pvc_name(
            self.node, pvc_name)[0]

        # Delete glusterfs POD from chosen host and wait for spawn of new one
        oc_delete(self.node, 'pod', gluster_pod_data["pod_name"])
        cmd = ("oc get pods -o wide | grep glusterfs | grep %s | "
               "grep -v Terminating | awk '{print $1}'") % (
                   gluster_pod_data["host_name"])
        for w in Waiter(600, 15):
            out = self.cmd_run(cmd)
            new_gluster_pod_name = out.strip().split("\n")[0].strip()
            if not new_gluster_pod_name:
                continue
            else:
                break
        if w.expired:
            error_msg = "exceeded timeout, new gluster pod not created"
            g.log.error(error_msg)
            raise ExecutionError(error_msg)
        new_gluster_pod_name = out.strip().split("\n")[0].strip()
        g.log.info("new gluster pod name is %s" % new_gluster_pod_name)
        wait_for_pod_be_ready(self.node, new_gluster_pod_name)

        # Check that async IO was not interrupted
        ret, out, err = async_io.async_communicate()
        self.assertEqual(ret, 0, "IO %s failed on %s" % (io_cmd, self.node))
    def _pv_resize(self, exceed_free_space):
        dir_path = "/mnt"
        pvc_size_gb = 1

        available_size_gb = self._available_disk_free_space()

        # Create PVC
        self.create_storage_class(allow_volume_expansion=True)
        pvc_name = self.create_and_wait_for_pvc(pvc_size=pvc_size_gb)

        # Create DC with POD and attached PVC to it
        dc_name = oc_create_app_dc_with_io(
            self.node, pvc_name, image=self.io_container_image_cirros)
        self.addCleanup(oc_delete, self.node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        if exceed_free_space:
            exceed_size = available_size_gb + 10

            # Try to expand existing PVC exceeding free space
            resize_pvc(self.node, pvc_name, exceed_size)
            wait_for_events(self.node,
                            obj_name=pvc_name,
                            event_reason='VolumeResizeFailed')

            # Check that app POD is up and runnig then try to write data
            wait_for_pod_be_ready(self.node, pod_name)
            cmd = ("dd if=/dev/urandom of=%s/autotest bs=100K count=1" %
                   dir_path)
            ret, out, err = oc_rsh(self.node, pod_name, cmd)
            self.assertEqual(
                ret, 0,
                "Failed to write data after failed attempt to expand PVC.")
        else:
            # Expand existing PVC using all the available free space
            expand_size_gb = available_size_gb - pvc_size_gb
            resize_pvc(self.node, pvc_name, expand_size_gb)
            verify_pvc_size(self.node, pvc_name, expand_size_gb)
            pv_name = get_pv_name_from_pvc(self.node, pvc_name)
            verify_pv_size(self.node, pv_name, expand_size_gb)
            wait_for_events(self.node,
                            obj_name=pvc_name,
                            event_reason='VolumeResizeSuccessful')

            # Recreate app POD
            oc_delete(self.node, 'pod', pod_name)
            wait_for_resource_absence(self.node, 'pod', pod_name)
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)

            # Write data on the expanded PVC
            cmd = ("dd if=/dev/urandom of=%s/autotest "
                   "bs=1M count=1025" % dir_path)
            ret, out, err = oc_rsh(self.node, pod_name, cmd)
            self.assertEqual(ret, 0,
                             "Failed to write data on the expanded PVC")
Exemplo n.º 15
0
    def test_arbiter_volume_delete_using_pvc(self):
        """Test Arbiter volume delete using pvc when volume is not mounted
           on app pod
        """
        prefix = "autotest-%s" % utils.get_random_str()

        # Create sc with gluster arbiter info
        sc_name = self.create_storage_class(vol_name_prefix=prefix,
                                            is_arbiter_vol=True)

        # Create PVC and wait for it to be in 'Bound' state
        pvc_name = self.create_and_wait_for_pvc(pvc_name_prefix=prefix,
                                                sc_name=sc_name)

        # Get vol info
        gluster_vol_info = openshift_ops.get_gluster_vol_info_by_pvc_name(
            self.node, pvc_name)

        # Verify arbiter volume properties
        self.verify_amount_and_proportion_of_arbiter_and_data_bricks(
            gluster_vol_info)

        # Get volume ID
        gluster_vol_id = gluster_vol_info["gluster_vol_id"]

        # Delete the pvc
        openshift_ops.oc_delete(self.node, 'pvc', pvc_name)
        openshift_ops.wait_for_resource_absence(self.node, 'pvc', pvc_name)

        # Check the heketi volume list if pvc is deleted
        g.log.info("List heketi volumes")
        heketi_volumes = heketi_ops.heketi_volume_list(self.heketi_client_node,
                                                       self.heketi_server_url)

        err_msg = "Failed to delete heketi volume by prefix %s" % prefix
        self.assertNotIn(prefix, heketi_volumes, err_msg)

        # Check presence for the gluster volume
        get_gluster_vol_info = volume_ops.get_volume_info(
            "auto_get_gluster_endpoint", gluster_vol_id)
        err_msg = "Failed to delete gluster volume %s" % gluster_vol_id
        self.assertFalse(get_gluster_vol_info, err_msg)

        # Check presence of bricks and lvs
        for brick in gluster_vol_info['bricks']['brick']:
            gluster_node_ip, brick_name = brick["name"].split(":")

            with self.assertRaises(exceptions.ExecutionError):
                cmd = "df %s" % brick_name
                openshift_ops.cmd_run_on_gluster_pod_or_node(
                    self.node, cmd, gluster_node_ip)

            with self.assertRaises(exceptions.ExecutionError):
                lv_match = re.search(BRICK_REGEX, brick["name"])
                if lv_match:
                    cmd = "lvs %s" % lv_match.group(2).strip()
                    openshift_ops.cmd_run_on_gluster_pod_or_node(
                        self.node, cmd, gluster_node_ip)
    def test_respin_prometheus_pod(self, motive="delete"):
        """Validate respin of prometheus pod"""
        if motive == 'drain':

            # Get the number of infra nodes
            infra_node_count_cmd = (
                'oc get nodes '
                '--no-headers -l node-role.kubernetes.io/infra=true|wc -l')
            infra_node_count = command.cmd_run(
                infra_node_count_cmd, self._master)

            # Skip test case if number infra nodes are less than #2
            if int(infra_node_count) < 2:
                self.skipTest('Available number of infra nodes "{}", it should'
                              ' be more than 1'.format(infra_node_count))

        # Get PVC names and pod names
        pod_names, pvc_names = self._get_pod_names_and_pvc_names()

        # Validate iscsi and multipath
        for (pvc_name, pod_name) in zip(pvc_names, pod_names):
            _, _, node = self.verify_iscsi_sessions_and_multipath(
                pvc_name, pod_name[0], rtype='pod',
                heketi_server_url=self._registry_heketi_server_url,
                is_registry_gluster=True)

        # Delete the prometheus pods
        if motive == 'delete':
            for pod_name in pod_names:
                openshift_ops.oc_delete(self._master, 'pod', pod_name[0])

        # Drain the node
        elif motive == 'drain':
            drain_cmd = ('oc adm drain {} --force=true --ignore-daemonsets '
                         '--delete-local-data'.format(node))
            command.cmd_run(drain_cmd, hostname=self._master)

            # Cleanup to make node schedulable
            cmd_schedule = (
                'oc adm manage-node {} --schedulable=true'.format(node))
            self.addCleanup(
                command.cmd_run, cmd_schedule, hostname=self._master)

        # Validate that there should be no or zero pods in non-running state
        field_selector, pod_count = "status.phase!=Running", 0
        openshift_ops.wait_for_pods_be_ready(
            self._master, pod_count, field_selector=field_selector)

        # Validate iscsi and multipath
        for (pvc_name, pod_name) in zip(pvc_names, pod_names):
            self.verify_iscsi_sessions_and_multipath(
                pvc_name, pod_name[0], rtype='pod',
                heketi_server_url=self._registry_heketi_server_url,
                is_registry_gluster=True)

        # Try to fetch metric from prometheus pod
        self._fetch_metric_from_promtheus_pod(metric='kube_node_info')
Exemplo n.º 17
0
    def test_restart_heketi_pod(self):
        """Validate restarting heketi pod"""

        # create heketi volume
        vol_info = heketi_volume_create(self.heketi_client_node,
                                        self.heketi_server_url,
                                        size=1,
                                        json=True)
        self.assertTrue(vol_info, "Failed to create heketi volume of size 1")
        self.addCleanup(heketi_volume_delete,
                        self.heketi_client_node,
                        self.heketi_server_url,
                        vol_info['id'],
                        raise_on_error=False)
        topo_info = heketi_topology_info(self.heketi_client_node,
                                         self.heketi_server_url,
                                         json=True)

        # get heketi-pod name
        heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                               self.heketi_dc_name)

        # delete heketi-pod (it restarts the pod)
        oc_delete(self.ocp_master_node[0],
                  'pod',
                  heketi_pod_name,
                  collect_logs=self.heketi_logs_before_delete)
        wait_for_resource_absence(self.ocp_master_node[0], 'pod',
                                  heketi_pod_name)

        # get new heketi-pod name
        heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                               self.heketi_dc_name)
        wait_for_pod_be_ready(self.ocp_master_node[0], heketi_pod_name)

        # check heketi server is running
        self.assertTrue(
            hello_heketi(self.heketi_client_node, self.heketi_server_url),
            "Heketi server %s is not alive" % self.heketi_server_url)

        # compare the topology
        new_topo_info = heketi_topology_info(self.heketi_client_node,
                                             self.heketi_server_url,
                                             json=True)
        self.assertEqual(
            new_topo_info, topo_info, "topology info is not same,"
            " difference - %s" % diff(topo_info, new_topo_info))

        # create new volume
        vol_info = heketi_volume_create(self.heketi_client_node,
                                        self.heketi_server_url,
                                        size=2,
                                        json=True)
        self.assertTrue(vol_info, "Failed to create heketi volume of size 20")
        heketi_volume_delete(self.heketi_client_node, self.heketi_server_url,
                             vol_info['id'])
    def test_pv_resize_with_prefix_for_name(self,
                                            create_vol_name_prefix=False):
        """Validate PV resize with and without name prefix"""
        dir_path = "/mnt/"
        node = self.ocp_client[0]

        # Create PVC
        self.create_storage_class(
            allow_volume_expansion=True,
            create_vol_name_prefix=create_vol_name_prefix)
        pvc_name = self.create_and_wait_for_pvc()

        # Create DC with POD and attached PVC to it.
        dc_name = oc_create_app_dc_with_io(node, pvc_name)
        self.addCleanup(oc_delete, node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait,
                        node, dc_name, 0)

        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)
        if create_vol_name_prefix:
            ret = heketi_ops.verify_volume_name_prefix(
                node, self.sc['volumenameprefix'],
                self.sc['secretnamespace'],
                pvc_name, self.heketi_server_url)
            self.assertTrue(ret, "verify volnameprefix failed")
        cmd = ("dd if=/dev/urandom of=%sfile "
               "bs=100K count=1000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0, "failed to execute command %s on %s" % (
                             cmd, node))
        cmd = ("dd if=/dev/urandom of=%sfile2 "
               "bs=100K count=10000") % dir_path
        with self.assertRaises(AssertionError):
            ret, out, err = oc_rsh(node, pod_name, cmd)
            msg = ("Command '%s' was expected to fail on '%s' node. "
                   "But it returned following: ret is '%s', err is '%s' "
                   "and out is '%s'" % (cmd, node, ret, err, out))
            raise ExecutionError(msg)

        pvc_size = 2
        resize_pvc(node, pvc_name, pvc_size)
        verify_pvc_size(node, pvc_name, pvc_size)
        pv_name = get_pv_name_from_pvc(node, pvc_name)
        verify_pv_size(node, pv_name, pvc_size)
        oc_delete(node, 'pod', pod_name)
        wait_for_resource_absence(node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)
        cmd = ("dd if=/dev/urandom of=%sfile_new "
               "bs=50K count=10000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0, "failed to execute command %s on %s" % (
                             cmd, node))
Exemplo n.º 19
0
    def test_prometheus_volume_metrics_on_pod_restart(self):
        """Validate volume metrics using prometheus before and after pod
        restart"""

        # Create PVC and wait for it to be in 'Bound' state
        pvc_name = self.create_and_wait_for_pvc()
        pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
            self._master, pvc_name, "autotest-volume",
            image=self.io_container_image_cirros)
        self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name,
                        raise_on_absence=False)

        # Wait for POD be up and running
        openshift_ops.wait_for_pod_be_ready(
            self._master, pod_name, timeout=60, wait_step=2)

        # Write data on the volume and wait for 2 mins and sleep is must for
        # prometheus to get the exact values of the metrics
        self._run_io_on_the_pod(pod_name, 30)
        time.sleep(120)

        # Fetching the metrics and storing in initial_metrics as dictionary
        initial_metrics = self._get_and_manipulate_metric_data(
            self.metrics, pvc_name)

        # Mark the current node unschedulable on which app pod is running
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)
        pod_info = openshift_ops.oc_get_pods(self._master, name=pod_name)
        openshift_ops.oc_adm_manage_node(
            self._master, '--schedulable=false',
            nodes=[pod_info[pod_name]["node"]])
        self.addCleanup(
            openshift_ops.oc_adm_manage_node, self._master,
            '--schedulable=true', nodes=[pod_info[pod_name]["node"]])

        # Delete the existing pod and create a new pod
        openshift_ops.oc_delete(self._master, 'pod', pod_name)
        pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
            self._master, pvc_name, "autotest-volume")
        self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name)

        # Wait for POD be up and running and prometheus to refresh the data
        openshift_ops.wait_for_pod_be_ready(
            self._master, pod_name, timeout=60, wait_step=2)
        time.sleep(120)

        # Fetching the metrics and storing in final_metrics as dictionary and
        # validating with initial_metrics
        final_metrics = self._get_and_manipulate_metric_data(
            self.metrics, pvc_name)
        self.assertEqual(dict(initial_metrics), dict(final_metrics),
                         "Metrics are different post pod restart")
    def _delete_and_wait_for_new_es_pod_to_come_up(self):

        # Force delete and wait for es pod to come up
        openshift_ops.switch_oc_project(
            self._master, self._logging_project_name)
        pod_name = openshift_ops.get_pod_name_from_dc(
            self._master, self._logging_es_dc)
        openshift_ops.oc_delete(self._master, 'pod', pod_name, is_force=True)
        openshift_ops.wait_for_resource_absence(self._master, 'pod', pod_name)
        new_pod_name = openshift_ops.get_pod_name_from_dc(
            self._master, self._logging_es_dc)
        openshift_ops.wait_for_pod_be_ready(
            self._master, new_pod_name, timeout=1800)
    def test_restart_heketi_pod(self):
        """Validate restarting heketi pod"""

        # create heketi volume
        vol_info = heketi_volume_create(self.heketi_client_node,
                                        self.heketi_server_url,
                                        size=1, json=True)
        self.assertTrue(vol_info, "Failed to create heketi volume of size 1")
        self.addCleanup(
            heketi_volume_delete, self.heketi_client_node,
            self.heketi_server_url, vol_info['id'], raise_on_error=False)
        topo_info = heketi_topology_info(self.heketi_client_node,
                                         self.heketi_server_url,
                                         json=True)

        # get heketi-pod name
        heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                               self.heketi_dc_name)

        # delete heketi-pod (it restarts the pod)
        oc_delete(self.ocp_master_node[0], 'pod', heketi_pod_name)
        wait_for_resource_absence(self.ocp_master_node[0],
                                  'pod', heketi_pod_name)

        # get new heketi-pod name
        heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                               self.heketi_dc_name)
        wait_for_pod_be_ready(self.ocp_master_node[0],
                              heketi_pod_name)

        # check heketi server is running
        self.assertTrue(
            hello_heketi(self.heketi_client_node, self.heketi_server_url),
            "Heketi server %s is not alive" % self.heketi_server_url
        )

        # compare the topology
        new_topo_info = heketi_topology_info(self.heketi_client_node,
                                             self.heketi_server_url,
                                             json=True)
        self.assertEqual(new_topo_info, topo_info, "topology info is not same,"
                         " difference - %s" % diff(topo_info, new_topo_info))

        # create new volume
        vol_info = heketi_volume_create(self.heketi_client_node,
                                        self.heketi_server_url,
                                        size=2, json=True)
        self.assertTrue(vol_info, "Failed to create heketi volume of size 20")
        heketi_volume_delete(
            self.heketi_client_node, self.heketi_server_url, vol_info['id'])
Exemplo n.º 22
0
    def test_metrics_during_cassandra_pod_respin(self, motive='delete'):
        """Validate cassandra pod respin"""
        old_cassandra_pod, pvc_name, _, _, node = (
            self.verify_cassandra_pod_multipath_and_iscsi())

        if motive == 'delete':
            # Delete the cassandra pod
            oc_delete(self.master, 'pod', old_cassandra_pod)
            self.addCleanup(self.cassandra_pod_delete_cleanup)
        elif motive == 'drain':
            # Get the number of infra nodes
            infra_node_count_cmd = (
                'oc get nodes '
                '--no-headers -l node-role.kubernetes.io/infra=true|wc -l')
            infra_node_count = command.cmd_run(infra_node_count_cmd,
                                               self.master)

            # Skip test case if number infra nodes are less than #2
            if int(infra_node_count) < 2:
                self.skipTest('Available number of infra nodes "{}", it should'
                              ' be more than 1'.format(infra_node_count))

            # Drain the node
            drain_cmd = ('oc adm drain {} --force=true --ignore-daemonsets '
                         '--delete-local-data'.format(node))
            command.cmd_run(drain_cmd, hostname=self.master)

            # Cleanup to make node schedulable
            cmd_schedule = (
                'oc adm manage-node {} --schedulable=true'.format(node))
            self.addCleanup(command.cmd_run,
                            cmd_schedule,
                            hostname=self.master)

        # Wait for pod to get absent
        wait_for_resource_absence(self.master, 'pod', old_cassandra_pod)

        # Wait for new pod to come up
        new_cassandra_pod = get_pod_name_from_rc(
            self.master, self.metrics_rc_hawkular_cassandra)
        wait_for_pod_be_ready(self.master, new_cassandra_pod)

        # Validate iscsi and multipath
        self.verify_iscsi_sessions_and_multipath(
            pvc_name,
            self.metrics_rc_hawkular_cassandra,
            rtype='rc',
            heketi_server_url=self.registry_heketi_server_url,
            is_registry_gluster=True)
Exemplo n.º 23
0
    def _heketi_pod_delete_cleanup(self):
        """Cleanup for deletion of heketi pod using force delete"""
        try:
            # Fetch heketi pod after delete
            pod_name = openshift_ops.get_pod_name_from_dc(
                self.node, self.heketi_dc_name)
            openshift_ops.wait_for_pod_be_ready(self.node, pod_name, timeout=1)
        except exceptions.ExecutionError:

            # Force delete and wait for new pod to come up
            openshift_ops.oc_delete(self.node, 'pod', pod_name, is_force=True)
            openshift_ops.wait_for_resource_absence(self.node, 'pod', pod_name)
            new_pod_name = openshift_ops.get_pod_name_from_dc(
                self.node, self.heketi_dc_name)
            openshift_ops.wait_for_pod_be_ready(self.node, new_pod_name)
Exemplo n.º 24
0
    def test_pv_resize_with_prefix_for_name(self,
                                            create_vol_name_prefix=False):
        """Validate PV resize with and without name prefix"""
        dir_path = "/mnt/"
        node = self.ocp_client[0]

        # Create PVC
        self.create_storage_class(
            allow_volume_expansion=True,
            create_vol_name_prefix=create_vol_name_prefix)
        pvc_name = self.create_and_wait_for_pvc()

        # Create DC with POD and attached PVC to it.
        dc_name = oc_create_app_dc_with_io(node, pvc_name)
        self.addCleanup(oc_delete, node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, node, dc_name, 0)

        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)
        if create_vol_name_prefix:
            ret = heketi_ops.verify_volume_name_prefix(
                node, self.sc['volumenameprefix'], self.sc['secretnamespace'],
                pvc_name, self.heketi_server_url)
            self.assertTrue(ret, "verify volnameprefix failed")
        cmd = ("dd if=/dev/urandom of=%sfile " "bs=100K count=1000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0,
                         "failed to execute command %s on %s" % (cmd, node))
        cmd = ("dd if=/dev/urandom of=%sfile2 "
               "bs=100K count=10000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertNotEqual(
            ret, 0, " This IO did not fail as expected "
            "command %s on %s" % (cmd, node))
        pvc_size = 2
        resize_pvc(node, pvc_name, pvc_size)
        verify_pvc_size(node, pvc_name, pvc_size)
        pv_name = get_pv_name_from_pvc(node, pvc_name)
        verify_pv_size(node, pv_name, pvc_size)
        oc_delete(node, 'pod', pod_name)
        wait_for_resource_absence(node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)
        cmd = ("dd if=/dev/urandom of=%sfile_new "
               "bs=50K count=10000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0,
                         "failed to execute command %s on %s" % (cmd, node))
Exemplo n.º 25
0
    def _guster_pod_delete_cleanup(self):
        """Cleanup for deletion of gluster pod using force delete"""
        try:
            # Fetch gluster pod after delete
            pod_name = self._get_gluster_pod()

            # Check if gluster pod name is ready state
            openshift_ops.wait_for_pod_be_ready(self.node, pod_name, timeout=1)
        except exceptions.ExecutionError:
            # Force delete and wait for new pod to come up
            openshift_ops.oc_delete(self.node, 'pod', pod_name, is_force=True)
            openshift_ops.wait_for_resource_absence(self.node, 'pod', pod_name)

            # Fetch gluster pod after force delete
            g_new_pod = self._get_gluster_pod()
            openshift_ops.wait_for_pod_be_ready(self.node, g_new_pod)
Exemplo n.º 26
0
    def test_verify_metrics_data_during_gluster_pod_respin(self):
        # Add check for CRS version
        switch_oc_project(self.master, self.registry_project_name)
        if not self.is_containerized_gluster():
            self.skipTest("Skipping this test case as CRS version check "
                          "can not be implemented")

        # Verify multipath and iscsi for cassandra pod
        switch_oc_project(self.master, self.metrics_project_name)
        hawkular_cassandra, pvc_name, iqn, _, node = (
            self.verify_cassandra_pod_multipath_and_iscsi())

        # Get the ip of active path
        device_and_ip = get_iscsi_block_devices_by_path(node, iqn)
        mpath = get_mpath_name_from_device_name(node,
                                                list(device_and_ip.keys())[0])
        active_passive_dict = get_active_and_enabled_devices_from_mpath(
            node, mpath)
        node_ip = device_and_ip[active_passive_dict['active'][0]]

        # Get the name of gluster pod from the ip
        switch_oc_project(self.master, self.registry_project_name)
        gluster_pods = get_ocp_gluster_pod_details(self.master)
        pod_name = list(
            filter(lambda pod: (pod["pod_host_ip"] == node_ip),
                   gluster_pods))[0]["pod_name"]
        err_msg = "Failed to get the gluster pod name {} with active path"
        self.assertTrue(pod_name, err_msg.format(pod_name))

        # Delete the pod
        oc_delete(self.master, 'pod', pod_name)
        wait_for_resource_absence(self.master, 'pod', pod_name)

        # Wait for new pod to come up
        pod_count = len(self.registry_servers_info.keys())
        selector = "glusterfs-node=pod"
        wait_for_pods_be_ready(self.master, pod_count, selector)

        # Validate cassandra pod state, multipath and issci
        switch_oc_project(self.master, self.metrics_project_name)
        wait_for_pod_be_ready(self.master, hawkular_cassandra, timeout=2)
        self.verify_iscsi_sessions_and_multipath(
            pvc_name,
            self.metrics_rc_hawkular_cassandra,
            rtype='rc',
            heketi_server_url=self.registry_heketi_server_url,
            is_registry_gluster=True)
Exemplo n.º 27
0
    def _heketi_pod_delete_cleanup(self):
        """Cleanup for deletion of heketi pod using force delete"""
        try:
            pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                            self.heketi_dc_name)

            # Check if heketi pod name is ready state
            wait_for_pod_be_ready(self.ocp_master_node[0], pod_name, timeout=1)
        except ExecutionError:
            # Force delete and wait for new pod to come up
            oc_delete(self.ocp_master_node[0], 'pod', pod_name, is_force=True)
            wait_for_resource_absence(self.ocp_master_node[0], 'pod', pod_name)

            # Fetch heketi pod after force delete
            pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                            self.heketi_dc_name)
            wait_for_pod_be_ready(self.ocp_master_node[0], pod_name)
Exemplo n.º 28
0
 def cassandra_pod_delete_cleanup(self, raise_on_error=False):
     """Cleanup for deletion of cassandra pod using force delete"""
     switch_oc_project(self.master, self.metrics_project_name)
     try:
         # Check if pod is up or ready
         pod_name = get_pod_name_from_rc(self.master,
                                         self.metrics_rc_hawkular_cassandra)
         wait_for_pod_be_ready(self.master, pod_name, timeout=1)
     except exceptions.ExecutionError as err:
         # Force delete and wait for new pod to come up
         oc_delete(self.master, 'pod', pod_name, is_force=True)
         wait_for_resource_absence(self.master, 'pod', pod_name)
         new_pod_name = get_pod_name_from_rc(
             self.master, self.metrics_rc_hawkular_cassandra)
         wait_for_pod_be_ready(self.master, new_pod_name)
         if raise_on_error:
             raise err
Exemplo n.º 29
0
    def test_resping_gluster_pod(self):
        """Validate gluster pod restart with no disruption to elasticsearch pod
        """
        restart_custom = ":status.containerStatuses[0].restartCount"

        # Fetch pod and validate iscsi and multipath
        es_pod, _ = self._get_es_pod_and_verify_iscsi_sessions()

        # Fetch the restart count for the es pod
        restart_count_before = openshift_ops.oc_get_custom_resource(
            self._master, "pod", restart_custom, es_pod)[0]

        # Switch to gluster project
        openshift_ops.switch_oc_project(self._master,
                                        self._registry_project_name)

        # Fetch the gluster pod list before
        g_pod_list_before = [
            pod["pod_name"]
            for pod in openshift_ops.get_ocp_gluster_pod_details(self._master)
        ]

        # Respin a gluster pod
        openshift_ops.oc_delete(self._master, "pod", g_pod_list_before[0])
        self.addCleanup(self._guster_pod_delete_cleanup, g_pod_list_before)

        # Wait for pod to get absent
        openshift_ops.wait_for_resource_absence(self._master, "pod",
                                                g_pod_list_before[0])

        # Fetch gluster pod after delete
        g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
        openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])

        # Switch to logging project
        openshift_ops.switch_oc_project(self._master,
                                        self._logging_project_name)

        # Fetch the restart count for the es pod
        restart_count_after = openshift_ops.oc_get_custom_resource(
            self._master, "pod", restart_custom, es_pod)[0]
        self.assertEqual(
            restart_count_before, restart_count_after,
            "Failed disruption to es pod found expecting restart count before"
            " {} and after {} for es pod to be equal after gluster pod"
            " respin".format(restart_count_before, restart_count_after))
    def _guster_pod_delete(self, g_pod_list_before):
        """Delete the gluster pod using force delete"""
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)

        # Fetch newly deployed gluster pod after delete
        try:
            pod_name = self._get_newly_deployed_gluster_pod(g_pod_list_before)
            openshift_ops.wait_for_pod_be_ready(
                self._master,
                pod_name[0] if pod_name else g_pod_list_before[0],
                timeout=120, wait_step=6)
        except exceptions.ExecutionError:
            openshift_ops.oc_delete(
                self._master, 'pod', g_pod_list_before[0], is_force=True)
            openshift_ops.wait_for_resource_absence(
                self._master, 'pod', g_pod_list_before[0])
            g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
            openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])
Exemplo n.º 31
0
    def _respin_heketi_pod(self):
        h_node, h_url = self.heketi_client_node, self.heketi_server_url
        ocp_node = self.ocp_master_node[0]

        # get heketi-pod name
        heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
        # delete heketi-pod (it restarts the pod)
        oc_delete(ocp_node,
                  "pod",
                  heketi_pod_name,
                  collect_logs=self.heketi_logs_before_delete)
        wait_for_resource_absence(ocp_node, "pod", heketi_pod_name)

        # get new heketi-pod name
        heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
        wait_for_pod_be_ready(ocp_node, heketi_pod_name)

        # check heketi server is running
        err_msg = "Heketi server %s is not alive" % h_url
        self.assertTrue(hello_heketi(h_node, h_url), err_msg)
    def test_dynamic_provisioning_glusterfile_heketidown_pvc_delete(self):
        """Validate deletion of PVC's when heketi is down"""

        # Create storage class, secret and PVCs
        self.create_storage_class()
        self.pvc_name_list = self.create_and_wait_for_pvcs(
            1, 'pvc-heketi-down', 3)

        # remove heketi-pod
        scale_dc_pod_amount_and_wait(self.ocp_client[0], self.heketi_dc_name,
                                     0, self.storage_project_name)
        try:
            # delete pvc
            for pvc in self.pvc_name_list:
                oc_delete(self.ocp_client[0], 'pvc', pvc)
            for pvc in self.pvc_name_list:
                with self.assertRaises(ExecutionError):
                    wait_for_resource_absence(self.ocp_client[0],
                                              'pvc',
                                              pvc,
                                              interval=3,
                                              timeout=30)
        finally:
            # bring back heketi-pod
            scale_dc_pod_amount_and_wait(self.ocp_client[0],
                                         self.heketi_dc_name, 1,
                                         self.storage_project_name)

        # verify PVC's are deleted
        for pvc in self.pvc_name_list:
            wait_for_resource_absence(self.ocp_client[0],
                                      'pvc',
                                      pvc,
                                      interval=1,
                                      timeout=120)

        # create a new PVC
        self.create_and_wait_for_pvc()
Exemplo n.º 33
0
    def test_dev_path_mapping_heketi_pod_reboot(self):
        """Validate dev path mapping for heketi pod reboot
        """
        self.node = self.ocp_master_node[0]
        h_node, h_url = self.heketi_client_node, self.heketi_server_url

        # Create file volume with app pod and verify IO's
        # and Compare path, uuid, vg_name
        pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

        # Fetch heketi-pod name
        heketi_pod_name = openshift_ops.get_pod_name_from_dc(
            self.node, self.heketi_dc_name)

        # Respin heketi-pod (it restarts the pod)
        openshift_ops.oc_delete(self.node,
                                "pod",
                                heketi_pod_name,
                                collect_logs=self.heketi_logs_before_delete)
        self.addCleanup(self._heketi_pod_delete_cleanup)
        openshift_ops.wait_for_resource_absence(self.node, "pod",
                                                heketi_pod_name)

        # Fetch new heketi-pod name
        heketi_pod_name = openshift_ops.get_pod_name_from_dc(
            self.node, self.heketi_dc_name)
        openshift_ops.wait_for_pod_be_ready(self.node, heketi_pod_name)

        # Check heketi server is running
        self.assertTrue(heketi_ops.hello_heketi(h_node, h_url),
                        "Heketi server {} is not alive".format(h_url))

        # Check if IO's are running after respin of heketi pod
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))
Exemplo n.º 34
0
    def test_dev_path_mapping_app_pod_with_block_volume_reboot(self):
        """Validate dev path mapping for app pods with block volume after reboot
        """
        # Create block volume with app pod and verify IO's
        # and Compare path, uuid, vg_name
        pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

        # Delete app pods
        openshift_ops.oc_delete(self.node, 'pod', pod_name)
        openshift_ops.wait_for_resource_absence(self.node, 'pod', pod_name)

        # Wait for the new app pod to come up
        self.assertTrue(dc_name,
                        "Failed to get the dc name from {}".format(dc_name))
        pod_name = openshift_ops.get_pod_name_from_dc(self.node, dc_name)
        openshift_ops.wait_for_pod_be_ready(self.node, pod_name)

        # Check if IO's are running after respin of app pod
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))
    def test_dynamic_provisioning_glusterblock_heketidown_pvc_delete(self):
        """Validate PVC deletion when heketi is down"""

        # Create Secret, SC and PVCs
        self.create_storage_class()
        self.pvc_name_list = self.create_and_wait_for_pvcs(
            1, 'pvc-heketi-down', 3)

        # remove heketi-pod
        scale_dc_pod_amount_and_wait(self.ocp_client[0],
                                     self.heketi_dc_name,
                                     0,
                                     self.storage_project_name)
        try:
            # delete pvc
            for pvc in self.pvc_name_list:
                oc_delete(self.ocp_client[0], 'pvc', pvc)
            for pvc in self.pvc_name_list:
                with self.assertRaises(ExecutionError):
                    wait_for_resource_absence(
                       self.ocp_client[0], 'pvc', pvc,
                       interval=3, timeout=30)
        finally:
            # bring back heketi-pod
            scale_dc_pod_amount_and_wait(self.ocp_client[0],
                                         self.heketi_dc_name,
                                         1,
                                         self.storage_project_name)

        # verify PVC's are deleted
        for pvc in self.pvc_name_list:
            wait_for_resource_absence(self.ocp_client[0], 'pvc',
                                      pvc,
                                      interval=1, timeout=120)

        # create a new PVC
        self.create_and_wait_for_pvc()
    def test_dynamic_provisioning_glusterblock_reclaim_policy_retain(self):
        """Validate retain policy for gluster-block after PVC deletion"""

        if get_openshift_version() < "3.9":
            self.skipTest(
                "'Reclaim' feature is not supported in OCP older than 3.9")

        self.create_storage_class(reclaim_policy='Retain')
        self.create_and_wait_for_pvc()

        dc_name = oc_create_app_dc_with_io(self.node, self.pvc_name)

        try:
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)
        finally:
            scale_dc_pod_amount_and_wait(self.node, dc_name, pod_amount=0)
            oc_delete(self.node, 'dc', dc_name)

        # get the name of volume
        pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)

        custom = [
            r':.metadata.annotations."gluster\.org\/volume\-id"',
            r':.spec.persistentVolumeReclaimPolicy'
        ]
        vol_id, reclaim_policy = oc_get_custom_resource(
            self.node, 'pv', custom, pv_name)

        # checking the retainPolicy of pvc
        self.assertEqual(reclaim_policy, 'Retain')

        # delete the pvc
        oc_delete(self.node, 'pvc', self.pvc_name)

        # check if pv is also deleted or not
        with self.assertRaises(ExecutionError):
            wait_for_resource_absence(self.node,
                                      'pvc',
                                      self.pvc_name,
                                      interval=3,
                                      timeout=30)

        # getting the blockvol list
        blocklist = heketi_blockvolume_list(self.heketi_client_node,
                                            self.heketi_server_url)
        self.assertIn(vol_id, blocklist)

        heketi_blockvolume_delete(self.heketi_client_node,
                                  self.heketi_server_url, vol_id)
        blocklist = heketi_blockvolume_list(self.heketi_client_node,
                                            self.heketi_server_url)
        self.assertNotIn(vol_id, blocklist)
        oc_delete(self.node, 'pv', pv_name)
        wait_for_resource_absence(self.node, 'pv', pv_name)
    def test_dynamic_provisioning_glusterblock_reclaim_policy_retain(self):
        """Validate retain policy for gluster-block after PVC deletion"""

        if get_openshift_version() < "3.9":
            self.skipTest(
                "'Reclaim' feature is not supported in OCP older than 3.9")

        self.create_storage_class(reclaim_policy='Retain')
        self.create_and_wait_for_pvc()

        dc_name = oc_create_app_dc_with_io(self.node, self.pvc_name)

        try:
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)
        finally:
            scale_dc_pod_amount_and_wait(self.node, dc_name, pod_amount=0)
            oc_delete(self.node, 'dc', dc_name)

        # get the name of volume
        pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)

        custom = [r':.metadata.annotations."gluster\.org\/volume\-id"',
                  r':.spec.persistentVolumeReclaimPolicy']
        vol_id, reclaim_policy = oc_get_custom_resource(
            self.node, 'pv', custom, pv_name)

        # checking the retainPolicy of pvc
        self.assertEqual(reclaim_policy, 'Retain')

        # delete the pvc
        oc_delete(self.node, 'pvc', self.pvc_name)

        # check if pv is also deleted or not
        with self.assertRaises(ExecutionError):
            wait_for_resource_absence(
                self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

        # getting the blockvol list
        blocklist = heketi_blockvolume_list(self.heketi_client_node,
                                            self.heketi_server_url)
        self.assertIn(vol_id, blocklist)

        heketi_blockvolume_delete(self.heketi_client_node,
                                  self.heketi_server_url, vol_id)
        blocklist = heketi_blockvolume_list(self.heketi_client_node,
                                            self.heketi_server_url)
        self.assertNotIn(vol_id, blocklist)
        oc_delete(self.node, 'pv', pv_name)
        wait_for_resource_absence(self.node, 'pv', pv_name)
    def test_dynamic_provisioning_glusterfile_reclaim_policy_retain(self):
        """Validate retain policy for glusterfs after deletion of pvc"""

        if get_openshift_version() < "3.9":
            self.skipTest(
                "'Reclaim' feature is not supported in OCP older than 3.9")

        self.create_storage_class(reclaim_policy='Retain')
        self.create_and_wait_for_pvc()

        # get the name of the volume
        pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
        custom = [r':.metadata.annotations.'
                  r'"gluster\.kubernetes\.io\/heketi\-volume\-id"',
                  r':.spec.persistentVolumeReclaimPolicy']

        vol_id, reclaim_policy = oc_get_custom_resource(
            self.node, 'pv', custom, pv_name)

        self.assertEqual(reclaim_policy, 'Retain')

        # Create DC with POD and attached PVC to it.
        try:
            dc_name = oc_create_app_dc_with_io(self.node, self.pvc_name)
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)
        finally:
            scale_dc_pod_amount_and_wait(self.node, dc_name, 0)
            oc_delete(self.node, 'dc', dc_name)
            wait_for_resource_absence(self.node, 'pod', pod_name)

        oc_delete(self.node, 'pvc', self.pvc_name)

        with self.assertRaises(ExecutionError):
            wait_for_resource_absence(
                self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

        heketi_volume_delete(self.heketi_client_node,
                             self.heketi_server_url, vol_id)

        vol_list = heketi_volume_list(self.heketi_client_node,
                                      self.heketi_server_url)

        self.assertNotIn(vol_id, vol_list)

        oc_delete(self.node, 'pv', pv_name)
        wait_for_resource_absence(self.node, 'pv', pv_name)
    def test_create_delete_pvcs_to_make_gluster_reuse_released_space(self):
        """Validate reuse of volume space after deletion of PVCs"""
        min_storage_gb = 10

        # Set arbiter:disabled tags to the first 2 nodes
        data_nodes = []
        biggest_disks = []
        self.assertGreater(len(self.node_id_list), 2)
        for node_id in self.node_id_list[0:2]:
            node_info = heketi_ops.heketi_node_info(
                self.heketi_client_node, self.heketi_server_url,
                node_id, json=True)
            biggest_disk_free_space = 0
            for device in node_info['devices']:
                disk_free_space = int(device['storage']['free'])
                if disk_free_space < (min_storage_gb * 1024**2):
                    self.skipTest(
                        "Devices are expected to have more than "
                        "%sGb of free space" % min_storage_gb)
                if disk_free_space > biggest_disk_free_space:
                    biggest_disk_free_space = disk_free_space
                self._set_arbiter_tag_with_further_revert(
                    self.heketi_client_node, self.heketi_server_url, 'device',
                    device['id'], 'disabled',
                    revert_to=device.get('tags', {}).get('arbiter'))
            biggest_disks.append(biggest_disk_free_space)
            self._set_arbiter_tag_with_further_revert(
                self.heketi_client_node, self.heketi_server_url, 'node',
                node_id, 'disabled',
                revert_to=node_info.get('tags', {}).get('arbiter'))
            data_nodes.append(node_info)

        # Set arbiter:required tag to all other nodes and their devices
        arbiter_nodes = []
        for node_id in self.node_id_list[2:]:
            node_info = heketi_ops.heketi_node_info(
                self.heketi_client_node, self.heketi_server_url,
                node_id, json=True)
            for device in node_info['devices']:
                self._set_arbiter_tag_with_further_revert(
                    self.heketi_client_node, self.heketi_server_url, 'device',
                    device['id'], 'required',
                    revert_to=device.get('tags', {}).get('arbiter'))
            self._set_arbiter_tag_with_further_revert(
                self.heketi_client_node, self.heketi_server_url, 'node',
                node_id, 'required',
                revert_to=node_info.get('tags', {}).get('arbiter'))
            arbiter_nodes.append(node_info)

        # Calculate size and amount of volumes to be created
        pvc_size = int(min(biggest_disks) / 1024**2)
        pvc_amount = max([len(n['devices']) for n in data_nodes]) + 1

        # Create sc with gluster arbiter info
        self.create_storage_class(is_arbiter_vol=True)

        # Create and delete 3 small volumes concurrently
        pvc_names = []
        for i in range(3):
            pvc_name = oc_create_pvc(
                self.node, self.sc_name, pvc_name_prefix='arbiter-pvc',
                pvc_size=int(pvc_size / 3))
            pvc_names.append(pvc_name)
        exception_exists = False
        for pvc_name in pvc_names:
            try:
                verify_pvc_status_is_bound(self.node, pvc_name)
            except Exception:
                for pvc_name in pvc_names:
                    self.addCleanup(
                        wait_for_resource_absence, self.node, 'pvc', pvc_name)
                for pvc_name in pvc_names:
                    self.addCleanup(oc_delete, self.node, 'pvc', pvc_name)
                exception_exists = True
        if exception_exists:
            raise
        for pvc_name in pvc_names:
            oc_delete(self.node, 'pvc', pvc_name)
        for pvc_name in pvc_names:
            wait_for_resource_absence(self.node, 'pvc', pvc_name)

        # Create and delete big volumes in a loop
        for i in range(pvc_amount):
            pvc_name = oc_create_pvc(
                self.node, self.sc_name, pvc_name_prefix='arbiter-pvc',
                pvc_size=pvc_size)
            try:
                verify_pvc_status_is_bound(self.node, pvc_name)
            except Exception:
                self.addCleanup(
                    wait_for_resource_absence, self.node, 'pvc', pvc_name)
                self.addCleanup(oc_delete, self.node, 'pvc', pvc_name)
                raise
            oc_delete(self.node, 'pvc', pvc_name)
            wait_for_resource_absence(self.node, 'pvc', pvc_name)
    def _pv_resize(self, exceed_free_space):
        dir_path = "/mnt"
        pvc_size_gb, min_free_space_gb = 1, 3

        # Get available free space disabling redundant devices and nodes
        heketi_url = self.heketi_server_url
        node_id_list = heketi_ops.heketi_node_list(
            self.heketi_client_node, heketi_url)
        self.assertTrue(node_id_list)
        nodes = {}
        min_free_space = min_free_space_gb * 1024**2
        for node_id in node_id_list:
            node_info = heketi_ops.heketi_node_info(
                self.heketi_client_node, heketi_url, node_id, json=True)
            if (node_info['state'].lower() != 'online' or
                    not node_info['devices']):
                continue
            if len(nodes) > 2:
                out = heketi_ops.heketi_node_disable(
                    self.heketi_client_node, heketi_url, node_id)
                self.assertTrue(out)
                self.addCleanup(
                    heketi_ops.heketi_node_enable,
                    self.heketi_client_node, heketi_url, node_id)
            for device in node_info['devices']:
                if device['state'].lower() != 'online':
                    continue
                free_space = device['storage']['free']
                if (node_id in nodes.keys() or free_space < min_free_space):
                    out = heketi_ops.heketi_device_disable(
                        self.heketi_client_node, heketi_url, device['id'])
                    self.assertTrue(out)
                    self.addCleanup(
                        heketi_ops.heketi_device_enable,
                        self.heketi_client_node, heketi_url, device['id'])
                    continue
                nodes[node_id] = free_space
        if len(nodes) < 3:
            raise self.skipTest(
                "Could not find 3 online nodes with, "
                "at least, 1 online device having free space "
                "bigger than %dGb." % min_free_space_gb)

        # Calculate maximum available size for PVC
        available_size_gb = int(min(nodes.values()) / (1024**2))

        # Create PVC
        self.create_storage_class(allow_volume_expansion=True)
        pvc_name = self.create_and_wait_for_pvc(pvc_size=pvc_size_gb)

        # Create DC with POD and attached PVC to it
        dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
        self.addCleanup(oc_delete, self.node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        if exceed_free_space:
            # Try to expand existing PVC exceeding free space
            resize_pvc(self.node, pvc_name, available_size_gb)
            wait_for_events(self.node, obj_name=pvc_name,
                            event_reason='VolumeResizeFailed')

            # Check that app POD is up and runnig then try to write data
            wait_for_pod_be_ready(self.node, pod_name)
            cmd = (
                "dd if=/dev/urandom of=%s/autotest bs=100K count=1" % dir_path)
            ret, out, err = oc_rsh(self.node, pod_name, cmd)
            self.assertEqual(
                ret, 0,
                "Failed to write data after failed attempt to expand PVC.")
        else:
            # Expand existing PVC using all the available free space
            expand_size_gb = available_size_gb - pvc_size_gb
            resize_pvc(self.node, pvc_name, expand_size_gb)
            verify_pvc_size(self.node, pvc_name, expand_size_gb)
            pv_name = get_pv_name_from_pvc(self.node, pvc_name)
            verify_pv_size(self.node, pv_name, expand_size_gb)
            wait_for_events(
                self.node, obj_name=pvc_name,
                event_reason='VolumeResizeSuccessful')

            # Recreate app POD
            oc_delete(self.node, 'pod', pod_name)
            wait_for_resource_absence(self.node, 'pod', pod_name)
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)

            # Write data on the expanded PVC
            cmd = ("dd if=/dev/urandom of=%s/autotest "
                   "bs=1M count=1025" % dir_path)
            ret, out, err = oc_rsh(self.node, pod_name, cmd)
            self.assertEqual(
                ret, 0, "Failed to write data on the expanded PVC")
    def initiator_side_failures(self):

        # get storage ips of glusterfs pods
        keys = self.gluster_servers
        gluster_ips = []
        for key in keys:
            gluster_ips.append(self.gluster_servers_info[key]['storage'])
        gluster_ips.sort()

        self.create_storage_class()
        self.create_and_wait_for_pvc()

        # find iqn and hacount from volume info
        pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
        custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
        vol_id = oc_get_custom_resource(self.node, 'pv', custom, pv_name)[0]
        vol_info = heketi_blockvolume_info(
            self.heketi_client_node, self.heketi_server_url, vol_id, json=True)
        iqn = vol_info['blockvolume']['iqn']
        hacount = int(self.sc['hacount'])

        # create app pod
        dc_name, pod_name = self.create_dc_with_pvc(self.pvc_name)

        # When we have to verify iscsi login  devices & mpaths, we run it twice
        for i in range(2):

            # get node hostname from pod info
            pod_info = oc_get_pods(
                self.node, selector='deploymentconfig=%s' % dc_name)
            node = pod_info[pod_name]['node']

            # get the iscsi sessions info from the node
            iscsi = get_iscsi_session(node, iqn)
            self.assertEqual(hacount, len(iscsi))
            iscsi.sort()
            self.assertEqual(set(iscsi), (set(gluster_ips) & set(iscsi)))

            # get the paths info from the node
            devices = get_iscsi_block_devices_by_path(node, iqn).keys()
            self.assertEqual(hacount, len(devices))

            # get mpath names and verify that only one mpath is there
            mpaths = set()
            for device in devices:
                mpaths.add(get_mpath_name_from_device_name(node, device))
            self.assertEqual(1, len(mpaths))

            validate_multipath_pod(
                self.node, pod_name, hacount, mpath=list(mpaths)[0])

            # When we have to verify iscsi session logout, we run only once
            if i == 1:
                break

            # make node unschedulabe where pod is running
            oc_adm_manage_node(
                self.node, '--schedulable=false', nodes=[node])

            # make node schedulabe where pod is running
            self.addCleanup(
                oc_adm_manage_node, self.node, '--schedulable=true',
                nodes=[node])

            # delete pod so it get respun on any other node
            oc_delete(self.node, 'pod', pod_name)
            wait_for_resource_absence(self.node, 'pod', pod_name)

            # wait for pod to come up
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)

            # get the iscsi session from the previous node to verify logout
            iscsi = get_iscsi_session(node, iqn, raise_on_error=False)
            self.assertFalse(iscsi)