Example #1
0
    def test_add_capacity_with_resource_delete(
        self,
        add_capacity_setup,
        workload_storageutilization_rbd,
        resource_name,
        resource_id,
        is_kill_resource_repeatedly,
    ):
        """
        The function gets the resource name and id, adds capacity to the cluster,
        and then deletes the resource while the storage capacity is being increased.

        Args:
            resource_name (str): the name of the resource to delete
            resource_id (int): the id of the resource to delete
            is_kill_resource_repeatedly (bool): if True, kill the resource repeatedly;
                if False, delete the resource only once

        """
        used_percentage = get_percent_used_capacity()
        logging.info(
            f"storageutilization is completed. used capacity = {used_percentage}"
        )

        osd_pods_before = pod_helpers.get_osd_pods()
        number_of_osd_pods_before = len(osd_pods_before)

        d = Disruptions()
        d.set_resource(resource_name)

        self.new_pods_in_status_running = False

        osd_size = storage_cluster.get_osd_size()
        logging.info(f"Adding one new set of OSDs. osd size = {osd_size}")
        storagedeviceset_count = storage_cluster.add_capacity(osd_size)
        logging.info("Adding one new set of OSDs was issued without problems")

        # Wait for the new OSDs to come up. Once the first new OSD reaches the Init
        # state, delete the resource. After the deletion we expect all the new OSDs,
        # as well as the re-created resource, to end up in the Running state.
        pod_helpers.wait_for_new_osd_pods_to_come_up(number_of_osd_pods_before)
        logging.info(
            f"Delete a {resource_name} pod while storage capacity is getting increased"
        )
        if is_kill_resource_repeatedly:
            with ThreadPoolExecutor() as executor:
                executor.submit(self.kill_resource_repeatedly, resource_name,
                                resource_id)
                self.wait_for_osd_pods_to_be_running(storagedeviceset_count)
        else:
            d.delete_resource(resource_id)
            self.wait_for_osd_pods_to_be_running(storagedeviceset_count)

        self.new_pods_in_status_running = True
        logging.info(
            "Finished verifying add capacity when one of the pods gets deleted"
        )
        logging.info("Waiting for ceph health check to finished...")
        check_ceph_health_after_add_capacity()
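
    # The helpers self.kill_resource_repeatedly and self.wait_for_osd_pods_to_be_running
    # are referenced above but not shown in this example. A minimal sketch of what they
    # might look like follows; the exact signatures, the loop bound, the 420s timeout, and
    # the OCP/constants/config imports (used elsewhere in these examples) are assumptions,
    # not the original implementation.
    def kill_resource_repeatedly(self, resource_name, resource_id, max_iterations=30):
        """Keep deleting the resource until the test flags the new OSD pods as running."""
        d = Disruptions()
        d.set_resource(resource_name)
        for _ in range(max_iterations):
            if self.new_pods_in_status_running:
                break
            d.delete_resource(resource_id)

    def wait_for_osd_pods_to_be_running(self, storagedeviceset_count):
        """Wait until all OSD pods (3 per device set) reach the Running state."""
        pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])
        pod.wait_for_resource(
            timeout=420,
            condition=constants.STATUS_RUNNING,
            selector="app=rook-ceph-osd",
            resource_count=storagedeviceset_count * 3,
        )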
Example #2
0
def add_capacity_test():
    osd_size = storage_cluster.get_osd_size()
    result = storage_cluster.add_capacity(osd_size)
    pod = OCP(kind=constants.POD,
              namespace=config.ENV_DATA["cluster_namespace"])
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=result * 3,
    )

    # Verify status of rook-ceph-osd-prepare pods. Verifies bug 1769061
    # pod.wait_for_resource(
    #     timeout=300,
    #     condition=constants.STATUS_COMPLETED,
    #     selector=constants.OSD_PREPARE_APP_LABEL,
    #     resource_count=result * 3
    # )
    # These lines are commented out as a workaround for bug 1842500

    # Verify OSDs are encrypted.
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()

    ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"], tries=80)
    ceph_cluster_obj = CephCluster()
    assert ceph_cluster_obj.wait_for_rebalance(
        timeout=5400), "Data re-balance failed to complete"
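
# Examples #5 and #16 below show that on flexible-scaling clusters each device set maps to
# a single OSD instead of three. A small helper sketch (an assumption, not part of the
# original example, reusing is_flexible_scaling_enabled from those examples) that computes
# the expected OSD pod count accordingly:
def expected_osd_count(device_set_count):
    replica_count = 1 if is_flexible_scaling_enabled() else 3
    return device_set_count * replica_count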
Example #3
0
    def test_scale_osds_reboot_nodes(self, interface, project_factory,
                                     multi_pvc_factory, dc_pod_factory):
        """
        Check storage utilization and, if it is below the threshold, run IO.
        Scale OSDs from 3 to 6, check for rebalance, and reboot the worker nodes.
        """
        current_osd_count = count_cluster_osd()
        proj_obj = project_factory()
        if current_osd_count == 3:
            while not validate_osd_utilization(osd_used=50):
                # Create pvc
                pvc_objs = multi_pvc_factory(project=proj_obj,
                                             interface=interface,
                                             size=self.pvc_size,
                                             num_of_pvc=self.num_of_pvcs)

                dc_pod_objs = list()
                for pvc_obj in pvc_objs:
                    dc_pod_objs.append(dc_pod_factory(pvc=pvc_obj))

                wait_for_dc_app_pods_to_reach_running_state(dc_pod_objs)

                for pod_obj in dc_pod_objs:
                    pod_obj.run_io(storage_type='fs',
                                   size='3G',
                                   runtime='60',
                                   fio_filename=f'{pod_obj.name}_io')

        # Add capacity
        osd_size = storage_cluster.get_osd_size()
        count = storage_cluster.add_capacity(osd_size)
        pod = OCP(kind=constants.POD,
                  namespace=config.ENV_DATA['cluster_namespace'])
        pod.wait_for_resource(timeout=300,
                              condition=constants.STATUS_RUNNING,
                              selector='app=rook-ceph-osd',
                              resource_count=count * 3)
        assert ceph_health_check(), "New OSDs failed to reach running state"

        cluster = CephCluster()

        # Get rebalance status
        rebalance_status = cluster.get_rebalance_status()
        logger.info(rebalance_status)
        if rebalance_status:
            time_taken = cluster.time_taken_to_complete_rebalance()
            logger.info(f"The time taken to complete rebalance {time_taken}")

        # Rolling reboot on worker nodes
        worker_nodes = get_typed_nodes(node_type='worker')

        factory = platform_nodes.PlatformNodesFactory()
        nodes = factory.get_nodes_platform()

        for node in worker_nodes:
            nodes.restart_nodes(nodes=[node])
            wait_for_nodes_status()

        assert ceph_health_check(
            delay=180), "Failed, Ceph health bad after nodes reboot"
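
    # validate_osd_utilization is used above but its implementation is not shown. A rough
    # sketch of what such a check might do (an assumption, reusing the ceph tools pod
    # pattern from Example #17): read "ceph osd df" and report whether the average OSD
    # utilization has reached the given percentage.
    def _validate_osd_utilization_sketch(self, osd_used=50):
        ct_pod = pod_helpers.get_ceph_tools_pod()
        output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd df")
        return float(output["summary"]["average_utilization"]) >= osd_used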
Example #4
0
    def test_add_capacity_node_restart(
        self,
        nodes,
        multi_pvc_factory,
        pod_factory,
        workload_storageutilization_rbd,
        num_of_nodes,
    ):
        """
        Test add capacity when one of the worker nodes gets restarted in the middle of the process
        """
        logging.info(
            "Condition 1 to start the test is met: storageutilization is completed"
        )
        # Note: when the branch 'wip-add-capacity-e_e' is merged into master, the test
        # will include much more data both before and after calling the 'add_capacity' function.

        node_list = get_ocs_nodes(num_of_nodes=num_of_nodes)
        assert node_list, "Condition 2 to start test failed: No node to restart"

        max_osds = 15
        osd_pods_before = pod_helpers.get_osd_pods()
        assert (
            len(osd_pods_before) < max_osds
        ), "Condition 3 to start test failed: We have maximum of osd's in the cluster"
        logging.info("All start conditions are met!")

        osd_size = storage_cluster.get_osd_size()
        logging.info("Calling add_capacity function...")
        result = storage_cluster.add_capacity(osd_size)
        if result:
            logging.info("add capacity finished successfully")
        else:
            logging.info("add capacity failed")

        # Restart nodes while additional storage is being added
        logging.info("Restart nodes:")
        logging.info([n.name for n in node_list])
        nodes.restart_nodes(nodes=node_list, wait=True)
        logging.info("Finished restarting the node list")

        # The exit criteria verification conditions here are not complete. Once the branch
        # 'wip-add-capacity-e_e' is merged into master, the functions from that branch will be used.

        pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])
        pod.wait_for_resource(
            timeout=600,
            condition=constants.STATUS_RUNNING,
            selector="app=rook-ceph-osd",
            resource_count=result * 3,
        )

        # Verify OSDs are encrypted
        if config.ENV_DATA.get("encryption_at_rest"):
            osd_encryption_verification()

        logging.info("Finished verifying add capacity osd storage with node restart")
        logging.info("Waiting for ceph health check to finished...")
        ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"], tries=180)
Example #5
0
def add_capacity_test():
    osd_size = storage_cluster.get_osd_size()
    existing_osd_pods = get_osd_pods()
    existing_osd_pod_names = [pod.name for pod in existing_osd_pods]
    if ui_add_capacity_conditions():
        try:
            result = ui_add_capacity(osd_size)
        except Exception as e:
            logging.error(
                f"Add capacity via UI is not applicable and CLI method will be done. The error is {e}"
            )
            result = storage_cluster.add_capacity(osd_size)
    else:
        result = storage_cluster.add_capacity(osd_size)
    osd_pods_post_expansion = get_osd_pods()
    osd_pod_names_post_expansion = [
        pod.name for pod in osd_pods_post_expansion
    ]
    restarted_osds = list()
    logger.info(
        "Checking if existing OSD pods were restarted (deleted) post add capacity (bug 1931601)"
    )

    for pod in existing_osd_pod_names:
        if pod not in osd_pod_names_post_expansion:
            restarted_osds.append(pod)
    assert (
        len(restarted_osds) == 0
    ), f"The following OSD pods were restarted (deleted) post add capacity: {restarted_osds}"

    pod = OCP(kind=constants.POD,
              namespace=config.ENV_DATA["cluster_namespace"])
    if is_flexible_scaling_enabled():
        replica_count = 1
    else:
        replica_count = 3
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=result * replica_count,
    )

    # Verify status of rook-ceph-osd-prepare pods. Verifies bug 1769061
    # pod.wait_for_resource(
    #     timeout=300,
    #     condition=constants.STATUS_COMPLETED,
    #     selector=constants.OSD_PREPARE_APP_LABEL,
    #     resource_count=result * 3
    # )
    # These lines are commented out as a workaround for bug 1842500

    # Verify OSDs are encrypted.
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()

    check_ceph_health_after_add_capacity(ceph_rebalance_timeout=3600)
Example #6
0
    def test_add_capacity(self):
        """
        Test to add variable capacity to the OSD cluster while IOs are running
        """
        self.ceph_cluster = CephCluster()
        osd_size = storage_cluster.get_osd_size()
        result = storage_cluster.add_capacity(osd_size)
        pod = OCP(kind=constants.POD,
                  namespace=config.ENV_DATA['cluster_namespace'])
        pod.wait_for_resource(timeout=300,
                              condition=constants.STATUS_RUNNING,
                              selector='app=rook-ceph-osd',
                              resource_count=result * 3)
        self.ceph_cluster.cluster_health_check(timeout=1200)
Example #7
0
def add_capacity_test():
    osd_size = storage_cluster.get_osd_size()
    existing_osd_pods = get_osd_pods()
    existing_osd_pod_names = [pod.name for pod in existing_osd_pods]
    result = storage_cluster.add_capacity(osd_size)
    osd_pods_post_expansion = get_osd_pods()
    osd_pod_names_post_expansion = [
        pod.name for pod in osd_pods_post_expansion
    ]
    restarted_osds = list()
    logger.info(
        "Checking if existing OSD pods were restarted (deleted) post add capacity (bug 1931601)"
    )

    for pod in existing_osd_pod_names:
        if pod not in osd_pod_names_post_expansion:
            restarted_osds.append(pod)
    assert (
        len(restarted_osds) == 0
    ), f"The following OSD pods were restarted (deleted) post add capacity: {restarted_osds}"

    pod = OCP(kind=constants.POD,
              namespace=config.ENV_DATA["cluster_namespace"])
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=result * 3,
    )

    # Verify status of rook-ceph-osd-prepare pods. Verifies bug 1769061
    # pod.wait_for_resource(
    #     timeout=300,
    #     condition=constants.STATUS_COMPLETED,
    #     selector=constants.OSD_PREPARE_APP_LABEL,
    #     resource_count=result * 3
    # )
    # These lines are commented out as a workaround for bug 1842500

    # Verify OSDs are encrypted.
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()

    ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"], tries=80)
    ceph_cluster_obj = CephCluster()
    assert ceph_cluster_obj.wait_for_rebalance(
        timeout=5400), "Data re-balance failed to complete"
Example #8
0
    def test_add_capacity_osd_pod_delete(self, workload_storageutilization_rbd):
        """
        Test add capacity when one of the osd pods gets deleted
        in the middle of the process.
        """
        used_percentage = get_percent_used_capacity()
        logging.info(f"storageutilization is completed. used capacity = {used_percentage}")

        max_osds = 15
        osd_pods_before = pod_helpers.get_osd_pods()
        number_of_osd_pods_before = len(osd_pods_before)
        if number_of_osd_pods_before >= max_osds:
            pytest.skip("We have maximum of osd's in the cluster")

        d = Disruptions()
        d.set_resource('osd')

        osd_size = storage_cluster.get_osd_size()
        logging.info(f"Adding one new set of OSDs. osd size = {osd_size}")
        storagedeviceset_count = storage_cluster.add_capacity(osd_size)
        logging.info("Adding one new set of OSDs was issued without problems")

        # After the OSD pod is deleted, the OSD count drops by one and then gradually
        # climbs back up until it finally reaches storagedeviceset_count * 3
        pod_helpers.wait_for_new_osd_pods_to_come_up(number_of_osd_pods_before)
        logging.info("Delete an osd pod while storage capacity is getting increased")
        d.delete_resource(1)

        pod = OCP(
            kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
        )

        pod.wait_for_resource(
            timeout=420,
            condition=constants.STATUS_RUNNING,
            selector='app=rook-ceph-osd',
            resource_count=storagedeviceset_count * 3
        )

        logging.info("Finished verifying add capacity when one of the osd pods gets deleted")
        logging.info("Waiting for ceph health check to finished...")
        ceph_health_check(
            namespace=config.ENV_DATA['cluster_namespace'], tries=80
        )
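
    # pod_helpers.wait_for_new_osd_pods_to_come_up is used above but its implementation is
    # not shown here. A minimal polling sketch of the idea (an assumption, not the original
    # helper): wait until the OSD pod count rises above the count recorded before add_capacity.
    def _wait_for_new_osd_pods_sketch(self, number_of_osd_pods_before, timeout=600, sleep=10):
        import time
        for _ in range(int(timeout / sleep)):
            if len(pod_helpers.get_osd_pods()) > number_of_osd_pods_before:
                return True
            time.sleep(sleep)
        return False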
Example #9
0
def add_capacity_test():
    osd_size = storage_cluster.get_osd_size()
    result = storage_cluster.add_capacity(osd_size)
    pod = OCP(kind=constants.POD,
              namespace=config.ENV_DATA['cluster_namespace'])
    pod.wait_for_resource(timeout=300,
                          condition=constants.STATUS_RUNNING,
                          selector='app=rook-ceph-osd',
                          resource_count=result * 3)

    # Verify status of rook-ceph-osd-prepare pods. Verifies bug 1769061
    # pod.wait_for_resource(
    #     timeout=300,
    #     condition=constants.STATUS_COMPLETED,
    #     selector=constants.OSD_PREPARE_APP_LABEL,
    #     resource_count=result * 3
    # )
    # These lines are commented out as a workaround for bug 1842500

    ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'], tries=80)
Example #10
0
    def test_add_capacity(self):
        """
        Test to add variable capacity to the OSD cluster while IOs are running
        """
        osd_size = storage_cluster.get_osd_size()
        result = storage_cluster.add_capacity(osd_size)
        pod = OCP(kind=constants.POD,
                  namespace=config.ENV_DATA['cluster_namespace'])
        pod.wait_for_resource(timeout=300,
                              condition=constants.STATUS_RUNNING,
                              selector='app=rook-ceph-osd',
                              resource_count=result * 3)

        # Verify status of rook-ceph-osd-prepare pods. Verifies bug 1769061
        pod.wait_for_resource(timeout=300,
                              condition=constants.STATUS_COMPLETED,
                              selector=constants.OSD_PREPARE_APP_LABEL,
                              resource_count=result * 3)

        ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'],
                          tries=80)
Example #11
0
    def base_setup(self, interface, pvc_factory, pod_factory):
        """
        A setup phase for the test:
        get all the Ceph pod information,
        create a max-size PVC and pod, and run IO

        """
        osd_size = get_osd_size()
        pvc_size_gb = osd_size * 1024
        io_size_mb = f'{int((pvc_size_gb / 2) * 1000)}M'

        pod_objs = get_all_pods(namespace=defaults.ROOK_CLUSTER_NAMESPACE,
                                selector=[
                                    'noobaa', 'rook-ceph-osd-prepare',
                                    'rook-ceph-drain-canary'
                                ],
                                exclude_selector=True)

        # Create a max-size PVC and app pod, and run IOs
        self.sc = default_storage_class(interface_type=interface)

        self.pvc_obj = pvc_factory(
            interface=interface,
            storageclass=self.sc,
            size=pvc_size_gb,
        )

        self.pod_obj = pod_factory(interface=interface, pvc=self.pvc_obj)

        log.info(f"Running FIO to fill PVC size: {io_size_mb}")
        self.pod_obj.run_io('fs',
                            size=io_size_mb,
                            io_direction='write',
                            runtime=600)

        log.info("Waiting for IO results")
        self.pod_obj.get_fio_results()

        return pod_objs
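
    # A worked example of the sizing arithmetic in base_setup, assuming get_osd_size()
    # returns the OSD size in TiB (an assumption): osd_size = 2 gives pvc_size_gb = 2048
    # and io_size_mb = int((2048 / 2) * 1000) = 1024000, i.e. the FIO size string
    # '1024000M', which fills roughly half of the PVC.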
Example #12
0
def ui_add_capacity(osd_size_capacity_requested):
    """
    Add Capacity via UI

    Args:
        osd_size_capacity_requested (int): Requested osd size capacity

    Returns:
        new_storage_devices_sets_count (int): the updated number of storage device sets after adding capacity

    """
    osd_size_existing = get_osd_size()
    device_sets_required = int(osd_size_capacity_requested / osd_size_existing)
    old_storage_devices_sets_count = get_deviceset_count()
    new_storage_devices_sets_count = int(device_sets_required +
                                         old_storage_devices_sets_count)
    logger.info("Add capacity via UI")
    setup_ui = login_ui()
    add_ui_obj = AddReplaceDeviceUI(setup_ui)
    add_ui_obj.add_capacity_ui()
    close_browser(setup_ui)
    return new_storage_devices_sets_count
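

# A hypothetical usage sketch (not part of the original example, assuming the OCP,
# constants, and config imports used elsewhere in these examples): the device-set count
# returned by ui_add_capacity can be fed directly into a wait for the new OSD pods.
def ui_add_capacity_usage_sketch(requested_osd_size):
    new_count = ui_add_capacity(requested_osd_size)
    pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=new_count * 3,
    )
    return new_count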
Example #13
0
    def test_base_operation_node_drain(
        self,
        node_drain_teardown,
        node_restart_teardown,
        nodes,
        pgsql_factory_fixture,
        project_factory,
        multi_pvc_factory,
        mcg_obj,
        bucket_factory,
    ):
        """
        Test covers the following flow operations while running workloads in the background:
        1. Node drain
        2. Add capacity
        3. Node reboot
        4. Node n/w failure

        """
        logger.info("Starting IO operations in Background")
        project = project_factory()
        bg_handler = flowtest.BackgroundOps()
        executor_run_bg_ios_ops = ThreadPoolExecutor(max_workers=3)

        pgsql_workload = executor_run_bg_ios_ops.submit(
            bg_handler.handler,
            pgsql_factory_fixture,
            replicas=1,
            clients=1,
            transactions=100,
            timeout=100,
            iterations=1,
        )
        logging.info("Started pgsql workload in background")

        flow_ops = flowtest.FlowOperations()

        obc_ios = executor_run_bg_ios_ops.submit(
            bg_handler.handler,
            flow_ops.sanity_helpers.obc_put_obj_create_delete,
            mcg_obj,
            bucket_factory,
            iterations=30,
        )
        logging.info("Started object IOs in background")

        pvc_create_delete = executor_run_bg_ios_ops.submit(
            bg_handler.handler,
            flow_ops.sanity_helpers.create_pvc_delete,
            multi_pvc_factory,
            project,
            iterations=70,
        )
        logging.info("Started pvc create and delete in background")

        logger.info("Starting operation 1: Node Drain")
        node_name = flow_ops.node_operations_entry_criteria(
            node_type="worker", number_of_nodes=1, operation_name="Node Drain")
        # Node maintenance - to gracefully terminate all pods on the node
        node.drain_nodes([node_name[0].name])
        # Make the node schedulable again
        node.schedule_nodes([node_name[0].name])
        logger.info("Verifying exit criteria for operation 1: Node Drain")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Node Drain")

        logger.info("Starting operation 2: Add Capacity")
        osd_pods_before, restart_count_before = flow_ops.add_capacity_entry_criteria()
        # Add capacity
        osd_size = storage_cluster.get_osd_size()
        result = storage_cluster.add_capacity(osd_size)
        pod = OCP(kind=constants.POD,
                  namespace=config.ENV_DATA["cluster_namespace"])
        if is_flexible_scaling_enabled():
            replica_count = 1
        else:
            replica_count = 3
        pod.wait_for_resource(
            timeout=300,
            condition=constants.STATUS_RUNNING,
            selector="app=rook-ceph-osd",
            resource_count=result * replica_count,
        )
        logger.info("Verifying exit criteria for operation 2: Add Capacity")
        flow_ops.add_capacity_exit_criteria(restart_count_before,
                                            osd_pods_before)

        logger.info("Starting operation 3: Node Restart")
        node_name = flow_ops.node_operations_entry_criteria(
            node_type="worker",
            number_of_nodes=1,
            operation_name="Node Restart")
        # Node failure (reboot)
        nodes.restart_nodes(nodes=node_name)
        logger.info("Verifying exit criteria for operation 3: Node Restart")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Node Restart")

        logger.info("Starting operation 4: Node network fail")
        node_name, nw_fail_time = flow_ops.node_operations_entry_criteria(
            node_type="worker",
            number_of_nodes=1,
            network_fail_time=300,
            operation_name="Node N/W failure",
        )
        # Node n/w interface failure
        node.node_network_failure(node_name[0].name)
        logger.info(f"Waiting for {nw_fail_time} seconds")
        sleep(nw_fail_time)
        # Reboot the unresponsive node(s)
        logger.info(
            f"Stop and start the unresponsive node(s): {node_name[0].name}")
        nodes.restart_nodes_by_stop_and_start(nodes=node_name)
        logger.info(
            "Verifying exit criteria for operation 4: Node network fail")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Node N/W failure")

        logger.info(
            "Waiting for final iteration of background operations to be completed"
        )
        bg_ops = [pvc_create_delete, obc_ios, pgsql_workload]
        bg_handler.wait_for_bg_operations(bg_ops, timeout=600)
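
    # flowtest.BackgroundOps is referenced above but not shown. A minimal sketch of the
    # handler pattern it appears to follow (an assumption, mirroring the EXPANSION_COMPLETED
    # flag used by cluster_exp_helpers.BackgroundOps in Example #17): run the given callable
    # for a number of iterations, stopping early once the main flow flags completion.
    class _BackgroundOpsSketch:
        OPERATION_COMPLETED = False

        def handler(self, func, *args, iterations=1, **kwargs):
            for _ in range(iterations):
                if self.OPERATION_COMPLETED:
                    break
                func(*args, **kwargs)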
Example #14
0
def delete_and_create_osd_node_vsphere_upi_lso(osd_node_name, use_existing_node=False):
    """
    Unschedule, drain, and delete the OSD node, then create a new OSD node.
    At the end of the function there should be the same number of OSD nodes as
    there was in the beginning, and Ceph health should be OK.
    This function is for vSphere UPI.

    Args:
        osd_node_name (str): the name of the osd node
        use_existing_node (bool): If False, create a new node and label it.
            If True, use an existing node to replace the deleted node
            and label it.

    Returns:
        str: The new node name

    """
    from ocs_ci.ocs.platform_nodes import PlatformNodesFactory
    from ocs_ci.ocs.resources.storage_cluster import get_osd_size

    sc_name = constants.LOCAL_BLOCK_RESOURCE
    old_pv_objs = get_pv_objs_in_sc(sc_name)

    osd_node = get_node_objs(node_names=[osd_node_name])[0]
    osd_pod = get_node_pods(osd_node_name, pods_to_search=pod.get_osd_pods())[0]
    osd_id = pod.get_osd_pod_id(osd_pod)
    log.info(f"osd id to remove = {osd_id}")
    # Save the node hostname before deleting the node
    osd_node_hostname_label = get_node_hostname_label(osd_node)

    log.info("Scale down node deployments...")
    scale_down_deployments(osd_node_name)
    log.info("Scale down deployments finished successfully")

    new_node_name = delete_and_create_osd_node_vsphere_upi(
        osd_node_name, use_existing_node
    )
    assert new_node_name, "Failed to create a new node"
    log.info(f"New node created successfully. Node name: {new_node_name}")

    # If we use LSO, we need to create and attach a new disk manually
    new_node = get_node_objs(node_names=[new_node_name])[0]
    plt = PlatformNodesFactory()
    node_util = plt.get_nodes_platform()
    osd_size = get_osd_size()
    log.info(
        f"Create a new disk with size {osd_size}, and attach to node {new_node_name}"
    )
    node_util.create_and_attach_volume(node=new_node, size=osd_size)

    new_node_hostname_label = get_node_hostname_label(new_node)
    log.info(
        "Replace the old node with the new worker node in localVolumeDiscovery and localVolumeSet"
    )
    res = add_new_node_to_lvd_and_lvs(
        old_node_name=osd_node_hostname_label,
        new_node_name=new_node_hostname_label,
    )
    assert res, "Failed to add the new node to LVD and LVS"

    log.info("Verify new pv is available...")
    is_new_pv_available = verify_new_pv_available_in_sc(old_pv_objs, sc_name)
    assert is_new_pv_available, "New pv is not available"
    log.info("Finished verifying that the new pv is available")

    osd_removal_job = pod.run_osd_removal_job(osd_id)
    assert osd_removal_job, "ocs-osd-removal failed to create"
    is_completed = pod.verify_osd_removal_job_completed_successfully(osd_id)
    assert is_completed, "ocs-osd-removal-job is not in status 'completed'"
    log.info("ocs-osd-removal-job completed successfully")

    expected_num_of_deleted_pvs = 1
    num_of_deleted_pvs = delete_released_pvs_in_sc(sc_name)
    assert (
        num_of_deleted_pvs == expected_num_of_deleted_pvs
    ), f"num of deleted PVs is {num_of_deleted_pvs} instead of {expected_num_of_deleted_pvs}"
    log.info("Successfully deleted old pv")

    is_deleted = pod.delete_osd_removal_job(osd_id)
    assert is_deleted, "Failed to delete ocs-osd-removal-job"
    log.info("ocs-osd-removal-job deleted successfully")

    return new_node_name
Example #15
0
    def test_recovery_from_volume_deletion(self, nodes, pvc_factory,
                                           pod_factory):
        """
        Test cluster recovery from disk deletion from the platform side.
        Based on documented procedure detailed in
        https://bugzilla.redhat.com/show_bug.cgi?id=1823183

        """
        logger.info("Picking a PV which to be deleted from the platform side")
        osd_pvs = get_deviceset_pvs()
        osd_pv = random.choice(osd_pvs)
        osd_pv_name = osd_pv.name
        # get the claim name
        logger.info(f"Getting the claim name for OSD PV {osd_pv_name}")
        claim_name = osd_pv.get().get("spec").get("claimRef").get("name")

        # Get the backing volume name
        logger.info(f"Getting the backing volume name for PV {osd_pv_name}")
        backing_volume = nodes.get_data_volumes(pvs=[osd_pv])[0]

        # Get the corresponding PVC
        logger.info(f"Getting the corresponding PVC of PV {osd_pv_name}")
        osd_pvcs = get_deviceset_pvcs()
        osd_pvcs_count = len(osd_pvcs)
        osd_pvc = [
            ds for ds in osd_pvcs
            if ds.get().get("metadata").get("name") == claim_name
        ][0]

        # Get the corresponding OSD pod and ID
        logger.info(f"Getting the OSD pod using PVC {osd_pvc.name}")
        osd_pods = get_osd_pods()
        osd_pods_count = len(osd_pods)
        osd_pod = [
            osd_pod for osd_pod in osd_pods
            if osd_pod.get().get("metadata").get("labels").get(
                constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        logger.info(f"OSD_POD {osd_pod.name}")
        osd_id = osd_pod.get().get("metadata").get("labels").get("ceph-osd-id")

        # Get the node that the OSD pod is running on
        logger.info(
            f"Getting the node that the OSD pod {osd_pod.name} is running on")
        osd_node = get_pod_node(osd_pod)
        osd_prepare_pods = get_osd_prepare_pods()
        osd_prepare_pod = [
            pod for pod in osd_prepare_pods if pod.get().get("metadata").get(
                "labels").get(constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        osd_prepare_job_name = (osd_prepare_pod.get().get("metadata").get(
            "labels").get("job-name"))
        osd_prepare_job = get_job_obj(osd_prepare_job_name)

        # Get the corresponding OSD deployment
        logger.info(f"Getting the OSD deployment for OSD PVC {claim_name}")
        osd_deployment = [
            osd_pod for osd_pod in get_osd_deployments()
            if osd_pod.get().get("metadata").get("labels").get(
                constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        osd_deployment_name = osd_deployment.name

        # Delete the volume from the platform side
        logger.info(f"Deleting {backing_volume} from the platform side")
        nodes.detach_volume(backing_volume, osd_node)

        # Scale down OSD deployment
        logger.info(f"Scaling down OSD deployment {osd_deployment_name} to 0")
        ocp.OCP().exec_oc_cmd(
            f"scale --replicas=0 deployment/{osd_deployment_name}")

        # Force delete OSD pod if necessary
        osd_pod_name = osd_pod.name
        logger.info(f"Waiting for OSD pod {osd_pod.name} to get deleted")
        try:
            osd_pod.ocp.wait_for_delete(resource_name=osd_pod_name)
        except TimeoutError:
            osd_pod.delete(force=True)
            osd_pod.ocp.wait_for_delete(resource_name=osd_pod_name)

        # Run ocs-osd-removal job
        ocp_version = float(get_ocp_version())
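        # Note: comparing versions as plain floats is fragile (e.g. float("4.10") == 4.1,
        # which would wrongly take the pre-4.6 branch); a dedicated version comparison
        # helper would be safer here.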
        if ocp_version >= 4.6:
            cmd = f"process ocs-osd-removal -p FAILED_OSD_IDS={osd_id} -o yaml"
        else:
            cmd = f"process ocs-osd-removal -p FAILED_OSD_ID={osd_id} -o yaml"

        logger.info(f"Executing OSD removal job on OSD-{osd_id}")
        ocp_obj = ocp.OCP(namespace=config.ENV_DATA["cluster_namespace"])
        osd_removal_job_yaml = ocp_obj.exec_oc_cmd(cmd)
        osd_removal_job = OCS(**osd_removal_job_yaml)
        osd_removal_job.create(do_reload=False)

        # Get ocs-osd-removal pod name
        logger.info("Getting the ocs-osd-removal pod name")
        osd_removal_pod_name = get_osd_removal_pod_name(osd_id)
        osd_removal_pod_obj = get_pod_obj(osd_removal_pod_name,
                                          namespace="openshift-storage")
        osd_removal_pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_COMPLETED,
            resource_name=osd_removal_pod_name)

        # Verify OSD removal from the ocs-osd-removal pod logs
        logger.info(
            f"Verifying removal of OSD from {osd_removal_pod_name} pod logs")
        logs = get_pod_logs(osd_removal_pod_name)
        pattern = f"purged osd.{osd_id}"
        assert re.search(pattern, logs)

        osd_pvc_name = osd_pvc.name

        if ocp_version < 4.6:
            # Delete the OSD prepare job
            logger.info(f"Deleting OSD prepare job {osd_prepare_job_name}")
            osd_prepare_job.delete()
            osd_prepare_job.ocp.wait_for_delete(
                resource_name=osd_prepare_job_name, timeout=120)

            # Delete the OSD PVC
            logger.info(f"Deleting OSD PVC {osd_pvc_name}")
            osd_pvc.delete()
            osd_pvc.ocp.wait_for_delete(resource_name=osd_pvc_name)

            # Delete the OSD deployment
            logger.info(f"Deleting OSD deployment {osd_deployment_name}")
            osd_deployment.delete()
            osd_deployment.ocp.wait_for_delete(
                resource_name=osd_deployment_name, timeout=120)
        else:
            # If the OCP version is 4.6 or above, the OSD removal job should
            # delete the OSD prepare job, OSD PVC, and OSD deployment
            logger.info(
                f"Verifying deletion of OSD prepare job {osd_prepare_job_name}"
            )
            osd_prepare_job.ocp.wait_for_delete(
                resource_name=osd_prepare_job_name, timeout=30)
            logger.info(f"Verifying deletion of OSD PVC {osd_pvc_name}")
            osd_pvc.ocp.wait_for_delete(resource_name=osd_pvc_name, timeout=30)
            logger.info(
                f"Verifying deletion of OSD deployment {osd_deployment_name}")
            osd_deployment.ocp.wait_for_delete(
                resource_name=osd_deployment_name, timeout=30)

        # Delete PV
        logger.info(f"Verifying deletion of PV {osd_pv_name}")
        try:
            osd_pv.ocp.wait_for_delete(resource_name=osd_pv_name)
        except TimeoutError:
            osd_pv.delete()
            osd_pv.ocp.wait_for_delete(resource_name=osd_pv_name)

        # If we use LSO, we need to create and attach a new disk manually
        if cluster.is_lso_cluster():
            osd_size = get_osd_size()
            logger.info(f"Create a new disk with size {osd_size}")
            nodes.create_and_attach_volume(node=osd_node, size=osd_size)

        if ocp_version < 4.6:
            # Delete the rook ceph operator pod to trigger reconciliation
            rook_operator_pod = get_operator_pods()[0]
            logger.info(
                f"deleting Rook Ceph operator pod {rook_operator_pod.name}")
            rook_operator_pod.delete()

        # Delete the OSD removal job
        logger.info(f"Deleting OSD removal job ocs-osd-removal-{osd_id}")
        osd_removal_job = get_job_obj(f"ocs-osd-removal-{osd_id}")
        osd_removal_job.delete()
        osd_removal_job.ocp.wait_for_delete(
            resource_name=f"ocs-osd-removal-{osd_id}")

        timeout = 600
        # Wait for OSD PVC to get created and reach Bound state
        logger.info(
            "Waiting for a new OSD PVC to get created and reach Bound state")
        assert osd_pvc.ocp.wait_for_resource(
            timeout=timeout,
            condition=constants.STATUS_BOUND,
            selector=constants.OSD_PVC_GENERIC_LABEL,
            resource_count=osd_pvcs_count,
        ), (f"Cluster recovery failed after {timeout} seconds. "
            f"Expected to have {osd_pvcs_count} OSD PVCs in status Bound. Current OSD PVCs status: "
            f"{[pvc.ocp.get_resource(pvc.get().get('metadata').get('name'), 'STATUS') for pvc in get_deviceset_pvcs()]}"
            )
        # Wait for OSD pod to get created and reach Running state
        logger.info(
            "Waiting for a new OSD pod to get created and reach Running state")
        assert osd_pod.ocp.wait_for_resource(
            timeout=timeout,
            condition=constants.STATUS_RUNNING,
            selector=constants.OSD_APP_LABEL,
            resource_count=osd_pods_count,
        ), (f"Cluster recovery failed after {timeout} seconds. "
            f"Expected to have {osd_pods_count} OSD pods in status Running. Current OSD pods status: "
            f"{[osd_pod.ocp.get_resource(pod.get().get('metadata').get('name'), 'STATUS') for pod in get_osd_pods()]}"
            )

        # We need to silence the old osd crash warning due to BZ https://bugzilla.redhat.com/show_bug.cgi?id=1896810
        # This is a workaround - issue for tracking: https://github.com/red-hat-storage/ocs-ci/issues/3438
        if ocp_version >= 4.6:
            silence_osd_crash = cluster.wait_for_silence_ceph_osd_crash_warning(
                osd_pod_name)
            if not silence_osd_crash:
                logger.info("Didn't find ceph osd crash warning")

        # Validate cluster is still functional
        self.sanity_helpers.health_check(tries=100)
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
Example #16
0
    def test_non_ocs_taint_and_tolerations(self):
        """
        The test runs the following steps:
        1. Taint OCS nodes with a non-OCS taint
        2. Set tolerations on the storagecluster, subscription, configmap, and ocsinit
        3. Respin all OCS pods and check that they run on OCS nodes with the tolerations
        4. Add capacity

        """

        # Taint all nodes with non-ocs taint
        ocs_nodes = get_worker_nodes()
        taint_nodes(nodes=ocs_nodes, taint_label="xyz=true:NoSchedule")

        # Add tolerations to the storagecluster
        storagecluster_obj = ocp.OCP(
            resource_name=constants.DEFAULT_CLUSTERNAME,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            kind=constants.STORAGECLUSTER,
        )
        tolerations = (
            '{"tolerations": [{"effect": "NoSchedule", "key": "xyz",'
            '"operator": "Equal", "value": "true"}, '
            '{"effect": "NoSchedule", "key": "node.ocs.openshift.io/storage", '
            '"operator": "Equal", "value": "true"}]}')
        param = (
            f'{{"spec": {{"placement": {{"all": {tolerations}, "mds": {tolerations}, '
            f'"noobaa-core": {tolerations}, "rgw": {tolerations}}}}}}}')
        storagecluster_obj.patch(params=param, format_type="merge")

        # Add tolerations to the subscription
        sub_list = ocp.get_all_resource_names_of_a_kind(
            kind=constants.SUBSCRIPTION)
        param = (
            '{"spec": {"config":  {"tolerations": '
            '[{"effect": "NoSchedule", "key": "xyz", "operator": "Equal", '
            '"value": "true"}]}}}')
        for sub in sub_list:
            sub_obj = ocp.OCP(
                resource_name=sub,
                namespace=defaults.ROOK_CLUSTER_NAMESPACE,
                kind=constants.SUBSCRIPTION,
            )
            sub_obj.patch(params=param, format_type="merge")

        # Add tolerations to the ocsinitializations.ocs.openshift.io
        param = (
            '{"spec":  {"tolerations": '
            '[{"effect": "NoSchedule", "key": "xyz", "operator": "Equal", '
            '"value": "true"}]}}')

        ocsini_obj = ocp.OCP(
            resource_name=constants.OCSINIT,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            kind=constants.OCSINITIALIZATION,
        )
        ocsini_obj.patch(params=param, format_type="merge")

        # Add tolerations to the configmap rook-ceph-operator-config
        configmap_obj = ocp.OCP(
            kind=constants.CONFIGMAP,
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
        )
        toleration = configmap_obj.get().get("data").get(
            "CSI_PLUGIN_TOLERATIONS")
        toleration += (
            '\n- key: xyz\n  operator: Equal\n  value: "true"\n  effect: NoSchedule'
        )
        toleration = toleration.replace('"', '\\"').replace("\n", "\\n")
        param_cmd = (
            f'[{{"op": "replace", "path": "/data/CSI_PLUGIN_TOLERATIONS", "value": "{toleration}" }}, '
            f'{{"op": "replace", "path": "/data/CSI_PROVISIONER_TOLERATIONS", "value": "{toleration}" }}]'
        )
        configmap_obj.patch(params=param_cmd, format_type="json")
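        # For reference, the escaped string patched in above corresponds to this YAML
        # entry once rendered back in the configmap (an illustration of the data above,
        # not new configuration):
        #   - key: xyz
        #     operator: Equal
        #     value: "true"
        #     effect: NoSchedule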

        # After the edit, a few pod respins are expected
        assert wait_for_pods_to_be_running(timeout=600, sleep=15)

        # Respin all pods and check that they are still running
        pod_list = get_all_pods(namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        for pod in pod_list:
            pod.delete(wait=False)

        assert wait_for_pods_to_be_running(timeout=600, sleep=15)
        self.sanity_helpers.health_check()

        # Add capacity to check that the new OSDs have the toleration
        osd_size = storage_cluster.get_osd_size()
        count = storage_cluster.add_capacity(osd_size)
        pod = ocp.OCP(kind=constants.POD,
                      namespace=config.ENV_DATA["cluster_namespace"])
        if is_flexible_scaling_enabled():
            replica_count = 1
        else:
            replica_count = 3
        assert pod.wait_for_resource(
            timeout=300,
            condition=constants.STATUS_RUNNING,
            selector=constants.OSD_APP_LABEL,
            resource_count=count * replica_count,
        ), "New OSDs failed to reach running state"
        check_ceph_health_after_add_capacity(ceph_rebalance_timeout=2500)
Example #17
0
    def test_add_capacity(
        self,
        project_factory,
        multi_dc_pod,
        multi_pvc_factory,
        pod_factory,
        mcg_obj,
        awscli_pod,
        bucket_factory,
        percent_to_fill,
    ):

        #####################################
        #           ENTRY CRITERIA          #
        #####################################
        # Prepare the initial configuration: logger, cluster filling, loops for creating
        # and deleting PVCs and pods, noobaa IOs, etc.

        # Perform Health checks:
        # Make sure cluster is healthy
        assert ceph_health_check(
            defaults.ROOK_CLUSTER_NAMESPACE
        ), "Entry criteria FAILED: Cluster is Unhealthy"

        # All OCS pods are in running state:
        # ToDo https://github.com/red-hat-storage/ocs-ci/issues/2361
        assert (
            pod_helpers.check_pods_in_running_state()
        ), "Entry criteria FAILED: one or more OCS pods are not in running state"
        # Create the namespace under which this test will execute:
        project = project_factory()

        # The total number of PVCs created will be 'num_of_pvcs' * 4 PVC types
        # (rbd-rwo, rbd-rwx, cephfs-rwo, cephfs-rwx)
        num_of_pvcs = 40

        rwo_rbd_pods = multi_dc_pod(
            num_of_pvcs=num_of_pvcs,
            pvc_size=175,
            project=project,
            access_mode="RWO",
            pool_type="rbd",
            timeout=360,
        )
        # Note: Skipping cephfs pods creation
        # observing bug https://bugzilla.redhat.com/show_bug.cgi?id=1785399,
        # https://bugzilla.redhat.com/show_bug.cgi?id=1779421#c14
        # Todo: https://github.com/red-hat-storage/ocs-ci/issues/2360

        # Create rwx-rbd pods
        pods_ios_rwx_rbd = multi_dc_pod(
            num_of_pvcs=10,
            pvc_size=175,
            project=project,
            access_mode="RWX-BLK",
            pool_type="rbd",
            timeout=360,
        )

        cluster_fill_io_pods = rwo_rbd_pods
        logger.info("The DC pods are up. Running IOs from them to fill the cluster")
        filler = cluster_exp_helpers.ClusterFiller(
            cluster_fill_io_pods, percent_to_fill, project.namespace
        )
        assert filler.cluster_filler(), "IOs failed"

        # create separate threadpool for running IOs in the background
        executor_run_bg_ios_ops = ThreadPoolExecutor()

        bg_wrap = cluster_exp_helpers.BackgroundOps()
        status_cluster_ios = []
        pods_for_copy = rwo_rbd_pods[0:5] + pods_ios_rwx_rbd

        for p in pods_for_copy:
            logger.info(f"running IOs on {p.name}")
            if p.pod_type == "rbd_block_rwx":
                status_cluster_ios.append(
                    executor_run_bg_ios_ops.submit(
                        bg_wrap.wrap, cluster_exp_helpers.raw_block_io, p, iterations=10
                    )
                )
            else:
                status_cluster_ios.append(
                    executor_run_bg_ios_ops.submit(
                        bg_wrap.wrap,
                        cluster_exp_helpers.cluster_copy_ops,
                        p,
                        iterations=200,
                    )
                )

        # Start PVC ops in the background:
        logger.info("Started pvc create delete operations")
        executor_run_bg_ios_ops.submit(
            bg_wrap.wrap,
            test_create_delete_pvcs,
            multi_pvc_factory,
            pod_factory,
            project,
            iterations=200,
        )

        # Start NooBaa IOs in the background.:
        logger.info("Started s3_io_create_delete...")

        executor_run_bg_ios_ops.submit(
            bg_wrap.wrap,
            s3_io_create_delete,
            mcg_obj,
            awscli_pod,
            bucket_factory,
            iterations=200,
        )

        logger.info("Started obc_io_create_delete...")

        executor_run_bg_ios_ops.submit(
            bg_wrap.wrap,
            obc_io_create_delete,
            mcg_obj,
            awscli_pod,
            bucket_factory,
            iterations=200,
        )

        # All ocs nodes are in Ready state (including master):
        executor_run_bg_ios_ops.submit(
            bg_wrap.wrap, cluster_exp_helpers.check_nodes_status, iterations=100
        )

        # Get the restart count of OCS pods before expansion
        restart_count_before = pod_helpers.get_pod_restarts_count(
            defaults.ROOK_CLUSTER_NAMESPACE
        )

        # Get osd pods before expansion
        osd_pods_before = pod_helpers.get_osd_pods()

        # Get the total space in cluster before expansion
        ct_pod = pod_helpers.get_ceph_tools_pod()
        output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd df")
        total_space_b4_expansion = int(output.get("summary").get("total_kb"))
        logger.info(f"total_space_b4_expansion == {total_space_b4_expansion}")

        logger.info("############## Calling add_capacity $$$$$$$$$$")

        #####################
        # Call add_capacity #
        #####################
        osd_size = storage_cluster.get_osd_size()
        result = storage_cluster.add_capacity(osd_size)
        pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])

        # New osd (all) pods corresponding to the additional capacity should be
        # in running state
        pod.wait_for_resource(
            timeout=1200,
            condition=constants.STATUS_RUNNING,
            selector="app=rook-ceph-osd",
            resource_count=result * 3,
        )

        #################################
        # Exit criteria verification:   #
        #################################
        cluster_exp_helpers.BackgroundOps.EXPANSION_COMPLETED = True

        # No OCS pods should get restarted unexpectedly.
        # Get the restart count of OCS pods after expansion and check whether
        # any pods got restarted
        restart_count_after = pod_helpers.get_pod_restarts_count(
            defaults.ROOK_CLUSTER_NAMESPACE
        )
        # TODO
        # Handle Bug 1814254 - All Mons respinned during add capacity and OSDs took a long time to come up
        # Implement a function to make sure no pods are respun after expansion

        logger.info(
            f"sum(restart_count_before.values()) = {sum(restart_count_before.values())}"
        )
        logger.info(
            f" sum(restart_count_after.values()) = {sum(restart_count_after.values())}"
        )
        assert sum(restart_count_before.values()) == sum(
            restart_count_after.values()
        ), "Exit criteria verification FAILED: One or more pods got restarted"

        logger.info("Exit criteria verification Success: No pods were restarted")
        # Make sure the right number of OSDs are added:
        #   Get OSD pods after expansion
        osd_pods_after = pod_helpers.get_osd_pods()
        number_of_osds_added = len(osd_pods_after) - len(osd_pods_before)
        logger.info(
            f"### number_of_osds_added = {number_of_osds_added}, "
            f"before = {len(osd_pods_before)}, after = {len(osd_pods_after) }"
        )
        # If the difference between the updated OSD count and the old OSD count
        # is not 3, then the expansion failed
        assert (
            number_of_osds_added == 3
        ), "Exit criteria verification FAILED: osd count mismatch"

        logger.info(
            "Exit criteria verification Success: Correct number of OSDs are added"
        )

        # The newly added capacity takes effect at the storage level
        ct_pod = pod_helpers.get_ceph_tools_pod()
        output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd df")
        total_space_after_expansion = int(output.get("summary").get("total_kb"))
        osd_size = int(output.get("nodes")[0].get("kb"))
        expanded_space = osd_size * 3  # 3 OSDS are added of size = 'osd_size'
        logger.info(f"space output == {output} ")
        logger.info(f"osd size == {osd_size} ")
        logger.info(f"total_space_after_expansion == {total_space_after_expansion} ")
        expected_total_space_after_expansion = total_space_b4_expansion + expanded_space
        logger.info(
            f"expected_total_space_after_expansion == {expected_total_space_after_expansion} "
        )
        assert (
            total_space_after_expansion == expected_total_space_after_expansion
        ), "Exit criteria verification FAILED: Expected capacity mismatch"

        logger.info(
            "Exit criteria verification Success: Newly added capacity took into effect"
        )

        logger.info("Exit criteria verification Success: IOs completed successfully")
        # 'ceph osd tree' should show the new OSDs under the right nodes/hosts
        #   Verification is different for 3-AZ and 1-AZ configs
        ct_pod = pod_helpers.get_ceph_tools_pod()
        tree_output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd tree")
        logger.info(f"### OSD tree output = {tree_output}")
        if config.ENV_DATA["platform"].lower() == constants.VSPHERE_PLATFORM:
            assert cluster_helpers.check_osd_tree_1az_vmware(
                tree_output, len(osd_pods_after)
            ), "Exit criteria verification FAILED: Incorrect ceph osd tree formation found"

        aws_number_of_zones = 3
        if config.ENV_DATA["platform"].lower() == constants.AWS_PLATFORM:
            # Parse the osd tree. If it contains a 'rack' node, then it's an
            # AWS 1-AZ cluster; otherwise, it's an AWS 3-AZ cluster
            for i in range(len(tree_output["nodes"])):
                if "rack" in tree_output["nodes"][i]["name"]:
                    aws_number_of_zones = 1
            if aws_number_of_zones == 1:
                assert cluster_helpers.check_osd_tree_1az_aws(
                    tree_output, len(osd_pods_after)
                ), "Exit criteria verification FAILED: Incorrect ceph osd tree formation found"
            else:
                assert cluster_helpers.check_osd_tree_3az_aws(
                    tree_output, len(osd_pods_after)
                ), "Exit criteria verification FAILED: Incorrect ceph osd tree formation found"

        logger.info("Exit criteria verification Success: osd tree verification success")

        # Make sure new pvcs and pods can be created and IOs can be run from
        # the pods
        num_of_pvcs = 1
        rwo_rbd_pods = multi_dc_pod(
            num_of_pvcs=num_of_pvcs,
            pvc_size=5,
            project=project,
            access_mode="RWO",
            pool_type="rbd",
        )
        rwo_cephfs_pods = multi_dc_pod(
            num_of_pvcs=num_of_pvcs,
            pvc_size=5,
            project=project,
            access_mode="RWO",
            pool_type="cephfs",
        )
        rwx_cephfs_pods = multi_dc_pod(
            num_of_pvcs=num_of_pvcs,
            pvc_size=5,
            project=project,
            access_mode="RWX",
            pool_type="cephfs",
        )
        # Create rwx-rbd pods
        pods_ios_rwx_rbd = multi_dc_pod(
            num_of_pvcs=num_of_pvcs,
            pvc_size=5,
            project=project,
            access_mode="RWX-BLK",
            pool_type="rbd",
        )
        cluster_io_pods = (
            rwo_rbd_pods + rwo_cephfs_pods + rwx_cephfs_pods + pods_ios_rwx_rbd
        )

        with ThreadPoolExecutor() as pod_ios_executor:
            for p in cluster_io_pods:
                if p.pod_type == "rbd_block_rwx":
                    logger.info(f"Calling block fio on pod {p.name}")
                    pod_ios_executor.submit(cluster_exp_helpers.raw_block_io, p, "100M")
                else:
                    logger.info(f"calling file fio on pod {p.name}")
                    pod_ios_executor.submit(p.run_io, "fs", "100M")

        for pod_io in cluster_io_pods:
            pod_helpers.get_fio_rw_iops(pod_io)

        # Verify OSDs are encrypted
        if config.ENV_DATA.get("encryption_at_rest"):
            osd_encryption_verification()

        cluster_obj = cluster_helpers.CephCluster()
        assert (
            cluster_obj.get_ceph_health() != "HEALTH_ERR"
        ), "Ceph cluster health checking failed"

        logger.info("ALL Exit criteria verification successfully")
        logger.info(
            "********************** TEST PASSED *********************************"
        )