def test_registry_respin_pod(self, pod_name, iterations):
    """
    Exercise the image registry workload right after a Ceph pod respin.

    The resource named by ``pod_name`` is deleted through the disruption
    helper, then the SVT workload is started, an image is pulled and pushed,
    and the registry pods and the cluster health are validated.

    Args:
        pod_name: Resource name handed to ``Disruptions.set_resource``.
        iterations: Iteration count forwarded to ``svt_setup``.

    Raises:
        UnexpectedBehaviour: If the image is not reported as present in the
            registry.
    """
    # Respin the requested pod before any registry traffic is generated
    log.info(f"Respin Ceph pod {pod_name}")
    ceph_disruption = disruption_helpers.Disruptions()
    ceph_disruption.set_resource(resource=f'{pod_name}')
    ceph_disruption.delete_resource()

    # SVT workload pushes images to the registry
    svt_setup(iterations=iterations)

    # Pull the image, then push it and remember the resulting path
    image_pull(image_url=IMAGE_URL)
    pushed_path = image_push(
        image_url=IMAGE_URL,
        namespace=OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
    )
    self.image_path = pushed_path

    # Log whatever the registry currently lists
    registry_images = image_list_all()
    log.info(f"Image list {registry_images}")

    # The image must be present in the registry
    if not check_image_exists_in_registry(image_url=IMAGE_URL):
        raise UnexpectedBehaviour("Image URL not present in registry")

    # Registry pods first, then overall cluster health
    validate_registry_pod_status()
    self.sanity_helpers.health_check()
def test_registry_respin_pod(self, pod_name):
    """
    Run the registry pull/push workload right after respinning a Ceph pod.

    Args:
        pod_name: Resource name handed to ``Disruptions.set_resource``.
    """
    # Respin the requested pod
    log.info(f"Respin Ceph pod {pod_name}")
    ceph_disruption = disruption_helpers.Disruptions()
    ceph_disruption.set_resource(resource=f'{pod_name}')
    ceph_disruption.delete_resource()

    # Pull and push images into the project owned by this test instance
    log.info("Pull and push images to registries")
    workload_args = {
        'project_name': self.project_name,
        'template': 'eap-cd-basic-s2i',
        'image': (
            'registry.redhat.io/jboss-eap-7-tech-preview/'
            'eap-cd-openshift-rhel8:latest'
        ),
        'pattern': 'eap-app',
    }
    image_pull_and_push(**workload_args)

    # The image has to show up under the project's registry path
    validate_image_exists(namespace=self.project_name)

    # Registry pods first, then overall cluster health
    validate_registry_pod_status()
    self.sanity_helpers.health_check()
def disrupt_plugin_provisioner_pods(self, node_list):
    """
    Prepare disruption objects for the leader plugin-provisioner pods.

    For each of the RBD and CephFS interfaces the leader provisioner pod is
    looked up; it is selected only when the node it runs on is NOT listed in
    ``node_list``.

    Args:
        node_list (list): list of node names to check

    Returns:
        list: list of Disruption objects, one per selected provisioner
    """
    # Interface -> resource name understood by Disruptions.set_resource
    interface_resources = (
        (constants.CEPHBLOCKPOOL, 'rbdplugin_provisioner'),
        (constants.CEPHFILESYSTEM, 'cephfsplugin_provisioner'),
    )

    # Phase 1: decide which provisioners qualify
    selected_resources = []
    for interface, resource_name in interface_resources:
        leader_pod = pod.plugin_provisioner_leader(interface=interface)
        leader_node = pod.get_pod_node(leader_pod).name
        if leader_node in node_list:
            continue
        selected_resources.append(resource_name)

    # Phase 2: build one Disruptions object per selected provisioner
    disruptors = []
    for resource_name in selected_resources:
        provisioner_disruption = disruption_helpers.Disruptions()
        provisioner_disruption.set_resource(resource=resource_name)
        disruptors.append(provisioner_disruption)
    return disruptors
def test_run_pgsql(self, pgsql, transactions, pod_name):
    """
    Run the pgbench workload and respin a Ceph pod while it is running.

    Args:
        pgsql: Fixture object exposing the pgbench helper methods used below.
        transactions: Transaction count forwarded to the benchmark.
        pod_name: Resource name handed to ``Disruptions.set_resource``.
    """
    # Kick off the benchmark and wait until its pod is running
    pgsql.create_pgbench_benchmark(
        replicas=3, transactions=transactions, clients=3
    )
    pgsql.wait_for_pgbench_status(status=constants.STATUS_RUNNING)

    # Respin the requested Ceph pod while pgbench is in progress
    log.info(f"Respin Ceph pod {pod_name}")
    ceph_disruption = disruption_helpers.Disruptions()
    ceph_disruption.set_resource(resource=f'{pod_name}')
    ceph_disruption.delete_resource()

    # The benchmark still has to run to completion
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Validate the run and parse the logs of the pgbench pods
    bench_pods = pgsql.get_pgbench_pods()
    pgsql.validate_pgbench_run(bench_pods)
def test_respin_osd_pods_to_verify_logging(
    self, create_pvc_and_deploymentconfig_pod
):
    """
    Verify project visibility in the EFK stack around an OSD respin.

    Steps:
    1. Take the project with PVC and app pod from the fixture
    2. Respin osd
    3. Check the health of cluster-logging
    4. Check the project exists in the EFK stack
    5. Check the shard count reported for the project
    6. Repeat the existence and shard checks for a second project

    Args:
        create_pvc_and_deploymentconfig_pod: Fixture value unpacked as
            ``(pod_obj, pvc_obj)``.
    """
    # First project and app pod
    _, first_pvc = create_pvc_and_deploymentconfig_pod
    project1 = first_pvc.project.namespace

    # Delete the OSD pod
    osd_disruption = disruption_helpers.Disruptions()
    osd_disruption.set_resource(resource='osd')
    osd_disruption.delete_resource()

    # Cluster-logging has to stay healthy after the respin
    assert ocp_logging_obj.check_health_of_clusterlogging()

    # The first project must be known to the EFK stack
    self.validate_project_exists(first_pvc)

    # Query the document count of the first project
    es_pod = self.get_elasticsearch_pod_obj()
    count_query_1 = f'es_util --query=project.{project1}.*/_count'
    # NOTE(review): unlike the second query below, out_yaml_format is not
    # passed here - confirm the helper's default yields a parsed mapping.
    project1_filecount = es_pod.exec_cmd_on_pod(command=count_query_1)
    assert project1_filecount['_shards']['successful'] != 0, (
        f"No files found in project {project1}"
    )
    logger.info(f'Total number of files in project 1 {project1_filecount}')

    # NOTE(review): the same fixture value is unpacked a second time, so
    # this presumably yields the same objects as above - confirm whether a
    # genuinely new project was intended.
    _, second_pvc = create_pvc_and_deploymentconfig_pod
    project2 = second_pvc.project.namespace

    # The second project must be known to the EFK stack as well
    self.validate_project_exists(second_pvc)
    count_query_2 = f'es_util --query=project.{project2}.*/_count'
    project2_filecount = es_pod.exec_cmd_on_pod(
        command=count_query_2, out_yaml_format=True
    )
    assert project2_filecount['_shards']['successful'] != 0, (
        f"No files found in project {project2}"
    )
    logger.info(
        f'Total number of files in the project 2 {project2_filecount}'
    )
def test_monitoring_after_respinning_ceph_pods(self, test_fixture):
    """
    Validate prometheus PVC metrics after respinning ceph pods.

    The mgr, mon and osd pods are respun one after another. PVC metrics are
    then checked for the pre-existing PVCs and for PVCs/pods created after
    the respin.

    Args:
        test_fixture: Fixture value unpacked as
            ``(namespace_list, pvc_objs, pod_objs, sc)``. The three lists
            are extended in place with the resources created here.
    """
    namespace_list, pvc_objs, pod_objs, sc = test_fixture

    # Re-spin the ceph pods (mgr, mon, osd) one by one
    ceph_disruption = disruption_helpers.Disruptions()
    for ceph_resource in ('mgr', 'mon', 'osd'):
        ceph_disruption.set_resource(resource=ceph_resource)
        ceph_disruption.delete_resource()

    # Metrics of the already existing PVCs must still be collected
    for pvc_obj in pvc_objs:
        assert check_pvcdata_collected_on_prometheus(pvc_obj.name), (
            f"On prometheus pod for created pvc {pvc_obj.name} related data is not collected"
        )

    # Create projects after the respin
    new_namespaces = helpers.create_multilpe_projects(number_of_project=2)
    namespace_list.extend(new_namespaces)

    # Create one PVC per new project, then wait for each to be Bound
    new_pvcs = []
    for project in new_namespaces:
        new_pvcs.append(
            helpers.create_pvc(sc_name=sc.name, namespace=project.namespace)
        )
    for new_pvc in new_pvcs:
        helpers.wait_for_resource_state(new_pvc, constants.STATUS_BOUND)
        new_pvc.reload()
    pvc_objs.extend(new_pvcs)

    # Create one app pod per new PVC, then wait for each to be Running
    new_pods = []
    for new_pvc in new_pvcs:
        new_pods.append(
            helpers.create_pod(
                interface_type=constants.CEPHBLOCKPOOL,
                pvc_name=new_pvc.name,
                namespace=new_pvc.namespace,
            )
        )
    for new_pod in new_pods:
        helpers.wait_for_resource_state(new_pod, constants.STATUS_RUNNING)
        new_pod.reload()
    pod_objs.extend(new_pods)

    # Metrics of the freshly created PVCs must be collected too
    for pvc_obj in new_pvcs:
        assert check_pvcdata_collected_on_prometheus(pvc_obj.name), (
            f"On prometheus pod for created pvc {pvc_obj.name} related data is not collected"
        )
def respin_ceph_pod(self, resource_to_delete):
    """
    Respin every pod of the given ceph resource type, one at a time.

    Per the original author's note, ``delete_resource`` is expected to check
    that the deleted pod is back up and running before returning.

    Args:
        resource_to_delete (str): Ceph resource type to be deleted,
            eg: mgr/mon/osd/mds
    """
    ceph_disruption = disruption_helpers.Disruptions()
    ceph_disruption.set_resource(resource=resource_to_delete)

    # resource_count is read once, after the resource has been set
    pod_count = ceph_disruption.resource_count
    for resource_id in range(pod_count):
        ceph_disruption.delete_resource(resource_id=resource_id)
def test_pv_scale_out_create_pvcs_and_respin_ceph_pods(
    self, fioscale, resource_to_delete,
):
    """
    Respin every pod of a ceph resource type, then check ceph health.

    The body itself performs no scaling; presumably the ``fioscale`` fixture
    provides the scaled PVC+POD setup - verify against the fixture.

    Args:
        fioscale: Fixture requested for its setup; not referenced in the
            body.
        resource_to_delete: Resource name handed to
            ``Disruptions.set_resource``.
    """
    ceph_disruption = disruption_helpers.Disruptions()
    ceph_disruption.set_resource(resource=resource_to_delete)

    # Delete the pods of this resource type one by one
    pod_count = ceph_disruption.resource_count
    for resource_id in range(pod_count):
        ceph_disruption.delete_resource(resource_id=resource_id)

    utils.ceph_health_check()
def test_run_pgsql_respin_pod(self, pgsql, transactions, pod_name):
    """
    Run the pgbench workload and respin a pod while it is running.

    Args:
        pgsql: Fixture object exposing the pgbench helper methods used below.
        transactions: Transaction count forwarded to the benchmark.
        pod_name: ``'postgers'`` respins the pgsql app pod; any other value
            is handed to ``Disruptions.set_resource``.
    """
    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(
        replicas=3, transactions=transactions, clients=3
    )

    # Start measuring time (taken before the wait for the Running state)
    start_time = datetime.now()

    # Wait for pgbench pod to reach running state
    pgsql.wait_for_pgbench_status(status=constants.STATUS_RUNNING)

    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type='worker', print_table=True
    )

    # Respin relevant pod
    # NOTE(review): 'postgers' looks like a misspelling of 'postgres'; it is
    # kept as-is because it must match the value supplied by the caller -
    # confirm against the parametrization before changing it.
    if pod_name == 'postgers':
        pgsql.respin_pgsql_app_pod()
    else:
        log.info(f"Respin Ceph pod {pod_name}")
        disruption = disruption_helpers.Disruptions()
        disruption.set_resource(resource=f'{pod_name}')
        disruption.delete_resource()

    # Wait for pg_bench pod to complete
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Elapsed time since start_time. total_seconds() is used because
    # timedelta.seconds silently drops whole days from the duration.
    elapsed_seconds = int((datetime.now() - start_time).total_seconds())
    log.info(
        f"\npgbench pod reached to completed state after {elapsed_seconds} seconds\n"
    )

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and parse logs
    pgsql.validate_pgbench_run(pgbench_pods)
def test_respin_osd_pods_to_verify_logging(
    self, create_pvc_and_deploymentconfig_pod
):
    """
    Verify project visibility in the EFK stack around an OSD respin.

    Steps:
    1. Take the project with PVC and app pod from the fixture
    2. Respin osd
    3. Check the health of cluster-logging
    4. Check the project exists in the EFK stack
    5. Check the file count of the project in the EFK stack
    6. Repeat the existence and file-count checks for a second project

    Args:
        create_pvc_and_deploymentconfig_pod: Fixture value unpacked as
            ``(pod_obj, pvc_obj)``.
    """
    # First project and app pod
    _, first_pvc = create_pvc_and_deploymentconfig_pod
    project1 = first_pvc.project.namespace

    # Delete the OSD pod
    osd_disruption = disruption_helpers.Disruptions()
    osd_disruption.set_resource(resource='osd')
    osd_disruption.delete_resource()

    # Cluster-logging has to stay healthy after the respin
    assert ocp_logging_obj.check_health_of_clusterlogging()

    # Existence first, then file count, for the first project
    self.validate_project_exists(project1)
    self.check_filecount_in_project(project1)

    # NOTE(review): the same fixture value is unpacked a second time, so
    # this presumably yields the same objects as above - confirm whether a
    # genuinely new project was intended.
    _, second_pvc = create_pvc_and_deploymentconfig_pod
    project2 = second_pvc.project.namespace

    # Same two checks for the second project
    self.validate_project_exists(project2)
    self.check_filecount_in_project(project2)
def test_run_jenkins_respin_pod(
    self, jenkins, pod_name, num_projects, num_of_builds
):
    """
    Run the jenkins workload and respin a pod while builds are in progress.

    Args:
        jenkins: Fixture object exposing the jenkins helper methods used
            below.
        pod_name: Resource name handed to ``Disruptions.set_resource``.
        num_projects: Value assigned to ``jenkins.number_projects``.
        num_of_builds: Value assigned to
            ``jenkins.number_builds_per_project``.
    """
    # Project count must be set before the app, PVC and build config exist
    jenkins.number_projects = num_projects
    jenkins.create_app_jenkins()
    jenkins.create_jenkins_pvc()
    jenkins.create_jenkins_build_config()

    # Wait for the jenkins deploy pod to reach the completed state
    jenkins.wait_for_jenkins_deploy_status(status=STATUS_COMPLETED)

    # Configure and start the builds
    jenkins.number_builds_per_project = num_of_builds
    jenkins.start_build()

    # Respin the requested pod while the builds are running
    log.info(f"Respin pod {pod_name}")
    pod_disruption = disruption_helpers.Disruptions()
    pod_disruption.set_resource(resource=f'{pod_name}')
    pod_disruption.delete_resource()

    # Builds have to reach 'Complete'; then print the results table
    jenkins.wait_for_build_to_complete()
    jenkins.print_completed_builds_results()
import logging from concurrent.futures import ThreadPoolExecutor import pytest from functools import partial from ocs_ci.framework.testlib import ManageTest, tier4, tier4a from ocs_ci.framework import config from ocs_ci.ocs import constants from ocs_ci.ocs.resources.pvc import get_all_pvcs from ocs_ci.ocs.resources import pod from ocs_ci.utility.utils import TimeoutSampler, ceph_health_check from tests import helpers, disruption_helpers logger = logging.getLogger(__name__) DISRUPTION_OPS = disruption_helpers.Disruptions() @tier4 @tier4a @pytest.mark.parametrize( argnames=["interface", "operation_to_disrupt", "resource_to_delete"], argvalues=[ pytest.param(*[constants.CEPHBLOCKPOOL, 'create_pvc', 'mgr'], marks=pytest.mark.polarion_id("OCS-568")), pytest.param(*[constants.CEPHBLOCKPOOL, 'create_pod', 'mgr'], marks=pytest.mark.polarion_id("OCS-569")), pytest.param(*[constants.CEPHBLOCKPOOL, 'run_io', 'mgr'], marks=pytest.mark.polarion_id("OCS-570")), pytest.param(*[constants.CEPHBLOCKPOOL, 'create_pvc', 'mon'], marks=pytest.mark.polarion_id("OCS-561")),
def test_ceph_daemon_kill_during_pod_pvc_deletion(
    self, interface, operation_to_disrupt, resource_name, setup_base
):
    """
    Kill 'resource_name' daemon while deletion of PVCs/pods is progressing

    Args:
        interface: ``constants.CEPHBLOCKPOOL`` or
            ``constants.CEPHFILESYSTEM``.
        operation_to_disrupt: ``'delete_pods'`` or ``'delete_pvcs'``; selects
            the deletion phase during which the daemon is killed. With any
            other value no daemon is killed.
        resource_name: Key of ``pod_functions`` naming the daemon to kill.
        setup_base: Fixture value unpacked as ``(pvc_objs, pod_objs)``.

    Raises:
        AssertionError: If a verification fails, or if a pod references a
            PVC that is not in ``pvc_objs``.
        ValueError: If ``interface`` is neither of the supported interfaces.
    """
    pvc_objs, self.pod_objs = setup_base
    sc_obj = pvc_objs[0].storageclass
    self.namespace = pvc_objs[0].project.namespace

    # Callables returning the current pods of each resource type
    pod_functions = {
        'mds': get_mds_pods,
        'mon': get_mon_pods,
        'mgr': get_mgr_pods,
        'osd': get_osd_pods,
        'rbdplugin': partial(get_plugin_pods, interface=interface),
        'cephfsplugin': partial(get_plugin_pods, interface=interface),
        'cephfsplugin_provisioner': get_cephfsplugin_provisioner_pods,
        'rbdplugin_provisioner': get_rbdfsplugin_provisioner_pods,
        'operator': get_operator_pods,
    }

    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource=resource_name)
    executor = ThreadPoolExecutor(max_workers=1)

    # Get number of pods of type 'resource_name'
    num_of_resource_pods = len(pod_functions[resource_name]())

    # Fetch the number of Pods and PVCs
    initial_num_of_pods = len(get_all_pods(namespace=self.namespace))
    initial_num_of_pvc = len(
        get_all_pvcs(namespace=self.namespace)['items']
    )

    # Fetch PV names
    pv_objs = []
    for pvc_obj in pvc_objs:
        pvc_obj.reload()
        pv_objs.append(pvc_obj.backed_pv_obj)

    # Fetch volume details from pods for the purpose of verification
    node_pv_dict = {}
    for pod_obj in self.pod_objs:
        pod_info = pod_obj.get()
        node = pod_info['spec']['nodeName']
        claim_name = (
            pod_info['spec']['volumes'][0]['persistentVolumeClaim'][
                'claimName'
            ]
        )
        for pvc_obj in pvc_objs:
            if pvc_obj.name == claim_name:
                pvc_obj.reload()
                pv = pvc_obj.backed_pv
                break
        else:
            # Without this guard a stale 'pv' from the previous pod (or an
            # unbound name) would be recorded for this node.
            raise AssertionError(
                f"PVC {claim_name} used by pod {pod_obj.name} was not "
                f"found among the PVCs under test"
            )
        node_pv_dict.setdefault(node, []).append(pv)

    # Do setup for running IO on pods
    log.info("Setting up pods for running IO")
    for pod_obj in self.pod_objs:
        pod_obj.workload_setup(storage_type='fs')
    log.info("Setup for running IO is completed on pods")

    # Start IO on each pod. RWX PVC will be used on two pods. So split the
    # size accordingly
    log.info("Starting IO on pods")
    for pod_obj in self.pod_objs:
        if pod_obj.pvc.access_mode == constants.ACCESS_MODE_RWX:
            io_size = int((self.pvc_size - 1) / 2)
        else:
            io_size = self.pvc_size - 1
        pod_obj.run_io(
            storage_type='fs', size=f'{io_size}G',
            fio_filename=f'{pod_obj.name}_io'
        )
    log.info("IO started on all pods.")

    # Set the daemon to be killed
    disruption.select_daemon()

    # Start deleting pods
    pod_bulk_delete = executor.submit(self.delete_pods)

    if operation_to_disrupt == 'delete_pods':
        ret = self.verify_resource_deletion(
            get_all_pods, initial_num_of_pods
        )
        assert ret, "Wait timeout: Pods are not being deleted."
        log.info("Pods deletion has started.")
        disruption.kill_daemon()

    pods_deleted = pod_bulk_delete.result()
    assert pods_deleted, "Deletion of pods failed."

    # Verify pods are deleted
    for pod_obj in self.pod_objs:
        assert pod_obj.ocp.wait_for_delete(pod_obj.name, 180), (
            f"Pod {pod_obj.name} is not deleted"
        )
    log.info("Verified: Pods are deleted.")

    # Verify that the mount point is removed from nodes after deleting pod
    for node, pvs in node_pv_dict.items():
        cmd = f'oc debug nodes/{node} -- df'
        df_on_node = run_cmd(cmd)
        for pv in pvs:
            assert pv not in df_on_node, (
                f"{pv} is still present on node {node} after "
                f"deleting the pods."
            )
    log.info(
        "Verified: mount points are removed from nodes after deleting "
        "the pods."
    )

    # Fetch image uuid associated with PVCs
    pvc_uuid_map = {}
    for pvc_obj in pvc_objs:
        pvc_uuid_map[pvc_obj.name] = pvc_obj.image_uuid
    log.info("Fetched image uuid associated with each PVC")

    # Start deleting PVCs
    pvc_bulk_delete = executor.submit(delete_pvcs, pvc_objs)

    if operation_to_disrupt == 'delete_pvcs':
        ret = self.verify_resource_deletion(
            get_all_pvcs, initial_num_of_pvc
        )
        assert ret, "Wait timeout: PVCs are not being deleted."
        log.info("PVCs deletion has started.")
        disruption.kill_daemon()

    pvcs_deleted = pvc_bulk_delete.result()
    assert pvcs_deleted, "Deletion of PVCs failed."

    # Verify PVCs are deleted
    for pvc_obj in pvc_objs:
        assert pvc_obj.ocp.wait_for_delete(pvc_obj.name), (
            f"PVC {pvc_obj.name} is not deleted"
        )
    log.info("Verified: PVCs are deleted.")

    # Verify PVs are deleted
    for pv_obj in pv_objs:
        assert pv_obj.ocp.wait_for_delete(pv_obj.name, 120), (
            f"PV {pv_obj.name} is not deleted"
        )
    log.info("Verified: PVs are deleted.")

    # Verify PV using ceph toolbox. Image/Subvolume should be deleted.
    for pvc_name, uuid in pvc_uuid_map.items():
        if interface == constants.CEPHBLOCKPOOL:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid,
                pool_name=sc_obj.ceph_pool.name
            )
        elif interface == constants.CEPHFILESYSTEM:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid
            )
        else:
            # Previously 'ret' stayed unbound (or stale) in this case
            raise ValueError(f"Unsupported interface: {interface}")
        assert ret, (
            f"Volume associated with PVC {pvc_name} still exists "
            f"in backend"
        )

    # Verify number of pods of type 'resource_name'
    final_num_of_resource_pods = len(pod_functions[resource_name]())
    assert final_num_of_resource_pods == num_of_resource_pods, (
        f"Total number of {resource_name} pods is not matching with "
        f"initial value. Total number of pods before daemon kill: "
        f"{num_of_resource_pods}. Total number of pods present now: "
        f"{final_num_of_resource_pods}"
    )

    # Check ceph status
    ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
    log.info("Ceph cluster health is OK")
def test_daemon_kill_during_pvc_pod_creation_and_io(
    self, interface, resource_name, setup, multi_pvc_factory, pod_factory
):
    """
    Kill 'resource_name' daemon while PVCs creation, pods creation and IO
    operation are progressing.

    Args:
        interface: Interface value forwarded to the PVC/pod creation
            helpers.
        resource_name: Key of ``pod_functions`` naming the daemon to kill.
        setup: Fixture value unpacked as
            ``(pvc_objs, io_pods, pvc_objs_new_pods, access_modes)``.
        multi_pvc_factory: Factory submitted to the executor to create the
            new PVCs.
        pod_factory: Factory forwarded to ``helpers.create_pods``.

    Raises:
        AssertionError: If a verification fails, or if a pod references a
            PVC that is not in ``pvc_objs``.
    """
    num_of_new_pvcs = 5
    pvc_objs, io_pods, pvc_objs_new_pods, access_modes = setup
    proj_obj = pvc_objs[0].project
    storageclass = pvc_objs[0].storageclass

    # Callables returning the current pods of each resource type
    pod_functions = {
        'mds': get_mds_pods,
        'mon': get_mon_pods,
        'mgr': get_mgr_pods,
        'osd': get_osd_pods,
        'rbdplugin': partial(get_plugin_pods, interface=interface),
        'cephfsplugin': partial(get_plugin_pods, interface=interface),
        'cephfsplugin_provisioner': get_cephfsplugin_provisioner_pods,
        'rbdplugin_provisioner': get_rbdfsplugin_provisioner_pods,
        'operator': get_operator_pods,
    }

    executor = ThreadPoolExecutor(max_workers=len(io_pods))

    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource=resource_name)

    # Get number of pods of type 'resource_name'
    resource_pods_num = len(pod_functions[resource_name]())

    # Do setup for running IO on pods
    log.info("Setting up pods for running IO")
    for pod_obj in io_pods:
        storage_type = (
            'block' if pod_obj.pvc.volume_mode == 'Block' else 'fs'
        )
        executor.submit(pod_obj.workload_setup, storage_type=storage_type)

    # Wait for setup on pods to complete
    for pod_obj in io_pods:
        log.info(f"Waiting for IO setup to complete on pod {pod_obj.name}")
        for sample in TimeoutSampler(
            180, 2, getattr, pod_obj, 'wl_setup_done'
        ):
            if sample:
                log.info(
                    f"Setup for running IO is completed on pod "
                    f"{pod_obj.name}."
                )
                break
    log.info("Setup for running IO is completed on pods")

    # Set daemon to be killed
    disruption.select_daemon()

    # Start creating new pods
    log.info("Start creating new pods.")
    bulk_pod_create = executor.submit(
        helpers.create_pods, pvc_objs_new_pods, pod_factory, interface, 2
    )

    # Start creation of new PVCs
    log.info("Start creating new PVCs.")
    bulk_pvc_create = executor.submit(
        multi_pvc_factory, interface=interface,
        project=proj_obj, storageclass=storageclass, size=self.pvc_size,
        access_modes=access_modes,
        access_modes_selection='distribute_random',
        status="", num_of_pvc=num_of_new_pvcs, wait_each=False
    )

    # Start IO on each pod
    log.info("Start IO on pods")
    for pod_obj in io_pods:
        storage_type = (
            'block' if pod_obj.pvc.volume_mode == 'Block' else 'fs'
        )
        pod_obj.run_io(
            storage_type=storage_type, size='1G', runtime=10,
            fio_filename=f'{pod_obj.name}_io_file1'
        )
    log.info("IO started on all pods.")

    # Kill daemon
    disruption.kill_daemon()

    # Getting result of PVC creation as list of PVC objects
    pvc_objs_new = bulk_pvc_create.result()

    # Confirm PVCs are Bound
    for pvc_obj in pvc_objs_new:
        helpers.wait_for_resource_state(
            resource=pvc_obj, state=constants.STATUS_BOUND, timeout=180
        )
        pvc_obj.reload()
    log.info("Verified: New PVCs are Bound.")

    # Getting result of pods creation as list of Pod objects
    pod_objs_new = bulk_pod_create.result()

    # Verify new pods are Running
    for pod_obj in pod_objs_new:
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING
        )
        pod_obj.reload()
    log.info("Verified: All new pods are Running.")

    # Verify IO
    log.info("Fetching IO results from IO pods.")
    for pod_obj in io_pods:
        fio_result = pod_obj.get_fio_results()
        err_count = fio_result.get('jobs')[0].get('error')
        assert err_count == 0, (
            f"FIO error on pod {pod_obj.name}. FIO result: {fio_result}"
        )
        log.info(f"IOPs after FIO on pod {pod_obj.name}:")
        log.info(
            f"Read: {fio_result.get('jobs')[0].get('read').get('iops')}"
        )
        log.info(
            f"Write: {fio_result.get('jobs')[0].get('write').get('iops')}"
        )
    log.info("Verified IO result on IO pods.")

    all_pod_objs = io_pods + pod_objs_new

    # Fetch volume details from pods for the purpose of verification.
    # The loop variable is 'pod_obj' (not 'pod') so it does not shadow the
    # name used for the pod module elsewhere in this code base.
    node_pv_dict = {}
    for pod_obj in all_pod_objs:
        pod_info = pod_obj.get()
        node = pod_info['spec']['nodeName']
        claim_name = (
            pod_info['spec']['volumes'][0]['persistentVolumeClaim'][
                'claimName'
            ]
        )
        for pvc_obj in pvc_objs:
            if pvc_obj.name == claim_name:
                pvc_obj.reload()
                pv = pvc_obj.backed_pv
                break
        else:
            # Without this guard a stale 'pv' from the previous pod (or an
            # unbound name) would be recorded for this node.
            raise AssertionError(
                f"PVC {claim_name} used by pod {pod_obj.name} was not "
                f"found among the PVCs under test"
            )
        node_pv_dict.setdefault(node, []).append(pv)

    # Delete pods
    for pod_obj in all_pod_objs:
        pod_obj.delete(wait=False)

    # Verify pods are deleted
    for pod_obj in all_pod_objs:
        pod_obj.ocp.wait_for_delete(resource_name=pod_obj.name)

    # Verify number of 'resource_name' type pods
    final_resource_pods_num = len(pod_functions[resource_name]())
    assert final_resource_pods_num == resource_pods_num, (
        f"Total number of {resource_name} pods is not matching with "
        f"initial value. Total number of pods before daemon kill: "
        f"{resource_pods_num}. Total number of pods present now: "
        f"{final_resource_pods_num}"
    )

    # Verify volumes are unmapped from nodes after deleting the pods
    node_pv_mounted = helpers.verify_pv_mounted_on_node(node_pv_dict)
    for node, pvs in node_pv_mounted.items():
        assert not pvs, (
            f"PVs {pvs} is still present on node {node} after "
            f"deleting the pods."
        )
    log.info(
        "Verified: mount points are removed from nodes after deleting "
        "the pods"
    )

    # Set volume mode on PVC objects
    for pvc_obj in pvc_objs_new:
        pvc_info = pvc_obj.get()
        pvc_obj.volume_mode = pvc_info['spec']['volumeMode']

    # Verify that PVCs are reusable by creating new pods
    all_pvc_objs = pvc_objs + pvc_objs_new
    pod_objs_re = helpers.create_pods(
        all_pvc_objs, pod_factory, interface, 2
    )

    # Verify pods are Running
    for pod_obj in pod_objs_re:
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING
        )
        pod_obj.reload()
    log.info("Successfully created new pods using all PVCs.")

    # Run IO on each of the newly created pods
    for pod_obj in pod_objs_re:
        storage_type = (
            'block' if pod_obj.pvc.volume_mode == 'Block' else 'fs'
        )
        pod_obj.run_io(
            storage_type=storage_type, size='1G', runtime=10,
            fio_filename=f'{pod_obj.name}_io_file2'
        )

    log.info("Fetching IO results from newly created pods")
    for pod_obj in pod_objs_re:
        fio_result = pod_obj.get_fio_results()
        err_count = fio_result.get('jobs')[0].get('error')
        assert err_count == 0, (
            f"FIO error on pod {pod_obj.name}. FIO result: {fio_result}"
        )
        log.info(f"IOPs after FIO on pod {pod_obj.name}:")
        log.info(
            f"Read: {fio_result.get('jobs')[0].get('read').get('iops')}"
        )
        log.info(
            f"Write: {fio_result.get('jobs')[0].get('write').get('iops')}"
        )
    log.info("Verified IO result on newly created pods.")
def test_disruptive_during_pod_pvc_deletion_and_io(
    self, interface, resource_to_delete, setup_base
):
    """
    Delete ceph/rook pod while PVCs deletion, pods deletion and IO are
    progressing

    Args:
        interface: ``constants.CEPHBLOCKPOOL`` or
            ``constants.CEPHFILESYSTEM``.
        resource_to_delete: Key of ``pod_functions`` ('mds', 'mon', 'mgr' or
            'osd') naming the pod type to delete.
        setup_base: Fixture value unpacked as
            ``(pvc_objs, pod_objs, rwx_pod_objs)``.

    Raises:
        AssertionError: If a verification fails, or if a pod references a
            PVC that is not in ``pvc_objs``.
        ValueError: If ``interface`` is neither of the supported interfaces.
    """
    pvc_objs, pod_objs, rwx_pod_objs = setup_base
    sc_obj = pvc_objs[0].storageclass
    namespace = pvc_objs[0].project.namespace

    num_of_pods_to_delete = 10
    num_of_io_pods = 5

    # Select pods to be deleted, plus the RWX pods sharing their PVCs
    pods_to_delete = pod_objs[:num_of_pods_to_delete]
    pods_to_delete.extend(
        [
            rwx_pod for rwx_pod in rwx_pod_objs
            for pod_obj in pods_to_delete
            if pod_obj.pvc == rwx_pod.pvc
        ]
    )

    # Select pods to run IO, plus the RWX pods sharing their PVCs
    io_pods = pod_objs[
        num_of_pods_to_delete:num_of_pods_to_delete + num_of_io_pods
    ]
    io_pods.extend(
        [
            rwx_pod for rwx_pod in rwx_pod_objs
            for pod_obj in io_pods
            if pod_obj.pvc == rwx_pod.pvc
        ]
    )

    # Select pods which are having PVCs to delete. The PVC list is taken
    # before the RWX pods are appended, so it has one entry per PVC.
    pods_for_pvc = pod_objs[num_of_pods_to_delete + num_of_io_pods:]
    pvcs_to_delete = [pod_obj.pvc for pod_obj in pods_for_pvc]
    pods_for_pvc.extend(
        [
            rwx_pod for rwx_pod in rwx_pod_objs
            for pod_obj in pods_for_pvc
            if pod_obj.pvc == rwx_pod.pvc
        ]
    )

    log.info(
        f"{len(pods_to_delete)} pods selected for deletion in which "
        f"{len(pods_to_delete) - num_of_pods_to_delete} pairs of pod "
        f"share same RWX PVC"
    )
    log.info(
        f"{len(io_pods)} pods selected for running IO in which "
        f"{len(io_pods) - num_of_io_pods} pairs of pod share same "
        f"RWX PVC"
    )
    no_of_rwx_pvcs_delete = len(pods_for_pvc) - len(pvcs_to_delete)
    log.info(
        f"{len(pvcs_to_delete)} PVCs selected for deletion. "
        f"RWO PVCs: {len(pvcs_to_delete) - no_of_rwx_pvcs_delete}, "
        f"RWX PVCs: {no_of_rwx_pvcs_delete}"
    )

    # Callables returning the current pods of each resource type
    pod_functions = {
        'mds': get_mds_pods, 'mon': get_mon_pods, 'mgr': get_mgr_pods,
        'osd': get_osd_pods
    }
    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource=resource_to_delete)
    executor = ThreadPoolExecutor(max_workers=len(pod_objs))

    # Get number of pods of type 'resource_to_delete'
    num_of_resource_to_delete = len(pod_functions[resource_to_delete]())

    # Fetch the number of Pods and PVCs
    initial_num_of_pods = len(get_all_pods(namespace=namespace))
    initial_num_of_pvc = len(
        get_all_pvcs(namespace=namespace)['items']
    )

    # Fetch PV names to verify after deletion
    pv_objs = []
    for pvc_obj in pvcs_to_delete:
        pvc_obj.reload()
        pv_objs.append(pvc_obj.backed_pv_obj)

    # Fetch volume details from pods for the purpose of verification
    node_pv_dict = {}
    for pod_obj in pods_to_delete:
        pod_info = pod_obj.get()
        node = pod_info['spec']['nodeName']
        claim_name = (
            pod_info['spec']['volumes'][0]['persistentVolumeClaim'][
                'claimName'
            ]
        )
        for pvc_obj in pvc_objs:
            if pvc_obj.name == claim_name:
                pvc_obj.reload()
                pv = pvc_obj.backed_pv
                break
        else:
            # Without this guard a stale 'pv' from the previous pod (or an
            # unbound name) would be recorded for this node.
            raise AssertionError(
                f"PVC {claim_name} used by pod {pod_obj.name} was not "
                f"found among the PVCs under test"
            )
        node_pv_dict.setdefault(node, []).append(pv)

    # Fetch image uuid associated with PVCs to be deleted
    pvc_uuid_map = {}
    for pvc_obj in pvcs_to_delete:
        pvc_uuid_map[pvc_obj.name] = pvc_obj.image_uuid
    log.info("Fetched image uuid associated with each PVC")

    # Do setup on pods for running IO
    log.info("Setting up pods for running IO.")
    for pod_obj in pod_objs + rwx_pod_objs:
        executor.submit(pod_obj.workload_setup, storage_type='fs')

    # Wait for setup on pods to complete
    for pod_obj in pod_objs + rwx_pod_objs:
        for sample in TimeoutSampler(
            180, 2, getattr, pod_obj, 'wl_setup_done'
        ):
            if sample:
                log.info(
                    f"Setup for running IO is completed on pod "
                    f"{pod_obj.name}."
                )
                break
    log.info("Setup for running IO is completed on all pods.")

    # Start IO on pods having PVCs to delete to load data
    log.info("Starting IO on pods having PVCs to delete.")
    self.run_io_on_pods(pods_for_pvc)
    log.info("IO started on pods having PVCs to delete.")

    log.info("Fetching IO results from the pods having PVCs to delete.")
    for pod_obj in pods_for_pvc:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on pods having PVCs to delete.")

    # Delete pods having PVCs to delete.
    assert self.delete_pods(pods_for_pvc), (
        "Couldn't delete pods which are having PVCs to delete."
    )
    for pod_obj in pods_for_pvc:
        pod_obj.ocp.wait_for_delete(pod_obj.name)
    log.info("Verified: Deleted pods which are having PVCs to delete.")

    # Start IO on pods to be deleted
    log.info("Starting IO on pods to be deleted.")
    self.run_io_on_pods(pods_to_delete)
    log.info("IO started on pods to be deleted.")

    # Start deleting PVCs
    pvc_bulk_delete = executor.submit(delete_pvcs, pvcs_to_delete)
    log.info("Started deleting PVCs")

    # Start deleting pods
    pod_bulk_delete = executor.submit(self.delete_pods, pods_to_delete)
    log.info("Started deleting pods")

    # Start IO on IO pods
    self.run_io_on_pods(io_pods)
    log.info("Started IO on IO pods")

    # Verify pvc deletion has started
    pvc_deleting = executor.submit(
        wait_for_resource_count_change, func_to_use=get_all_pvcs,
        previous_num=initial_num_of_pvc, namespace=namespace,
        change_type='decrease', min_difference=1, timeout=30,
        interval=0.01
    )

    # Verify pod deletion has started
    pod_deleting = executor.submit(
        wait_for_resource_count_change, func_to_use=get_all_pods,
        previous_num=initial_num_of_pods, namespace=namespace,
        change_type='decrease', min_difference=1, timeout=30,
        interval=0.01
    )

    assert pvc_deleting.result(), (
        "Wait timeout: PVCs are not being deleted."
    )
    log.info("PVCs deletion has started.")

    assert pod_deleting.result(), (
        "Wait timeout: Pods are not being deleted."
    )
    log.info("Pods deletion has started.")

    # Delete pod of type 'resource_to_delete'
    disruption.delete_resource()

    pods_deleted = pod_bulk_delete.result()
    assert pods_deleted, "Deletion of pods failed."

    # Verify pods are deleted
    for pod_obj in pods_to_delete:
        pod_obj.ocp.wait_for_delete(pod_obj.name)
    log.info("Verified: Pods are deleted.")

    # Verify that the mount point is removed from nodes after deleting pod
    node_pv_mounted = verify_pv_mounted_on_node(node_pv_dict)
    for node, pvs in node_pv_mounted.items():
        assert not pvs, (
            f"PVs {pvs} is still present on node {node} after "
            f"deleting the pods."
        )
    log.info(
        "Verified: mount points are removed from nodes after deleting "
        "the pods"
    )

    pvcs_deleted = pvc_bulk_delete.result()
    assert pvcs_deleted, "Deletion of PVCs failed."

    # Verify PVCs are deleted. Logged through 'log' like the rest of this
    # test instead of the root logger.
    for pvc_obj in pvcs_to_delete:
        pvc_obj.ocp.wait_for_delete(pvc_obj.name)
    log.info("Verified: PVCs are deleted.")

    # Verify PVs are deleted
    for pv_obj in pv_objs:
        pv_obj.ocp.wait_for_delete(resource_name=pv_obj.name, timeout=300)
    log.info("Verified: PVs are deleted.")

    # Verify PV using ceph toolbox. Image/Subvolume should be deleted.
    for pvc_name, uuid in pvc_uuid_map.items():
        if interface == constants.CEPHBLOCKPOOL:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid,
                pool_name=sc_obj.ceph_pool.name
            )
        elif interface == constants.CEPHFILESYSTEM:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid
            )
        else:
            # Previously 'ret' stayed unbound (or stale) in this case
            raise ValueError(f"Unsupported interface: {interface}")
        assert ret, (
            f"Volume associated with PVC {pvc_name} still exists "
            f"in backend"
        )

    log.info("Fetching IO results from the pods.")
    for pod_obj in io_pods:
        fio_result = pod_obj.get_fio_results()
        err_count = fio_result.get('jobs')[0].get('error')
        assert err_count == 0, (
            f"FIO error on pod {pod_obj.name}. FIO result: {fio_result}"
        )
    log.info("Verified IO result on pods.")

    # Verify number of pods of type 'resource_to_delete'
    final_num_resource_to_delete = len(pod_functions[resource_to_delete]())
    assert final_num_resource_to_delete == num_of_resource_to_delete, (
        f"Total number of {resource_to_delete} pods is not matching with "
        f"initial value. Total number of pods before deleting a pod: "
        f"{num_of_resource_to_delete}. Total number of pods present now: "
        f"{final_num_resource_to_delete}"
    )

    # Check ceph status
    ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
    log.info("Ceph cluster health is OK")
def test_run_pgsql(self, transactions, pod_name):
    """
    Run the pgbench workload, respin a Ceph pod meanwhile and export results.

    The benchmark resource is created from ``constants.PGSQL_BENCHMARK_YAML``
    and is deleted again in a ``finally`` clause, so it is cleaned up even
    when a wait, validation or export step fails.

    Args:
        transactions: Transaction count written into the benchmark spec. It
            is also used as the sampler timeout while waiting for the
            pgbench pod, and tripled for the completion timeout.
        pod_name: Resource name handed to ``Disruptions.set_resource``.

    Raises:
        UnexpectedBehaviour: If a parsed result has no ``latency_avg``.
    """
    # Create pgbench benchmark
    log.info("Create resource file for pgbench workload")
    pg_trans = transactions
    timeout = pg_trans * 3
    pg_data = templating.load_yaml(constants.PGSQL_BENCHMARK_YAML)
    pg_data['spec']['workload']['args']['transactions'] = pg_trans
    pg_obj = OCS(**pg_data)
    pg_obj.create()

    try:
        # Wait for pgbench pod to be created
        for pgbench_pod in TimeoutSampler(
            pg_trans, 3, get_pod_name_by_pattern, 'pgbench', 'my-ripsaw'
        ):
            try:
                if pgbench_pod[0] is not None:
                    pgbench_client_pod = pgbench_pod[0]
                    break
            except IndexError:
                # Empty sample: no matching pod name yet
                log.info("Bench pod not ready yet")

        # Respin Ceph pod
        log.info(f"Respin Ceph pod {pod_name}")
        disruption = disruption_helpers.Disruptions()
        disruption.set_resource(resource=f'{pod_name}')
        disruption.delete_resource()

        # Wait for pg_bench pod to initialized and complete
        log.info("Waiting for pgbench_client to complete")
        pod_obj = OCP(kind='pod')
        pod_obj.wait_for_resource(
            condition='Completed',
            resource_name=pgbench_client_pod,
            timeout=timeout,
            sleep=10,
        )

        # Running pgbench and parsing logs
        output = run_cmd(f'oc logs {pgbench_client_pod}')
        pg_output = utils.parse_pgsql_logs(output)
        log.info(
            "*******PGBench output log*********\n"
            f"{pg_output}"
        )
        for data in pg_output:
            latency_avg = data['latency_avg']
            if not latency_avg:
                raise UnexpectedBehaviour(
                    "PGBench failed to run, no data found on latency_avg"
                )
        log.info("PGBench has completed successfully")

        # Collect data and export to Google doc spreadsheet
        g_sheet = GoogleSpreadSheetAPI(
            sheet_name="OCS PGSQL", sheet_index=2
        )
        for lat in pg_output:
            lat_avg = lat['latency_avg']
            lat_stddev = lat['lat_stddev']
            tps_incl = lat['tps_incl']
            tps_excl = lat['tps_excl']
            g_sheet.insert_row(
                [
                    int(lat_avg), int(lat_stddev),
                    int(tps_incl), int(tps_excl),
                ],
                2
            )
    finally:
        # Clean up pgbench benchmark, also on failure paths
        log.info("Deleting PG bench benchmark")
        pg_obj.delete()
def test_resource_deletion_during_pvc_expansion(self, resource_to_delete):
    """
    Check that PVC expansion succeeds even though a rook-ceph or csi pod
    is re-spun while the expansion is in progress

    Args:
        resource_to_delete (str): Resource name handed to the disruption
            helper; that resource is deleted once all expansions have been
            submitted

    """
    def _fio_storage_type(target_pod):
        # fio storage type follows the volume mode of the pod's PVC
        if target_pod.pvc.volume_mode == 'Block':
            return 'block'
        return 'fs'

    def _assert_io_clean(target_pod):
        # Fetch the fio result of one pod and fail on any reported error
        result = target_pod.get_fio_results()
        errors = result.get('jobs')[0].get('error')
        assert errors == 0, (
            f"IO error on pod {target_pod.name}. "
            f"FIO result: {result}"
        )
        log.info(f"Verified IO on pod {target_pod.name}.")

    pvc_size_expanded = 30
    executor = ThreadPoolExecutor(max_workers=len(self.pvcs))
    disruption_ops = disruption_helpers.Disruptions()

    # Fill some data before the expansion
    log.info(
        "Running IO on all pods to fill some data before PVC expansion."
    )
    for io_pod in self.pods:
        io_pod.run_io(
            storage_type=_fio_storage_type(io_pod),
            size='4G',
            io_direction='write',
            runtime=30,
            rate='10M',
            fio_filename=f'{io_pod.name}_f1'
        )

    log.info("Wait for IO to complete on pods")
    for io_pod in self.pods:
        _assert_io_clean(io_pod)
    log.info("IO is successful on all pods before PVC expansion.")

    # Choose the pod that will be deleted
    disruption_ops.set_resource(resource=resource_to_delete)

    # Submit every expansion to the pool so they run in the background
    log.info("Expanding all PVCs.")
    for claim in self.pvcs:
        log.info(
            f"Expanding size of PVC {claim.name} to {pvc_size_expanded}G"
        )
        claim.expand_proc = executor.submit(
            claim.resize_pvc, pvc_size_expanded, True
        )

    # Delete the pod 'resource_to_delete' while expansions are running
    disruption_ops.delete_resource()

    # Collect the outcome of each expansion
    for claim in self.pvcs:
        assert claim.expand_proc.result(), (
            f"Expansion failed for PVC {claim.name}"
        )
    log.info("PVC expansion was successful on all PVCs")

    # Fill more data now that the PVCs are bigger
    log.info("Write more data after PVC expansion.")
    for io_pod in self.pods:
        io_pod.run_io(
            storage_type=_fio_storage_type(io_pod),
            size='10G',
            io_direction='write',
            runtime=30,
            rate='10M',
            fio_filename=f'{io_pod.name}_f2'
        )

    log.info("Wait for IO to complete on all pods")
    for io_pod in self.pods:
        _assert_io_clean(io_pod)
    log.info("IO is successful on all pods after PVC expansion.")
def operations_base(self, resource_to_delete):
    """
    Delete resource 'resource_to_delete' while PVCs creation, Pods
    creation and IO operation are progressing.
    Verifies PVCs can be re-used by creating new pods.

    Steps:
    1. Create pods for running IO and verify they are Running.
    2. Start creating more pods.
    3. Start creating new PVCs.
    4. Start IO on pods created in Step 1.
    5. Delete the resource 'resource_to_delete'.
    6. Verify that PVCs created in Step 3 are in Bound state.
    7. Verify that pods created in Step 2 are Running.
    8. Verify IO results.
    9. Delete pods created in Steps 1 and 2.
    10. Verify the total number of 'resource_to_delete' pods.
    11. Verify volumes are unmapped from nodes after deleting pods.
    12. Use all PVCs to create new pods. One PVC for one pod.
    13. Start IO on all pods created in Step 12.
    14. Verify IO results.

    Args:
        resource_to_delete (str): Type of the pod to delete; must be one
            of 'mds', 'mon', 'mgr' or 'osd'

    """
    # Single source for the wait timeout so that the assertion messages
    # can never drift from the value actually used
    wait_timeout = 240

    # Separate the available PVCs
    pvc_objs_for_io_pods = self.pvc_objs[0:self.pvc_num_for_io_pods]
    pvc_objs_new_pods = self.pvc_objs[self.pvc_num_for_io_pods:]

    pod_functions = {
        'mds': get_mds_pods, 'mon': get_mon_pods, 'mgr': get_mgr_pods,
        'osd': get_osd_pods
    }

    executor = ThreadPoolExecutor(max_workers=2)

    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource=resource_to_delete)

    # Get number of pods
    initial_pods_num = len(pod_functions[resource_to_delete]())

    # Create pods for running IO
    io_pods = helpers.create_pods(
        pvc_objs_list=pvc_objs_for_io_pods,
        interface_type=self.interface,
        desired_status=constants.STATUS_RUNNING,
        wait=True,
        namespace=self.namespace
    )

    # Updating self.pod_objs for the purpose of teardown
    self.pod_objs.extend(io_pods)

    # Do setup for running IO on pods
    log.info("Setting up pods for running IO")
    for pod_obj in io_pods:
        pod_obj.workload_setup(storage_type='fs')
    log.info("Setup for running IO is completed on pods")

    # Start creating new pods
    log.info("Start creating new pods.")
    bulk_pod_create = executor.submit(
        helpers.create_pods,
        pvc_objs_list=pvc_objs_new_pods,
        interface_type=self.interface,
        wait=False,
        namespace=self.namespace
    )

    # Start creation of new PVCs
    log.info("Start creating new PVCs.")
    bulk_pvc_create = executor.submit(
        helpers.create_multiple_pvcs,
        sc_name=self.sc_obj.name,
        namespace=self.namespace,
        number_of_pvc=self.num_of_new_pvcs,
        size=self.pvc_size,
        wait=False
    )

    # Start IO on each pod
    log.info("Start IO on pods")
    for pod_obj in io_pods:
        pod_obj.run_io(storage_type='fs', size=f'{self.pvc_size_int - 1}G')
    log.info("IO started on all pods.")

    # Delete the resource
    disruption.delete_resource()

    # Getting result of PVC creation as list of PVC objects
    pvc_objs_new = bulk_pvc_create.result()

    # Updating self.pvc_objs_new for the purpose of teardown
    self.pvc_objs_new.extend(pvc_objs_new)

    # Verify PVCs are Bound
    for pvc_obj in pvc_objs_new:
        assert pvc_obj.ocp.wait_for_resource(
            condition=constants.STATUS_BOUND,
            resource_name=pvc_obj.name,
            timeout=wait_timeout,
            sleep=10
        ), (f"Wait timeout: PVC {pvc_obj.name} is not in 'Bound' status")
    log.info("Verified: New PVCs are Bound.")

    # Getting result of pods creation as list of Pod objects
    pod_objs_new = bulk_pod_create.result()

    # Updating self.pod_objs for the purpose of teardown
    self.pod_objs.extend(pod_objs_new)

    # Verify new pods are Running
    for pod_obj in pod_objs_new:
        assert pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=pod_obj.name,
            timeout=wait_timeout,
            sleep=10
        ), (
            f"Wait timeout: Pod {pod_obj.name} is not in 'Running' "
            f"state even after {wait_timeout} seconds."
        )
    log.info("Verified: All pods are Running.")

    # Verify IO
    log.info("Fetching IO results.")
    for pod_obj in io_pods:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on pods.")

    all_pod_objs = io_pods + pod_objs_new

    # Fetch volume details from pods for the purpose of verification
    node_pv_dict = {}
    for pod_obj in all_pod_objs:
        pod_info = pod_obj.get()
        node = pod_info['spec']['nodeName']
        claim_name = pod_info['spec']['volumes'][0][
            'persistentVolumeClaim'
        ]['claimName']
        for pvc_obj in self.pvc_objs:
            if pvc_obj.name == claim_name:
                pvc_obj.reload()
                pv = pvc_obj.backed_pv
                break
        else:
            # Without this guard a PV left over from a previous pod would
            # be recorded silently for this node
            raise AssertionError(
                f"PVC {claim_name} used by pod {pod_obj.name} was not "
                f"found in the list of known PVCs"
            )
        node_pv_dict.setdefault(node, []).append(pv)

    # Delete pods
    for pod_obj in all_pod_objs:
        pod_obj.delete(wait=False)

    # Verify pods are deleted
    for pod_obj in all_pod_objs:
        pod_obj.ocp.wait_for_delete(resource_name=pod_obj.name)

    # Updating self.pod_objs for the purpose of teardown
    self.pod_objs.clear()

    # Verify number of 'resource_to_delete' type pods
    final_pods_num = len(pod_functions[resource_to_delete]())
    assert final_pods_num == initial_pods_num, (
        f"Total number of {resource_to_delete} pods is not matching with "
        f"initial value. Total number of pods before deleting a pod: "
        f"{initial_pods_num}. Total number of pods present now: "
        f"{final_pods_num}"
    )

    # Verify volumes are unmapped from nodes after deleting the pods
    for node, pvs in node_pv_dict.items():
        cmd = f'oc debug nodes/{node} -- df'
        df_on_node = run_cmd(cmd)
        for pv in pvs:
            assert pv not in df_on_node, (
                f"{pv} is still present on node {node} after "
                f"deleting the pods."
            )

    # Verify that PVCs are reusable by creating new pods
    all_pvc_objs = self.pvc_objs + pvc_objs_new
    pod_objs_re = helpers.create_pods(
        pvc_objs_list=all_pvc_objs,
        interface_type=self.interface,
        desired_status=constants.STATUS_RUNNING,
        wait=True,
        namespace=self.namespace
    )
    log.info("Successfully created new pods using all PVCs.")

    # Updating self.pod_objs for the purpose of teardown
    self.pod_objs.extend(pod_objs_re)

    # Run IO on each of the newly created pods
    for pod_obj in pod_objs_re:
        pod_obj.run_io(
            storage_type='fs', size='100M', runtime=10,
            fio_filename='fio-file-retest'
        )

    log.info("Fetching IO results from newly created pods")
    for pod_obj in pod_objs_re:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on newly created pods.")
def test_ceph_daemon_kill_during_resource_creation(
    self, interface, operation_to_disrupt, resource_to_delete,
    multi_pvc_factory, pod_factory
):
    """
    Base function for ceph daemon kill disruptive tests.
    The daemon of 'resource_to_delete' is killed while
    'operation_to_disrupt' is in progress.

    Args:
        interface (str): Interface type, compared against
            constants.CEPHBLOCKPOOL and constants.CEPHFILESYSTEM
        operation_to_disrupt (str): Phase during which the daemon is
            killed: 'create_pvc', 'create_pod' or 'run_io'
        resource_to_delete (str): Key into the pod lookup table below,
            e.g. 'mds', 'mon', 'mgr', 'osd'
        multi_pvc_factory (callable): Callable submitted to the executor
            to create the PVCs
        pod_factory (callable): Passed through to helpers.create_pods

    """
    def _storage_type(target_pod):
        # fio storage type follows the volumeMode of the pod's PVC spec
        volume_mode = target_pod.pvc.get()['spec']['volumeMode']
        return 'block' if volume_mode == 'Block' else 'fs'

    def _assert_fio_ok(target_pods):
        # Fail on the first pod whose fio job reports an error
        for target_pod in target_pods:
            result = target_pod.get_fio_results()
            errors = result.get('jobs')[0].get('error')
            assert errors == 0, (
                f"FIO error on pod {target_pod.name}. FIO result: {result}"
            )

    disruption = disruption_helpers.Disruptions()

    # Lookup table: resource type -> callable returning its pods
    pod_functions = {
        'mds': pod.get_mds_pods,
        'mon': pod.get_mon_pods,
        'mgr': pod.get_mgr_pods,
        'osd': pod.get_osd_pods,
        'rbdplugin': partial(pod.get_plugin_pods, interface=interface),
        'cephfsplugin': partial(pod.get_plugin_pods, interface=interface),
        'cephfsplugin_provisioner': pod.get_cephfsplugin_provisioner_pods,
        'rbdplugin_provisioner': pod.get_rbdfsplugin_provisioner_pods,
        'operator': pod.get_operator_pods,
    }

    # Remember how many pods of type 'resource_to_delete' exist up front
    num_of_resource_to_delete = len(pod_functions[resource_to_delete]())

    num_of_pvc = 12
    namespace = self.proj_obj.namespace

    # Baseline counts of Pods and PVCs in the project
    initial_num_of_pods = len(pod.get_all_pods(namespace=namespace))
    initial_num_of_pvc = len(get_all_pvcs(namespace=namespace)['items'])

    executor = ThreadPoolExecutor(max_workers=(2 * num_of_pvc))

    disruption.set_resource(resource=resource_to_delete)
    disruption.select_daemon()

    access_modes = [constants.ACCESS_MODE_RWO]
    if interface == constants.CEPHFILESYSTEM:
        access_modes.append(constants.ACCESS_MODE_RWX)

    # For rbd, add `block` type volumes, including RWX access mode.
    # RWX is not supported in non-block type rbd
    if interface == constants.CEPHBLOCKPOOL:
        access_modes += [
            f'{constants.ACCESS_MODE_RWO}-Block',
            f'{constants.ACCESS_MODE_RWX}-Block',
        ]

    # Kick off PVC creation in the background
    bulk_pvc_create = executor.submit(
        multi_pvc_factory, interface=interface,
        project=self.proj_obj, size=8,
        access_modes=access_modes,
        access_modes_selection='distribute_random',
        status=constants.STATUS_BOUND, num_of_pvc=num_of_pvc,
        wait_each=False
    )

    if operation_to_disrupt == 'create_pvc':
        # Kill the daemon only once PVC creation is observably under way
        pvc_creation_started = helpers.wait_for_resource_count_change(
            get_all_pvcs, initial_num_of_pvc, namespace, 'increase'
        )
        assert pvc_creation_started, (
            "Wait timeout: PVCs are not being created."
        )
        log.info("PVCs creation has started.")
        disruption.kill_daemon()

    pvc_objs = bulk_pvc_create.result()

    # Confirm that PVCs are Bound
    for pvc_obj in pvc_objs:
        helpers.wait_for_resource_state(
            resource=pvc_obj, state=constants.STATUS_BOUND, timeout=120
        )
        pvc_obj.reload()
    log.info("Verified: PVCs are Bound.")

    # Kick off pod creation in the background
    bulk_pod_create = executor.submit(
        helpers.create_pods, pvc_objs, pod_factory, interface, 2
    )

    if operation_to_disrupt == 'create_pod':
        # Kill the daemon only once pod creation is observably under way
        pod_creation_started = helpers.wait_for_resource_count_change(
            pod.get_all_pods, initial_num_of_pods, namespace, 'increase'
        )
        assert pod_creation_started, (
            "Wait timeout: Pods are not being created."
        )
        log.info("Pods creation has started.")
        disruption.kill_daemon()

    pod_objs = bulk_pod_create.result()

    # Verify pods are Running
    for pod_obj in pod_objs:
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING, timeout=180
        )
        pod_obj.reload()
    log.info("Verified: All pods are Running.")

    # Run the IO setup of every pod in the thread pool
    log.info("Setting up pods for running IO.")
    for pod_obj in pod_objs:
        executor.submit(
            pod_obj.workload_setup, storage_type=_storage_type(pod_obj)
        )

    # Poll each pod's 'wl_setup_done' attribute until it turns truthy
    for pod_obj in pod_objs:
        for setup_done in TimeoutSampler(
            180, 2, getattr, pod_obj, 'wl_setup_done'
        ):
            if not setup_done:
                continue
            log.info(
                f"Setup for running IO is completed on pod "
                f"{pod_obj.name}."
            )
            break
    log.info("Setup for running IO is completed on all pods.")

    # Start IO on each pod
    for pod_obj in pod_objs:
        pod_obj.run_io(
            storage_type=_storage_type(pod_obj), size='2G', runtime=30,
            fio_filename=f'{pod_obj.name}_io_file1'
        )
    log.info("FIO started on all pods.")

    if operation_to_disrupt == 'run_io':
        disruption.kill_daemon()

    log.info("Fetching FIO results.")
    _assert_fio_ok(pod_objs)
    log.info("Verified FIO result on pods.")

    # Delete pods
    for pod_obj in pod_objs:
        pod_obj.delete(wait=True)
    for pod_obj in pod_objs:
        pod_obj.ocp.wait_for_delete(pod_obj.name)

    # Verify that PVCs are reusable by creating new pods
    pod_objs = helpers.create_pods(pvc_objs, pod_factory, interface, 2)

    # Verify new pods are Running
    for pod_obj in pod_objs:
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING
        )
        pod_obj.reload()
    log.info("Verified: All new pods are Running.")

    # Run IO on each of the new pods
    for pod_obj in pod_objs:
        pod_obj.run_io(
            storage_type=_storage_type(pod_obj), size='1G', runtime=10,
            fio_filename=f'{pod_obj.name}_io_file2'
        )

    log.info("Fetching FIO results from new pods")
    _assert_fio_ok(pod_objs)
    log.info("Verified FIO result on new pods.")

    # The count of 'resource_to_delete' pods must be back at its baseline
    final_num_resource_to_delete = len(pod_functions[resource_to_delete]())
    assert final_num_resource_to_delete == num_of_resource_to_delete, (
        f"Total number of {resource_to_delete} pods is not matching with "
        f"initial value. Total number of pods before deleting a pod: "
        f"{num_of_resource_to_delete}. Total number of pods present now: "
        f"{final_num_resource_to_delete}"
    )

    # Check ceph status
    ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
    log.info("Ceph cluster health is OK")