def create_clone(self, clone_num):
    """
    Clone the test PVC and measure how long the clone creation took.

    Args:
        clone_num (int): index of this clone; it becomes the suffix of the
            clone name ("pvc-clone-pas-test-<clone_num>")

    Returns:
        str: The created clone name
        int: the creation time of the clone (in secs.)
        int: the csi creation time of the clone (in secs.)

    """
    # Sampled before the clone request is issued; both measurements below
    # receive it as their starting point.
    start_of_csi = self.get_time("csi")

    clone = pvc.create_pvc_clone(
        sc_name=self.pvc_obj.backed_sc,
        parent_pvc=self.pvc_obj.name,
        pvc_name=f"pvc-clone-pas-test-{clone_num}",
        clone_yaml=Interfaces_info[self.interface]["clone_yaml"],
        namespace=self.namespace,
        storage_size=f"{self.pvc_obj.size}Gi",
    )
    helpers.wait_for_resource_state(clone, constants.STATUS_BOUND, 600)
    clone.reload()

    clone_name = clone.name
    create_time = performance_lib.measure_pvc_creation_time(
        self.interface, clone_name, start_of_csi
    )
    csi_create_time = performance_lib.csi_pvc_time_measure(
        self.interface, clone, "create", start_of_csi
    )

    # Only the name and the timings are handed back; drop the local reference
    del clone
    return clone_name, create_time, csi_create_time
def setup(
    self,
    interface,
    reclaim_policy,
    storageclass_factory,
    multi_pvc_factory,
    pod_factory,
):
    """
    Create a storage class, ``self.num_of_pvc`` PVCs on it and one pod per PVC.

    The results are stored on the instance as ``sc_obj``, ``pvc_objs`` and
    ``pod_objs``.

    Args:
        interface: interface passed to the storage class, PVC and pod factories
        reclaim_policy: reclaim policy of the created storage class
        storageclass_factory: storage class factory fixture
        multi_pvc_factory: multiple-PVC factory fixture
        pod_factory: pod factory fixture

    """
    self.sc_obj = storageclass_factory(
        interface=interface, reclaim_policy=reclaim_policy
    )

    pvc_settings = dict(
        interface=interface,
        project=None,
        storageclass=self.sc_obj,
        size=5,
        status=constants.STATUS_BOUND,
        num_of_pvc=self.num_of_pvc,
        wait_each=False,
    )
    self.pvc_objs = multi_pvc_factory(**pvc_settings)

    # All pods are requested first (status=None) and only afterwards waited
    # for, one by one.
    self.pod_objs = []
    for claim in self.pvc_objs:
        new_pod = pod_factory(interface=interface, pvc=claim, status=None)
        self.pod_objs.append(new_pod)

    for created_pod in self.pod_objs:
        wait_for_resource_state(created_pod, constants.STATUS_RUNNING)
        created_pod.reload()
def clone_pvc(self, teardown_factory):
    """
    Create a 2Gi clone of every PVC in ``self.pvc_objs`` and wait for it to bind.

    Each clone is registered with the teardown fixture before the wait.

    Args:
        teardown_factory: teardown fixture

    """
    for pvc_obj in self.pvc_objs:
        logger.info(
            f"Clone pvc {pvc_obj.name} sc_name={pvc_obj.storageclass.name} size=2Gi, "
            f"access_mode={pvc_obj.access_mode},volume_mode={pvc_obj.get_pvc_vol_mode}"
        )

        # The clone template depends on which storage class backs the parent
        if pvc_obj.backed_sc == constants.CEPHFILESYSTEM_SC:
            template = constants.CSI_CEPHFS_PVC_CLONE_YAML
        else:
            template = constants.CSI_RBD_PVC_CLONE_YAML

        clone = pvc.create_pvc_clone(
            sc_name=pvc_obj.backed_sc,
            parent_pvc=pvc_obj.name,
            clone_yaml=template,
            namespace=pvc_obj.namespace,
            storage_size="2Gi",
            volume_mode=pvc_obj.get_pvc_vol_mode,
            access_mode=pvc_obj.access_mode,
        )
        teardown_factory(clone)
        helpers.wait_for_resource_state(clone, constants.STATUS_BOUND)
        clone.reload()
def validate_registry_pod_status():
    """
    Wait for every registry pod to reach ``constants.STATUS_RUNNING``.
    """
    for registry_pod in get_registry_pod_obj():
        helpers.wait_for_resource_state(registry_pod, state=constants.STATUS_RUNNING)
def test_drain_mcg_pod_node(
    self, node_drain_teardown, reduce_and_resume_cluster_load, pod_to_drain
):
    """
    Test drainage of nodes which contain NB resources
    """

    def _first_labelled_pod():
        # First pod carrying the label mapped to ``pod_to_drain``
        matches = pod.get_pods_having_label(
            label=self.labels_map[pod_to_drain],
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )
        return pod.Pod(**matches[0])

    # Find the node hosting the pod and drain it
    hosting_node = _first_labelled_pod().get()["spec"]["nodeName"]
    drain_nodes([hosting_node])

    # Verify the node was drained properly
    wait_for_nodes_status(
        [hosting_node], status=constants.NODE_READY_SCHEDULING_DISABLED
    )

    # Look the pod up again: a new one should have been created post-drainage,
    # and it has to reach the Running status again
    recovered_pod = _first_labelled_pod()
    wait_for_resource_state(recovered_pod, constants.STATUS_RUNNING, timeout=120)

    # Check the NB status to verify the system is healthy
    self.cl_obj.wait_for_noobaa_health_ok()
def test_delete_create_pvc_same_name(self, interface, pvc_factory, teardown_factory):
    """
    Delete a bound RWO PVC, then create a new PVC that reuses its name and
    verify that both the new PVC and its PV become Bound.
    """
    pvc_obj1 = pvc_factory(
        interface=interface,
        access_mode=constants.ACCESS_MODE_RWO,
        status=constants.STATUS_BOUND,
    )

    # Remove the first PVC and wait until it is gone
    logger.info(f"Deleting PVC {pvc_obj1.name}")
    pvc_obj1.delete()
    pvc_obj1.ocp.wait_for_delete(pvc_obj1.name)
    logger.info(f"Deleted PVC {pvc_obj1.name}")

    # Re-create it under the same name, storage class and namespace
    logger.info(f"Creating new PVC with same name {pvc_obj1.name}")
    recreated_pvc = helpers.create_pvc(
        sc_name=pvc_obj1.storageclass.name,
        pvc_name=pvc_obj1.name,
        namespace=pvc_obj1.project.namespace,
        do_reload=False,
    )
    teardown_factory(recreated_pvc)

    # Both the new claim and its backing volume must end up Bound
    helpers.wait_for_resource_state(
        resource=recreated_pvc, state=constants.STATUS_BOUND
    )
    backing_pv = recreated_pvc.backed_pv_obj
    helpers.wait_for_resource_state(resource=backing_pv, state=constants.STATUS_BOUND)
def wait_for_build_to_complete(self, timeout=900):
    """
    Wait for every not-yet-completed build of every project to complete.

    Builds whose ``(name, project)`` pair is already in
    ``self.build_completed`` are skipped.

    Args:
        timeout (int): Time in seconds to wait (applied to each build)

    Raises:
        UnexpectedBehaviour: if a build does not reach the complete state;
            the message includes the ``describe`` output of that build

    """
    log.info(f"Waiting for the build to reach {JENKINS_BUILD_COMPLETE} state")
    for project in self.projects:
        for jenkins_build in self.get_builds_sorted_by_number(project=project):
            if (jenkins_build.name, project) in self.build_completed:
                continue
            try:
                wait_for_resource_state(
                    resource=jenkins_build,
                    state=JENKINS_BUILD_COMPLETE,
                    timeout=timeout,
                )
                self.get_build_duration_time(
                    namespace=project, build_name=jenkins_build.name
                )
            except ResourceWrongStatusException:
                # Gather diagnostics for the failed build before giving up
                build_ocp = OCP(namespace=project, kind="build")
                output = build_ocp.describe(resource_name=jenkins_build.name)
                error_msg = (
                    f"{jenkins_build.name} did not reach to "
                    f"{JENKINS_BUILD_COMPLETE} state after {timeout} sec\n"
                    f"oc describe output of {jenkins_build.name} \n:{output}"
                )
                log.error(error_msg)
                self.print_completed_builds_results()
                raise UnexpectedBehaviour(error_msg)
def test_ocs_347(self, resources):
    """
    Create an RBD storage class, a PVC on it and an nginx pod using that PVC.

    Every created object is appended to the matching list unpacked from
    ``resources`` (pods, PVCs, storage classes).
    """
    pod, pvc, storageclass = resources

    log.info("Creating RBD StorageClass")
    rbd_sc = helpers.create_storage_class(
        interface_type=constants.CEPHBLOCKPOOL,
        interface_name=self.cbp_obj.name,
        secret_name=self.rbd_secret_obj.name,
    )
    storageclass.append(rbd_sc)

    log.info("Creating a PVC")
    new_pvc = helpers.create_pvc(sc_name=storageclass[0].name)
    pvc.append(new_pvc)
    for pvc_obj in pvc:
        helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
        pvc_obj.reload()

    log.info(f"Creating a pod on with pvc {pvc[0].name}")
    nginx_pod = helpers.create_pod(
        interface_type=constants.CEPHBLOCKPOOL,
        pvc_name=pvc[0].name,
        pod_dict_path=constants.NGINX_POD_YAML,
    )
    pod.append(nginx_pod)
    helpers.wait_for_resource_state(nginx_pod, constants.STATUS_RUNNING)
    nginx_pod.reload()
def create_pod_and_wait_for_completion(self, **kwargs):
    """
    Create the FIO pod on the test PVC and wait until it completes.

    A pod yaml file (to run as a Job) is generated first; the command to run
    on the pod and its arguments are replaced in the create_pod function.

    Args:
        **kwargs: extra keyword arguments forwarded to ``helpers.create_pod``.
            The optional ``filesize`` entry (default "1M") is consumed here
            and passed to ``create_fio_pod_yaml`` instead.

    Raises:
        PodNotCreated: if ``helpers.create_pod`` raises; the original
            exception is attached as the cause.

    """
    filesize = kwargs.pop("filesize", "1M")
    self.create_fio_pod_yaml(pvc_size=int(self.pvc_size), filesize=filesize)

    # Create a pod
    logger.info(f"Creating Pod with pvc {self.pvc_obj.name}")
    try:
        self.pod_object = helpers.create_pod(
            pvc_name=self.pvc_obj.name,
            namespace=self.namespace,
            interface_type=self.interface,
            pod_name="pod-pas-test",
            pod_dict_path=self.pod_yaml_file.name,
            **kwargs,
        )
    except Exception as e:
        # self.pod_object is not assigned when create_pod fails (it may be
        # missing, or left over from an earlier call), so report the PVC name.
        logger.exception(
            f"Pod attached to PVC {self.pvc_obj.name} was not created, exception [{str(e)}]"
        )
        raise PodNotCreated("Pod attached to PVC was not created.") from e

    # Wait for the pod to finish its run
    logger.info("Checking whether the pod is running")
    helpers.wait_for_resource_state(
        resource=self.pod_object,
        state=constants.STATUS_COMPLETED,
        timeout=self.timeout,
    )
def corrupt_ceph_monitors():
    """
    Corrupt every ceph monitor by deleting its store.db and make sure all of
    them end up in the CLBO state.

    A monitor that neither reaches CLBO within the wait nor reports CLBO as
    its STATUS afterwards is deleted (re-spun). Finally the monitor pods are
    listed again and each one is waited on for CLBO.
    """
    for mon in get_mon_pods(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE):
        logger.info(f"Corrupting monitor: {mon.name}")
        labels = mon.get().get("metadata").get("labels")
        mon_id = labels.get("ceph_daemon_id")
        _exec_cmd_on_pod(
            cmd=f"rm -rf /var/lib/ceph/mon/ceph-{mon_id}/store.db", pod_obj=mon
        )
        try:
            wait_for_resource_state(resource=mon, state=constants.STATUS_CLBO)
        except ResourceWrongStatusException:
            # Check once more before re-spinning the pod
            current_status = mon.ocp.get_resource(
                resource_name=mon.name, column="STATUS"
            )
            if current_status == constants.STATUS_CLBO:
                continue
            logger.info(
                f"Re-spinning monitor: {mon.name} since it did not reach CLBO state"
            )
            mon.delete()

    logger.info("Validating all the monitors are in CLBO state")
    for mon in get_mon_pods(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE):
        wait_for_resource_state(resource=mon, state=constants.STATUS_CLBO)
def create_mutiple_pvcs_statistics(self, num_of_samples, teardown_factory, pvc_size):
    """
    Creates number (num_of_samples) of PVCs, measures creation time for each
    PVC and returns list of creation times.

    Args:
        num_of_samples: Number of the sampled created PVCs.
        teardown_factory: A fixture used to register every created PVC for
            cleanup.
        pvc_size: Size of the created PVCs.

    Returns:
        List of the creation times of all the created PVCs.

    """
    time_measures = []
    for i in range(num_of_samples):
        log.info(f"Start creation of PVC number {i + 1}.")
        pvc_obj = helpers.create_pvc(sc_name=self.sc_obj.name, size=pvc_size)
        # Register for cleanup right away so the PVC is not leaked when the
        # wait below fails.
        teardown_factory(pvc_obj)
        helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
        pvc_obj.reload()
        create_time = helpers.measure_pvc_creation_time(self.interface, pvc_obj.name)
        log.info(f"PVC created in {create_time} seconds")
        time_measures.append(create_time)
    return time_measures
def patch_sleep_on_mds(self):
    """
    Patch the MDS deployments to sleep and remove the `livenessProbe`
    parameter, then wait for the MDS pods to reach the running state.
    """
    # JSON patch dropping the liveness probe of the first container
    drop_probe_patch = (
        '[{"op":"remove", "path":"/spec/template/spec/containers/0/livenessProbe"}]'
    )
    # Patch replacing the "mds" container command with an endless sleep
    sleep_patch = (
        '{"spec": {"template": {"spec": {"containers": '
        '[{"name": "mds", "command": ["sleep", "infinity"], "args": []}]}}}}'
    )

    deployments = get_deployments_having_label(
        label=constants.MDS_APP_LABEL,
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for mds in [OCS(**dep) for dep in deployments]:
        logger.info(
            f"Patching MDS: {mds.name} to remove livenessProbe and setting sleep infinity"
        )
        self.dep_ocp.patch(
            resource_name=mds.name,
            params=drop_probe_patch,
            format_type="json",
        )
        self.dep_ocp.patch(
            resource_name=mds.name,
            params=sleep_patch,
        )

    logger.info("Sleeping for 60s and waiting for MDS pods to reach running state")
    time.sleep(60)
    for mds_pod in get_mds_pods(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE):
        wait_for_resource_state(resource=mds_pod, state=constants.STATUS_RUNNING)
def test_pvc_deletion_measurement_performance(self, teardown_factory, pvc_size):
    """
    Measuring PVC deletion time is within supported limits

    A PVC is created and bound, then deleted; the measured deletion time must
    not exceed 4 seconds for CephFS or 3 seconds for any other interface.

    Args:
        teardown_factory: teardown fixture the PVC is registered with
        pvc_size: size of the PVC to create

    Raises:
        PerformanceException: if the deletion time is above the limit

    """
    logging.info("Start creating new PVC")
    pvc_obj = helpers.create_pvc(sc_name=self.sc_obj.name, size=pvc_size)
    # Register for cleanup right away so the PVC is not leaked when the wait
    # below fails.
    teardown_factory(pvc_obj)
    helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
    pvc_obj.reload()

    # Capture everything needed later while the PVC still exists
    pv_name = pvc_obj.backed_pv
    pvc_reclaim_policy = pvc_obj.reclaim_policy

    logging.info("Start deletion of PVC")
    pvc_obj.delete()
    pvc_obj.ocp.wait_for_delete(pvc_obj.name)
    if pvc_reclaim_policy == constants.RECLAIM_POLICY_DELETE:
        helpers.validate_pv_delete(pv_name)
    delete_time = helpers.measure_pvc_deletion_time(self.interface, pv_name)

    # Deletion time for CephFS PVC is a little over 3 seconds
    deletion_time = 4 if self.interface == constants.CEPHFILESYSTEM else 3
    logging.info(f"PVC deleted in {delete_time} seconds")
    if delete_time > deletion_time:
        raise ex.PerformanceException(
            f"PVC deletion time is {delete_time} and greater than {deletion_time} second"
        )
    push_to_pvc_time_dashboard(self.interface, "1-pvc-deletion", delete_time)
def wait_for_jenkins_deploy_status(self, status, timeout=600):
    """
    Wait for the jenkins deploy pods of every project to reach ``status``.

    Args:
        status (str): status to reach Running or Completed
        timeout (int): Time in seconds to wait (applied to each pod)

    Raises:
        UnexpectedBehaviour: if a pod does not reach ``status``; the message
            carries the ``logs`` and ``describe`` output of that pod

    """
    log.info(f"Waiting for jenkins-deploy pods to be reach {status} state")
    for project in self.projects:
        for jenkins_deploy_pod in self.get_jenkins_deploy_pods(namespace=project):
            try:
                wait_for_resource_state(
                    resource=jenkins_deploy_pod, state=status, timeout=timeout
                )
            except ResourceWrongStatusException:
                # Collect diagnostics from the pod before failing
                project_ocp = OCP(namespace=project)
                output_log = project_ocp.exec_oc_cmd(
                    command=f"logs {jenkins_deploy_pod.name}",
                    out_yaml_format=False,
                )
                output_describe = project_ocp.exec_oc_cmd(
                    command=f"describe {jenkins_deploy_pod.name}",
                    out_yaml_format=False,
                )
                error_msg = (
                    f"{jenkins_deploy_pod.name} did not reach to "
                    f"{status} state after {timeout} sec"
                    f"\n output log {jenkins_deploy_pod.name}:\n{output_log}"
                    f"\n output describe {jenkins_deploy_pod.name}:\n{output_describe}"
                )
                log.error(error_msg)
                raise UnexpectedBehaviour(error_msg)
def wait_for_pgbench_status(self, status, timeout=None):
    """
    Wait for pgbench benchmark pods status to reach running/completed

    Args:
        status (str): status to reach Running or Completed
        timeout (int): Time in seconds to wait; a falsy value means 1800

    Raises:
        UnexpectedBehaviour: if a pod does not reach ``status``; the message
            carries the ``oc logs`` output of that pod

    """
    # Sometimes with the default values in the benchmark yaml the pgbench pod
    # is not getting completed within the specified time and the tests are
    # failing. It seems to vary with the infrastructure. So, for now the
    # timeout is set to 30 mins and each pgbench pod is monitored for each
    # run. Based on the results the timeout will be defined again.
    timeout = timeout or 1800

    # Wait for pg_bench pods to initialized and running
    log.info(f"Waiting for pgbench pods to be reach {status} state")
    for pgbench_pod_obj in self.get_pgbench_pods():
        try:
            wait_for_resource_state(
                resource=pgbench_pod_obj, state=status, timeout=timeout
            )
        except ResourceWrongStatusException:
            output = run_cmd(f"oc logs {pgbench_pod_obj.name}")
            error_msg = f"{pgbench_pod_obj.name} did not reach to {status} state after {timeout} sec\n{output}"
            log.error(error_msg)
            raise UnexpectedBehaviour(error_msg)
def change_registry_backend_to_ocs():
    """
    Function to deploy registry with OCS backend.

    A 100Gi RWX PVC named "registry-cephfs-rwx-pvc" is created on the default
    CephFS storage class in the image registry namespace, the registry config
    is patched to use it, and the registry pods and the PVC mount are then
    validated (each validation is retried up to 3 times, 15s apart).

    Raises:
        AssertionError: When failure in change of registry backend to OCS
            (presumably propagated from the mount validation once retries
            are exhausted - TODO confirm)

    """
    cephfs_sc = helpers.default_storage_class(interface_type=constants.CEPHFILESYSTEM)
    # NOTE: despite its name this object is the PVC, not a PV
    pv_obj = helpers.create_pvc(
        sc_name=cephfs_sc.name,
        pvc_name="registry-cephfs-rwx-pvc",
        namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
        size="100Gi",
        access_mode=constants.ACCESS_MODE_RWX,
    )
    helpers.wait_for_resource_state(pv_obj, "Bound")

    # Point the registry storage at the new claim
    param_cmd = f'[{{"op": "add", "path": "/spec/storage", "value": {{"pvc": {{"claim": "{pv_obj.name}"}}}}}}]'
    run_cmd(
        f"oc patch {constants.IMAGE_REGISTRY_CONFIG} -p "
        f"'{param_cmd}' --type json"
    )

    # Validate registry pod status
    check_pods = retry((CommandFailed, UnexpectedBehaviour), tries=3, delay=15)(
        validate_registry_pod_status
    )
    check_pods()

    # Validate pvc mount in the registry pod
    check_mount = retry(
        (CommandFailed, UnexpectedBehaviour, AssertionError), tries=3, delay=15
    )(validate_pvc_mount_on_registry_pod)
    check_mount()
def create_pods(request):
    """
    Create one pod per PVC of the requesting test class and wait for all of
    them to be Running.

    The pods are stored on the class as ``pod_objs``; a finalizer deleting
    them is registered before any pod is created.
    """
    test_cls = request.node.cls

    def finalizer():
        """
        Delete multiple pods
        """
        for created_pod in getattr(test_cls, "pod_objs", ()):
            created_pod.delete()

    request.addfinalizer(finalizer)

    # Append one by one so the finalizer sees every pod created so far
    test_cls.pod_objs = []
    for pvc_obj in test_cls.pvc_objs:
        new_pod = helpers.create_pod(
            interface_type=test_cls.interface,
            pvc_name=pvc_obj.name,
            do_reload=False,
            namespace=test_cls.namespace,
        )
        test_cls.pod_objs.append(new_pod)

    for created_pod in test_cls.pod_objs:
        helpers.wait_for_resource_state(created_pod, constants.STATUS_RUNNING)
def create_dc_pods(request):
    """
    Create one deploymentconfig pod per PVC of the requesting test class and
    wait (180s each) for all of them to be Running.

    The pods are stored on the class as ``dc_pod_objs``; a finalizer deleting
    them is registered before any pod is created.
    """
    class_instance = request.node.cls

    def finalizer():
        """
        Delete multiple dc pods
        """
        if hasattr(class_instance, "dc_pod_objs"):
            for pod in class_instance.dc_pod_objs:
                delete_deploymentconfig_pods(pod_obj=pod)

    request.addfinalizer(finalizer)

    # Publish the list first and append pod by pod: if a creation fails part
    # way through, the finalizer can still clean up the pods created so far.
    class_instance.dc_pod_objs = []
    for pvc_obj in class_instance.pvc_objs:
        class_instance.dc_pod_objs.append(
            helpers.create_pod(
                interface_type=class_instance.interface,
                pvc_name=pvc_obj.name,
                do_reload=False,
                namespace=class_instance.namespace,
                sa_name=class_instance.sa_obj.name,
                dc_deployment=True,
                replica_count=class_instance.replica_count,
            )
        )

    for pod in class_instance.dc_pod_objs:
        helpers.wait_for_resource_state(pod, constants.STATUS_RUNNING, timeout=180)
def create_pvcs(request):
    """
    Create multiple PVCs for the requesting test class and wait for each of
    them to be Bound.

    The PVCs are stored on the class as ``pvc_objs``. The registered
    finalizer deletes all of them and validates that each PVC and its
    backing PV are gone.
    """
    class_instance = request.node.cls

    def finalizer():
        """
        Delete multiple PVCs
        """
        if hasattr(class_instance, "pvc_objs"):
            # Remember the backing PV of every PVC; a single shared variable
            # would only keep the last one.
            backed_pv_names = []
            for pvc_obj in class_instance.pvc_objs:
                pvc_obj.reload()
                backed_pv_names.append(pvc_obj.backed_pv)
                pvc_obj.delete()
            for pvc_obj, backed_pv_name in zip(
                class_instance.pvc_objs, backed_pv_names
            ):
                pvc_obj.ocp.wait_for_delete(pvc_obj.name)
                helpers.validate_pv_delete(backed_pv_name)

    request.addfinalizer(finalizer)

    class_instance.pvc_objs, _ = helpers.create_multiple_pvcs(
        sc_name=class_instance.sc_obj.name,
        number_of_pvc=class_instance.num_of_pvcs,
        size=class_instance.pvc_size,
        namespace=class_instance.namespace,
    )
    for pvc_obj in class_instance.pvc_objs:
        helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
        pvc_obj.reload()
def test_pod_io(
    pre_upgrade_filesystem_pods,
    post_upgrade_filesystem_pods,
    pre_upgrade_block_pods,
    post_upgrade_block_pods,
    fio_project,
    resume_cluster_load,
):
    """
    Test IO on multiple pods at the same time and finish IO on pods that were
    created before upgrade.

    Every pod (block first, then filesystem) must be Running within 600s;
    the job named by its "job-name" label is then deleted.
    """
    log.info(
        f"Pods using filesystem created before upgrade: "
        f"{pre_upgrade_filesystem_pods}"
    )
    log.info(
        f"Pods using filesystem created after upgrade: "
        f"{post_upgrade_filesystem_pods}"
    )
    log.info(
        f"Pods using block device created before upgrade: "
        f"{pre_upgrade_block_pods}"
    )
    log.info(
        f"Pods using block device created after upgrade: "
        f"{post_upgrade_block_pods}"
    )

    every_pod = (
        pre_upgrade_block_pods
        + post_upgrade_block_pods
        + pre_upgrade_filesystem_pods
        + post_upgrade_filesystem_pods
    )
    fio_jobs = ocp.OCP(kind=constants.JOB, namespace=fio_project.namespace)
    for fio_pod in every_pod:
        log.info("Checking that fio is still running")
        helpers.wait_for_resource_state(
            fio_pod, constants.STATUS_RUNNING, timeout=600
        )
        fio_jobs.delete(resource_name=fio_pod.get_labels().get("job-name"))
def __init__(self):
    """
    Create a CephFS PVC and a pod using it, then fill the pod with test files.

    The PVC claim name and the pod name are taken from the CephFS pod yaml
    template. The results are stored as ``cephfs_pvc``, ``cephfs_pod``,
    ``ocp_obj`` and ``test_file_list``. The run configuration is also
    switched to teardown mode.
    """
    with open(constants.CSI_CEPHFS_POD_YAML, "r") as pod_fd:
        pod_info = yaml.safe_load(pod_fd)
    first_volume = pod_info["spec"]["volumes"][0]
    pvc_name = first_volume["persistentVolumeClaim"]["claimName"]
    self.pod_name = pod_info["metadata"]["name"]

    config.RUN["cli_params"]["teardown"] = True

    self.cephfs_pvc = helpers.create_pvc(
        sc_name=constants.DEFAULT_STORAGECLASS_CEPHFS,
        namespace=config.ENV_DATA["cluster_namespace"],
        pvc_name=pvc_name,
        size=SIZE,
    )
    helpers.wait_for_resource_state(
        self.cephfs_pvc, constants.STATUS_BOUND, timeout=300
    )

    # The pod is pinned to a fixed node name
    self.cephfs_pod = helpers.create_pod(
        interface_type=constants.CEPHFILESYSTEM,
        namespace=config.ENV_DATA["cluster_namespace"],
        pvc_name=pvc_name,
        node_name="compute-0",
        pod_name=self.pod_name,
    )
    helpers.wait_for_resource_state(
        self.cephfs_pod, constants.STATUS_RUNNING, timeout=300
    )
    logging.info("pvc and cephfs pod created")

    self.ocp_obj = ocp.OCP(
        kind=constants.POD,
        namespace=config.ENV_DATA["cluster_namespace"],
    )
    self.test_file_list = add_million_files(self.pod_name, self.ocp_obj)
    logging.info("cephfs test files created")
def remove_global_id_reclaim():
    """
    Removes global id warning by re-spinning client and mon pods

    The CSI plugin and provisioner pods are deleted, then the MDS pods are
    deleted and waited on until Running, and finally the same is done for
    the mon pods.
    """
    # Collect the RBD and CephFS plugin pods, then both provisioner sets
    csi_pods = []
    for interface in (constants.CEPHBLOCKPOOL, constants.CEPHFILESYSTEM):
        csi_pods.extend(get_plugin_pods(interface))
    cephfs_provisioners = get_cephfsplugin_provisioner_pods()
    rbd_provisioners = get_rbdfsplugin_provisioner_pods()
    csi_pods.extend(cephfs_provisioners)
    csi_pods.extend(rbd_provisioners)

    for csi_pod in csi_pods:
        csi_pod.delete()

    # The pods are listed again after deletion, so the waits below act on
    # that fresh listing.
    for old_mds in get_mds_pods():
        old_mds.delete()
    for mds_pod in get_mds_pods():
        wait_for_resource_state(resource=mds_pod, state=constants.STATUS_RUNNING)

    for old_mon in get_mon_pods(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE):
        old_mon.delete()
    for mon_pod in get_mon_pods(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE):
        wait_for_resource_state(resource=mon_pod, state=constants.STATUS_RUNNING)
def noobaa_running_node_restart(pod_name):
    """
    Function to restart node which has noobaa pod's running

    The first pod matching ``pod_name`` in the storage namespace is looked
    up, its node is restarted by stop and start (forced), and the nodes,
    ceph health and finally the pod itself are validated.

    Args:
        pod_name (str): Name of noobaa pod

    """
    matching_names = get_pod_name_by_pattern(
        pattern=pod_name, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE
    )
    nb_pod_obj = pod.get_pod_obj(
        matching_names[0],
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    nb_node_name = pod.get_pod_node(nb_pod_obj).name

    factory = platform_nodes.PlatformNodesFactory()
    nodes = factory.get_nodes_platform()
    # Pass the name as a one-element list: a bare string would make a
    # membership test inside the helper a substring match.
    nb_nodes = get_node_objs(node_names=[nb_node_name])

    log.info(f"{pod_name} is running on {nb_node_name}")
    log.info(f"Restating node: {nb_node_name}....")
    nodes.restart_nodes_by_stop_and_start(nodes=nb_nodes, force=True)

    # Validate nodes are up and running
    wait_for_nodes_status()
    ceph_health_check(tries=30, delay=60)
    helpers.wait_for_resource_state(nb_pod_obj, constants.STATUS_RUNNING, timeout=180)
def create_pvc_and_deploymentconfig_pod(self, request, pvc_factory):
    """
    Create a PVC and an RBD deploymentconfig pod using it, and wait for the
    pod to be Running.

    A service account with an scc policy is created in the PVC's namespace
    so the deployment pod gets the privileges it needs. The pod is deleted
    by a finalizer registered on ``request``.

    Args:
        request: pytest request object used to register the finalizer
        pvc_factory: PVC factory fixture

    Returns:
        tuple: the created pod object and PVC object

    """
    # Bound up front so the finalizer is safe to run even if setup fails
    # before the pod exists.
    pod_obj = None

    def finalizer():
        if pod_obj is not None:
            delete_deploymentconfig_pods(pod_obj)

    request.addfinalizer(finalizer)

    # Create pvc
    pvc_obj = pvc_factory()

    # Create service_account to get privilege for deployment pods
    sa_name = helpers.create_serviceaccount(pvc_obj.project.namespace)

    helpers.add_scc_policy(sa_name=sa_name.name, namespace=pvc_obj.project.namespace)

    pod_obj = helpers.create_pod(
        interface_type=constants.CEPHBLOCKPOOL,
        pvc_name=pvc_obj.name,
        namespace=pvc_obj.project.namespace,
        sa_name=sa_name.name,
        dc_deployment=True,
    )
    helpers.wait_for_resource_state(resource=pod_obj, state=constants.STATUS_RUNNING)
    return pod_obj, pvc_obj
def patch_sleep_on_osds(self):
    """
    Patch the OSD deployments to sleep and remove the `livenessProbe`
    parameter, then wait for the OSD pods to reach the running state.
    """
    # JSON patch dropping the liveness probe of the first container
    drop_probe_patch = (
        '[{"op":"remove", "path":"/spec/template/spec/containers/0/livenessProbe"}]'
    )
    # Patch replacing the "osd" container command with an endless sleep
    sleep_patch = (
        '{"spec": {"template": {"spec": {"containers": [{"name": "osd", "command":'
        ' ["sleep", "infinity"], "args": []}]}}}}'
    )

    deployments = get_deployments_having_label(
        label=constants.OSD_APP_LABEL,
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for osd in [OCS(**dep) for dep in deployments]:
        logger.info(f"Patching OSD: {osd.name} with livenessProbe and sleep infinity")
        self.dep_ocp.patch(
            resource_name=osd.name,
            params=drop_probe_patch,
            format_type="json",
        )
        self.dep_ocp.patch(
            resource_name=osd.name,
            params=sleep_patch,
        )

    logger.info("Sleeping for 15 seconds and waiting for OSDs to reach running state")
    time.sleep(15)
    for osd_pod in get_osd_pods():
        wait_for_resource_state(resource=osd_pod, state=constants.STATUS_RUNNING)
def test_rgw_host_node_failure(
    self, nodes, node_restart_teardown, mcg_obj, bucket_factory
):
    """
    Test case to fail node where RGW and Noobaa-db-0 hosting
    and verify new pod spins on healthy node

    The test requires a pod named "noobaa-db-0" and at least one RGW pod
    hosted on the same node; it fails with an assertion otherwise.
    """
    # Get rgw pods
    rgw_pod_obj = get_rgw_pods()

    # Get noobaa pods
    noobaa_pod_obj = get_noobaa_pods()

    # Get the node where noobaa-db hosted
    noobaa_pod_node = None
    for noobaa_pod in noobaa_pod_obj:
        if noobaa_pod.name == "noobaa-db-0":
            noobaa_pod_node = get_pod_node(noobaa_pod)
    assert noobaa_pod_node is not None, "Pod noobaa-db-0 was not found"

    node_obj = None
    for rgw_pod in rgw_pod_obj:
        pod_node = rgw_pod.get().get("spec").get("nodeName")
        if pod_node == noobaa_pod_node.name:
            # Stop the node
            log.info(
                f"Stopping node {pod_node} where"
                f" rgw pod {rgw_pod.name} and noobaa-db-0 hosted"
            )
            node_obj = get_node_objs(node_names=[pod_node])
            nodes.stop_nodes(node_obj)

            # Validate old rgw pod went terminating state
            wait_for_resource_state(
                resource=rgw_pod, state=constants.STATUS_TERMINATING, timeout=720
            )

            # Validate new rgw pod spun
            ocp_obj = OCP(
                kind=constants.POD, namespace=defaults.ROOK_CLUSTER_NAMESPACE
            )
            ocp_obj.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_count=len(rgw_pod_obj),
                selector=constants.RGW_APP_LABEL,
            )

            # Create OBC and read and write
            self.create_obc_creation(bucket_factory, mcg_obj, "Object-key-1")

    # Without a stopped node there is nothing to start again and the
    # scenario under test never happened.
    assert node_obj is not None, (
        f"No rgw pod is hosted on node {noobaa_pod_node.name} "
        f"together with noobaa-db-0"
    )

    # Start the node
    nodes.start_nodes(node_obj)

    # Create OBC and read and write
    self.create_obc_creation(bucket_factory, mcg_obj, "Object-key-2")

    # Verify cluster health
    self.sanity_helpers.health_check()

    # Verify all storage pods are running
    wait_for_storage_pods()
def test_automated_recovery_from_stopped_node_and_start(self, nodes, additional_node):
    """
    Knip-678 Automated recovery from failed nodes
    Reactive case - IPI

    0) A - add new node, B - don't add new node
    1) Stop node
    2) Validate result:
         A - pods should respin on the new node
         B - pods should remain in Pending state on the stopped node
    3) Start node
    4) Validate result:
         A - pods should start on the new node
         B - pods should start on the stopped node after starting it

    """
    wnode_name = get_worker_nodes()[0]
    machine_name = machine.get_machine_from_node_name(wnode_name)
    self.machineset_name = machine.get_machineset_from_machine_name(machine_name)
    self.start_ready_replica_count = machine.get_ready_replica_count(
        self.machineset_name
    )

    temp_osd = get_osd_pods()[0]
    # Pod name without its last dash-separated part; used to match the
    # replacement pod below
    osd_real_name = "-".join(temp_osd.name.split("-")[:-1])
    self.osd_worker_node = [get_pod_node(temp_osd)]
    if additional_node:
        self.add_new_storage_node(self.osd_worker_node[0].name)
        self.extra_node = True
    nodes.stop_nodes(self.osd_worker_node, wait=True)
    log.info(f"Successfully powered off node: {self.osd_worker_node[0].name}")

    timeout = 420
    assert wait_for_rook_ceph_pod_status(
        temp_osd, constants.STATUS_TERMINATING, timeout
    ), (
        f"The pod {osd_real_name} didn't reach the status {constants.STATUS_TERMINATING} "
        f"after {timeout} seconds"
    )

    # Find the new OSD pod that replaces the one in terminating state:
    # same name prefix, different full name
    all_pod_obj = get_all_pods(namespace=defaults.ROOK_CLUSTER_NAMESPACE)
    new_osd = None
    for pod_obj in all_pod_obj:
        if osd_real_name == "-".join(pod_obj.name.split("-")[:-1]) and (
            temp_osd.name != pod_obj.name
        ):
            new_osd = pod_obj
            break

    nodes.start_nodes(nodes=self.osd_worker_node, wait=True)
    log.info(f"Successfully powered on node: {self.osd_worker_node[0].name}")

    # Checked only after the node is powered back on, so a failure here does
    # not leave the node stopped.
    assert (
        new_osd is not None
    ), f"No replacement pod was found for the terminating OSD pod {temp_osd.name}"
    wait_for_resource_state(new_osd, constants.STATUS_RUNNING, timeout=180)
    if additional_node:
        new_osd_node = get_pod_node(new_osd)
        assert (
            new_osd_node.name != self.osd_worker_node[0].name
        ), "New OSD is expected to run on the new additional node"
def validate_mon_pods():
    """
    Wait for every mon pod in the storage namespace to reach
    ``constants.STATUS_RUNNING``.
    """
    for mon_pod in get_mon_pods(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE):
        wait_for_resource_state(resource=mon_pod, state=constants.STATUS_RUNNING)
def pod_obj_list(
    self,
    interface,
    storageclass_factory,
    pod_factory,
    pvc_factory,
    teardown_factory,
    samples_num,
    pvc_size,
):
    """
    Prepare sample pods for the test

    A storage class is created for ``interface`` (a thick-provisioned RBD
    one on a new pool for CEPHBLOCKPOOL_THICK), then ``samples_num`` PVCs of
    ``pvc_size`` are created on it, each with a Running pod attached. PVCs
    and pods are registered with the teardown fixture.

    Returns:
        pod obj: List of pod instances

    """
    self.interface = interface
    pod_result_list = []
    self.msg_prefix = f"Interface: {self.interface}, PVC size: {pvc_size}."
    self.samples_num = samples_num
    self.pvc_size = pvc_size

    thick = self.interface == constants.CEPHBLOCKPOOL_THICK
    if not thick:
        self.sc_obj = storageclass_factory(self.interface)
    else:
        self.sc_obj = storageclass_factory(
            interface=constants.CEPHBLOCKPOOL,
            new_rbd_pool=True,
            rbd_thick_provision=True,
        )

    # Thick-provisioned PVCs get a longer time to bind
    bind_timeout = 600 if thick else 60
    for i in range(samples_num):
        logging.info(f"{self.msg_prefix} Start creating PVC number {i + 1}.")
        sample_pvc = helpers.create_pvc(sc_name=self.sc_obj.name, size=pvc_size)
        teardown_factory(sample_pvc)
        helpers.wait_for_resource_state(
            sample_pvc, constants.STATUS_BOUND, timeout=bind_timeout
        )
        sample_pvc.reload()
        logging.info(
            f"{self.msg_prefix} PVC number {i + 1} was successfully created ."
        )

        sample_pod = pod_factory(
            interface=self.interface, pvc=sample_pvc, status=constants.STATUS_RUNNING
        )
        teardown_factory(sample_pod)
        pod_result_list.append(sample_pod)

    return pod_result_list
def test_start_pre_upgrade_pod_io(pause_cluster_load, pre_upgrade_pods_running_io):
    """
    Confirm that there are pods created before upgrade.

    Every pod in ``pre_upgrade_pods_running_io`` has to reach the Running
    state within 600 seconds.
    """
    for fio_pod in pre_upgrade_pods_running_io:
        log.info("Waiting for all fio pods to come up")
        helpers.wait_for_resource_state(
            fio_pod, constants.STATUS_RUNNING, timeout=600
        )