Example #1
    def test_respin_mcg_pod_and_check_data_integrity(self, mcg_obj, cld_mgr,
                                                     awscli_pod,
                                                     ns_resource_factory,
                                                     bucket_factory, mcg_pod):
        """
        Test writing to a namespace bucket using MCG RPC and reading directly from AWS.
        Respin one of the MCG pods after the data is uploaded.

        """

        logger.info("Create the namespace resource and verify health")
        resource = ns_resource_factory()
        target_bucket_name = resource[0]
        ns_resource_name = resource[1]
        s3_creds = {
            "access_key_id": cld_mgr.aws_client.access_key,
            "access_key": cld_mgr.aws_client.secret_key,
            "endpoint": constants.MCG_NS_AWS_ENDPOINT,
            "region": self.DEFAULT_REGION,
        }

        logger.info(
            "Create the namespace bucket on top of the namespace resource")
        rand_ns_bucket = bucket_factory(
            amount=1,
            interface="mcg-namespace",
            write_ns_resource=ns_resource_name,
            read_ns_resources=[ns_resource_name],
        )[0].name

        logger.info("Upload files to NS bucket")
        self.write_files_to_pod_and_upload(mcg_obj,
                                           awscli_pod,
                                           bucket_to_write=rand_ns_bucket,
                                           amount=3)

        logger.info(f"Respin mcg resource {mcg_pod}")
        noobaa_pods = pod.get_noobaa_pods()
        pod_obj = [p for p in noobaa_pods
                   if p.name.startswith(mcg_pod)][0]
        pod_obj.delete(force=True)
        logger.info("Wait for noobaa pods to come up")
        assert pod_obj.ocp.wait_for_resource(
            condition="Running",
            selector="app=noobaa",
            resource_count=len(noobaa_pods),
            timeout=1000,
        )
        logger.info("Wait for noobaa health to be OK")
        ceph_cluster_obj = CephCluster()
        ceph_cluster_obj.wait_for_noobaa_health_ok()

        logger.info("Read files directly from AWS")
        self.download_files(mcg_obj,
                            awscli_pod,
                            bucket_to_read=target_bucket_name,
                            s3_creds=s3_creds)

        logger.info("Compare between uploaded files and downloaded files")
        assert self.compare_dirs(awscli_pod, amount=3)
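The respin-and-wait sequence above is repeated verbatim in Example #5. A minimal sketch of how it could be factored into a reusable helper, assuming the same pod module, CephCluster class and wait_for_resource behaviour shown in these examples (the helper name itself is hypothetical):

def respin_noobaa_pod_and_wait(pod_name_prefix, timeout=1000):
    """Force-delete the first noobaa pod matching the prefix and wait for recovery."""
    noobaa_pods = pod.get_noobaa_pods()
    pod_obj = [p for p in noobaa_pods if p.name.startswith(pod_name_prefix)][0]
    pod_obj.delete(force=True)
    # Wait until the full set of noobaa pods is Running again
    assert pod_obj.ocp.wait_for_resource(
        condition="Running",
        selector="app=noobaa",
        resource_count=len(noobaa_pods),
        timeout=timeout,
    )
    # NooBaa health is reported through the CephCluster helper
    CephCluster().wait_for_noobaa_health_ok()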
Example #2
    def test_rgw_host_node_failure(
        self, nodes, node_restart_teardown, mcg_obj, bucket_factory
    ):
        """
        Test case to fail the node where RGW and noobaa-db-0 are hosted
        and verify the new pod spins up on a healthy node

        """
        # Get rgw pods
        rgw_pod_obj = get_rgw_pods()

        # Get noobaa pods
        noobaa_pod_obj = get_noobaa_pods()

        # Get the node where noobaa-db is hosted
        noobaa_pod_node = None
        for noobaa_pod in noobaa_pod_obj:
            if noobaa_pod.name == "noobaa-db-0":
                noobaa_pod_node = get_pod_node(noobaa_pod)
        assert noobaa_pod_node is not None, "Could not find the noobaa-db-0 pod"

        for rgw_pod in rgw_pod_obj:
            pod_node = rgw_pod.get().get("spec").get("nodeName")
            if pod_node == noobaa_pod_node.name:
                # Stop the node
                log.info(
                    f"Stopping node {pod_node} where"
                    f" rgw pod {rgw_pod.name} and noobaa-db-0 hosted"
                )
                node_obj = get_node_objs(node_names=[pod_node])
                nodes.stop_nodes(node_obj)

                # Validate the old rgw pod went into Terminating state
                wait_for_resource_state(
                    resource=rgw_pod, state=constants.STATUS_TERMINATING, timeout=720
                )

                # Validate a new rgw pod spun up
                ocp_obj = OCP(
                    kind=constants.POD, namespace=defaults.ROOK_CLUSTER_NAMESPACE
                )
                ocp_obj.wait_for_resource(
                    condition=constants.STATUS_RUNNING,
                    resource_count=len(rgw_pod_obj),
                    selector=constants.RGW_APP_LABEL,
                )

                # Create OBC, then read and write objects
                self.create_obc_creation(bucket_factory, mcg_obj, "Object-key-1")

                # Start the node
                nodes.start_nodes(node_obj)

                # Create OBC, then read and write objects
                self.create_obc_creation(bucket_factory, mcg_obj, "Object-key-2")

        # Verify cluster health
        self.sanity_helpers.health_check()

        # Verify all storage pods are running
        wait_for_storage_pods()
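Examples #2, #6 and #8 all need to find the node that hosts a particular NooBaa pod before failing it. A compact sketch of that lookup, reusing only helpers already imported in these examples (get_noobaa_pods, get_pod_node); the helper name is hypothetical:

def get_node_hosting_noobaa_pod(pod_name):
    """Return the node object hosting the named noobaa pod, or None if not found."""
    for noobaa_pod in get_noobaa_pods():
        if noobaa_pod.name == pod_name:
            return get_pod_node(noobaa_pod)
    return None

# Usage, mirroring the loop at the start of Example #2:
# noobaa_pod_node = get_node_hosting_noobaa_pod("noobaa-db-0")
# assert noobaa_pod_node is not None, "Could not find the noobaa-db-0 pod"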
Example #3
def recover_mcg():
    """
    Recovery procedure for noobaa by re-spinning the pods after mon recovery

    """
    logger.info("Re-spinning noobaa pods")
    for noobaa_pod in get_noobaa_pods():
        noobaa_pod.delete()
    for noobaa_pod in get_noobaa_pods():
        wait_for_resource_state(
            resource=noobaa_pod, state=constants.STATUS_RUNNING, timeout=600
        )
    if config.ENV_DATA["platform"].lower() in constants.ON_PREM_PLATFORMS:
        logger.info("Re-spinning RGW pods")
        for rgw_pod in get_rgw_pods():
            rgw_pod.delete()
        for rgw_pod in get_rgw_pods():
            wait_for_resource_state(
                resource=rgw_pod, state=constants.STATUS_RUNNING, timeout=600
            )
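Every example on this page ultimately calls get_noobaa_pods(). A plausible implementation, assuming the label-based helpers ocs-ci uses elsewhere (get_pods_having_label and the Pod wrapper) together with the constants.NOOBAA_APP_LABEL selector that Example #7 waits on; treat this as a sketch, not the library's actual code:

def get_noobaa_pods(noobaa_label=constants.NOOBAA_APP_LABEL,
                    namespace=constants.OPENSHIFT_STORAGE_NAMESPACE):
    """Return Pod objects for all pods carrying the given noobaa label."""
    pod_dicts = get_pods_having_label(noobaa_label, namespace)
    return [Pod(**pod_dict) for pod_dict in pod_dicts]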
Example #4
    def scan_cluster(self):
        """
        Get accurate info on current state of pods
        """
        self._ceph_pods = pod.get_all_pods(self._namespace)
        # TODO: Workaround for BZ1748325:
        mons = pod.get_mon_pods(self.mon_selector, self.namespace)
        for mon in mons:
            if mon.ocp.get_resource_status(
                    mon.name) == constant.STATUS_RUNNING:
                self.mons.append(mon)
        # TODO: End of workaround for BZ1748325
        self.mdss = pod.get_mds_pods(self.mds_selector, self.namespace)
        self.mgrs = pod.get_mgr_pods(self.mgr_selector, self.namespace)
        self.osds = pod.get_osd_pods(self.osd_selector, self.namespace)
        self.noobaas = pod.get_noobaa_pods(self.noobaa_selector,
                                           self.namespace)
        self.rgws = pod.get_rgw_pods()
        self.toolbox = pod.get_ceph_tools_pod()

        # set port attrib on mon pods
        self.mons = list(map(self.set_port, self.mons))
        self.cluster.reload()
        if self.cephfs:
            self.cephfs.reload()
        else:
            try:
                self.cephfs_config = self.CEPHFS.get().get('items')[0]
                self.cephfs = ocs.OCS(**self.cephfs_config)
                self.cephfs.reload()
            except IndexError as e:
                logging.warning(e)
                logging.warning("No CephFS found")

        self.mon_count = len(self.mons)
        self.mds_count = len(self.mdss)
        self.mgr_count = len(self.mgrs)
        self.osd_count = len(self.osds)
        self.noobaa_count = len(self.noobaas)
        self.rgw_count = len(self.rgws)
Example #5
    def test_respin_mcg_pod_and_check_data_integrity_crd(
        self,
        mcg_obj,
        cld_mgr,
        awscli_pod,
        namespace_store_factory,
        bucket_factory,
        mcg_pod,
    ):
        """
        Test writing to a namespace bucket using CRDs and reading directly from AWS.
        Respin one of the MCG pods after the data is uploaded.
        """

        logger.info("Create the namespace resources and verify health")
        nss_tup = ("oc", {"aws": [(1, self.DEFAULT_REGION)]})
        ns_store = namespace_store_factory(*nss_tup)[0]

        logger.info(
            "Create the namespace bucket on top of the namespace stores")
        bucketclass_dict = {
            "interface": "OC",
            "namespace_policy_dict": {
                "type": "Single",
                "namespacestores": [ns_store],
            },
        }
        logger.info(
            "Create the namespace bucket on top of the namespace resource")
        ns_bucket = bucket_factory(
            amount=1,
            interface=bucketclass_dict["interface"],
            bucketclass=bucketclass_dict,
        )[0].name
        s3_creds = {
            "access_key_id": cld_mgr.aws_client.access_key,
            "access_key": cld_mgr.aws_client.secret_key,
            "endpoint": constants.MCG_NS_AWS_ENDPOINT,
            "region": self.DEFAULT_REGION,
        }

        logger.info("Upload files to NS bucket")
        self.write_files_to_pod_and_upload(mcg_obj,
                                           awscli_pod,
                                           bucket_to_write=ns_bucket,
                                           amount=3)

        logger.info(f"Respin mcg resource {mcg_pod}")
        noobaa_pods = pod.get_noobaa_pods()
        pod_obj = [p for p in noobaa_pods
                   if p.name.startswith(mcg_pod)][0]
        pod_obj.delete(force=True)
        logger.info("Wait for noobaa pods to come up")
        assert pod_obj.ocp.wait_for_resource(
            condition="Running",
            selector="app=noobaa",
            resource_count=len(noobaa_pods),
            timeout=1000,
        )
        logger.info("Wait for noobaa health to be OK")
        ceph_cluster_obj = CephCluster()
        ceph_cluster_obj.wait_for_noobaa_health_ok()

        logger.info("Read files directly from AWS")
        self.download_files(mcg_obj,
                            awscli_pod,
                            bucket_to_read=ns_store.uls_name,
                            s3_creds=s3_creds)

        logger.info("Compare between uploaded files and downloaded files")
        assert self.compare_dirs(awscli_pod, amount=3)
Example #6
    def test_noobaa_sts_host_node_failure(
        self,
        noobaa_sts,
        respin_noobaa_operator,
        mcg_obj,
        bucket_factory,
        nodes,
        node_restart_teardown,
    ):
        """
        Test case to fail the node where a NooBaa StatefulSet pod (noobaa-core, noobaa-db)
        is hosted and verify the pod is rescheduled on a healthy node

        """
        executor = ThreadPoolExecutor(max_workers=1)
        pod_obj = OCP(kind=constants.POD,
                      namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)

        # Get noobaa statefulset pod and node where it is hosted
        noobaa_sts_pod = get_noobaa_pods(
            noobaa_label=self.labels_map[noobaa_sts])[0]
        noobaa_sts_pod_node = get_pod_node(noobaa_sts_pod)
        log.info(
            f"{noobaa_sts_pod.name} is running on {noobaa_sts_pod_node.name}")

        # Get the NooBaa operator pod and node where it is hosted
        # Check if NooBaa operator and statefulset pod are hosted on same node
        noobaa_operator_pod = get_noobaa_pods(noobaa_label=self.labels_map[
            constants.NOOBAA_OPERATOR_DEPLOYMENT])[0]
        noobaa_operator_pod_node = get_pod_node(noobaa_operator_pod)
        log.info(
            f"{noobaa_operator_pod.name} is running on {noobaa_operator_pod_node.name}"
        )
        if noobaa_sts_pod_node.name == noobaa_operator_pod_node.name:
            operator_on_same_node = True
            log.info(
                f"{noobaa_sts_pod.name} and {noobaa_operator_pod.name} are running on same node."
            )
        else:
            operator_on_same_node = False
            log.info(
                f"{noobaa_sts_pod.name} and {noobaa_operator_pod.name} are running on different node."
            )

        # Stop the node
        log.info(
            f"Stopping {noobaa_sts_pod_node.name} where {noobaa_sts_pod.name} is hosted"
        )
        stop_thread = executor.submit(nodes.stop_nodes,
                                      nodes=[noobaa_sts_pod_node])
        node.wait_for_nodes_status(node_names=[noobaa_sts_pod_node.name],
                                   status=constants.NODE_NOT_READY)

        # Disrupt NooBaa operator
        if respin_noobaa_operator:
            noobaa_operator_pod.delete(force=True)

        # Check result of 'stop_thread'
        stop_thread.result()

        # Wait for NooBaa operator pod to reach terminating state if on same node
        # and not respun
        if operator_on_same_node and not respin_noobaa_operator:
            wait_for_resource_state(
                resource=noobaa_operator_pod,
                state=constants.STATUS_TERMINATING,
                timeout=360,
            )

        # Wait for NooBaa operator pod to reach running state
        pod_obj.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=self.labels_map[constants.NOOBAA_OPERATOR_DEPLOYMENT],
            resource_count=1,
        )

        # Verify NooBaa statefulset pod reschedules on another node
        try:
            for pod_list in TimeoutSampler(
                    60,
                    3,
                    get_noobaa_pods,
                    noobaa_label=self.labels_map[noobaa_sts],
            ):
                if len(pod_list) == 1:
                    pod_node = get_pod_node(pod_list[0])
                    if pod_node.name != noobaa_sts_pod_node.name:
                        log.info(
                            f"{pod_list[0].name} has been rescheduled on {pod_node.name}"
                        )
                        break
                    log.info(
                        f"Waiting for {noobaa_sts_pod.name} pod to be rescheduled"
                    )
        except TimeoutExpiredError:
            raise TimeoutExpiredError(
                f"{noobaa_sts_pod.name} pod not rescheduled within 60 seconds")

        # Wait for rescheduled pod to reach Running state.
        # For noobaa-db pod which is attached to a PV it may take more time (~8 minutes)
        # until the new pod can attach to the PV
        pod_obj.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=self.labels_map[noobaa_sts],
            resource_count=1,
            timeout=800
            if noobaa_sts == constants.NOOBAA_DB_STATEFULSET else 60,
            sleep=30 if noobaa_sts == constants.NOOBAA_DB_STATEFULSET else 3,
        )

        # Start the node
        log.info(
            f"Starting {noobaa_sts_pod_node.name} where {noobaa_sts_pod.name} was hosted"
        )
        nodes.start_nodes(nodes=[noobaa_sts_pod_node])
        node.wait_for_nodes_status(node_names=[noobaa_sts_pod_node.name],
                                   status=constants.NODE_READY)

        log.info("Wait for all pods to be in running state")
        wait_for_pods_to_be_running(timeout=300)

        # Check cluster health
        self.sanity_helpers.health_check()

        # Creates bucket then writes, reads and deletes objects
        self.sanity_helpers.obc_put_obj_create_delete(mcg_obj, bucket_factory)
Example #7
    def test_noobaa_rebuild(self, bucket_factory):
        """
        Test case to verify noobaa rebuild. Verifies KCS: https://access.redhat.com/solutions/5948631

        1. Stop the noobaa-operator by setting the replicas of noobaa-operator deployment to 0.
        2. Delete the noobaa deployments/statefulsets.
        3. Delete the PVC db-noobaa-db-0.
        4. Patch existing backingstores and bucketclasses to remove finalizers.
        5. Delete the backingstores/bucketclass.
        6. Delete the noobaa secrets.
        7. Restart noobaa-operator by setting the replicas back to 1.
        8. Monitor the pods in openshift-storage for noobaa pods to be Running.

        """

        dep_ocp = OCP(kind=constants.DEPLOYMENT,
                      namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        state_ocp = OCP(kind=constants.STATEFULSET,
                        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        noobaa_pvc_obj = get_pvc_objs(pvc_names=["db-noobaa-db-pg-0"])

        # Scale down noobaa operator
        logger.info(
            f"Scaling down {constants.NOOBAA_OPERATOR_DEPLOYMENT} deployment to replica: 0"
        )
        dep_ocp.exec_oc_cmd(
            f"scale deployment {constants.NOOBAA_OPERATOR_DEPLOYMENT} --replicas=0"
        )

        # Delete noobaa deployments and statefulsets
        logger.info("Deleting noobaa deployments and statefulsets")
        dep_ocp.delete(resource_name=constants.NOOBAA_ENDPOINT_DEPLOYMENT)
        state_ocp.delete(resource_name=constants.NOOBAA_DB_STATEFULSET)
        state_ocp.delete(resource_name=constants.NOOBAA_CORE_STATEFULSET)

        # Delete noobaa-db pvc
        pvc_obj = OCP(kind=constants.PVC,
                      namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        logger.info("Deleting noobaa-db pvc")
        pvc_obj.delete(resource_name=noobaa_pvc_obj[0].name, wait=True)
        pvc_obj.wait_for_delete(resource_name=noobaa_pvc_obj[0].name,
                                timeout=300)

        # Patch and delete existing backingstores
        params = '{"metadata": {"finalizers":null}}'
        bs_obj = OCP(kind=constants.BACKINGSTORE,
                     namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        for bs in bs_obj.get()["items"]:
            assert bs_obj.patch(
                resource_name=bs["metadata"]["name"],
                params=params,
                format_type="merge",
            ), "Failed to change the parameter in backingstore"
            logger.info(f"Deleting backingstore: {bs['metadata']['name']}")
            bs_obj.delete(resource_name=bs["metadata"]["name"])

        # Patch and delete existing bucketclass
        bc_obj = OCP(kind=constants.BUCKETCLASS,
                     namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        for bc in bc_obj.get()["items"]:
            assert bc_obj.patch(
                resource_name=bc["metadata"]["name"],
                params=params,
                format_type="merge",
            ), "Failed to change the parameter in bucketclass"
            logger.info(f"Deleting bucketclass: {bc['metadata']['name']}")
            bc_obj.delete(resource_name=bc["metadata"]["name"])

        # Delete noobaa secrets
        logger.info("Deleting noobaa related secrets")
        dep_ocp.exec_oc_cmd(
            "delete secrets noobaa-admin noobaa-endpoints noobaa-operator noobaa-server"
        )

        # Scale back noobaa-operator deployment
        logger.info(
            f"Scaling back {constants.NOOBAA_OPERATOR_DEPLOYMENT} deployment to replica: 1"
        )
        dep_ocp.exec_oc_cmd(
            f"scale deployment {constants.NOOBAA_OPERATOR_DEPLOYMENT} --replicas=1"
        )

        # Wait and validate noobaa PVC is in bound state
        pvc_obj.wait_for_resource(
            condition=constants.STATUS_BOUND,
            resource_name=noobaa_pvc_obj[0].name,
            timeout=600,
            sleep=120,
        )

        # Validate noobaa pods are up and running
        pod_obj = OCP(kind=constants.POD,
                      namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        noobaa_pods = get_noobaa_pods()
        pod_obj.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_count=len(noobaa_pods),
            selector=constants.NOOBAA_APP_LABEL,
            timeout=900,
        )

        # Verify everything running fine
        logger.info(
            "Verifying all resources are Running and matches expected result")
        self.sanity_helpers.health_check(tries=120)

        # Verify default backingstore/bucketclass
        default_bs = OCP(kind=constants.BACKINGSTORE,
                         namespace=constants.OPENSHIFT_STORAGE_NAMESPACE).get(
                             resource_name=DEFAULT_NOOBAA_BACKINGSTORE)
        default_bc = OCP(kind=constants.BUCKETCLASS,
                         namespace=constants.OPENSHIFT_STORAGE_NAMESPACE).get(
                             resource_name=DEFAULT_NOOBAA_BUCKETCLASS)
        assert (default_bs["status"]["phase"] == default_bc["status"]["phase"]
                == constants.STATUS_READY
                ), "Failed: Default bs/bc are not in ready state"

        # Create OBCs
        logger.info("Creating OBCs after noobaa rebuild")
        bucket_factory(amount=3, interface="OC", verify_health=True)
Example #8
    def test_rgw_host_node_failure(self, nodes, node_restart_teardown,
                                   node_drain_teardown, mcg_obj,
                                   bucket_factory):
        """
        Test case to fail the node where RGW and the NooBaa DB are hosted
        and verify that the new pods spin up on a healthy node

        """

        # Get noobaa pods
        noobaa_pod_obj = get_noobaa_pods()

        # Get the node where noobaa-db is hosted
        noobaa_pod_node = None
        for noobaa_pod in noobaa_pod_obj:
            if noobaa_pod.name in [
                    constants.NB_DB_NAME_46_AND_BELOW,
                    constants.NB_DB_NAME_47_AND_ABOVE,
            ]:
                noobaa_pod_node = get_pod_node(noobaa_pod)
        if noobaa_pod_node is None:
            assert False, "Could not find the NooBaa DB pod"

        # Validate whether the RGW pod and noobaa-db are hosted on the same node
        # If not, make sure both pods end up on the same node
        log.info("Validate if RGW pod and noobaa-db are hosted on the same node")
        rgw_pod_obj = get_rgw_pods()
        rgw_pod_node_list = [
            rgw_pod.get().get("spec").get("nodeName")
            for rgw_pod in rgw_pod_obj
        ]
        if noobaa_pod_node.name not in rgw_pod_node_list:
            log.info("Unschedule the other worker nodes so that the RGW "
                     "pod moves to the node where the NooBaa DB pod is hosted")
            worker_node_list = get_worker_nodes()
            node_names = list(
                set(worker_node_list) - {noobaa_pod_node.name})
            unschedule_nodes(node_names=node_names)
            ocp_obj = OCP(kind=constants.POD,
                          namespace=defaults.ROOK_CLUSTER_NAMESPACE)
            rgw_pod_obj[0].delete()
            ocp_obj.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_count=len(rgw_pod_obj),
                selector=constants.RGW_APP_LABEL,
                timeout=300,
                sleep=5,
            )
            log.info("Schedule those nodes again")
            schedule_nodes(node_names=node_names)

            # Check the ceph health OK
            ceph_health_check(tries=90, delay=15)

            # Verify all storage pods are running
            wait_for_storage_pods()

            # Check again that the rgw pod moved to the node where the NooBaa DB pod is hosted
            rgw_pod_obj_list = get_rgw_pods()
            rgw_pod_node_names = [
                get_pod_node(rgw_pod_obj).name for rgw_pod_obj in rgw_pod_obj_list
            ]
            assert noobaa_pod_node.name in rgw_pod_node_names, (
                "RGW pod didn't move to the node where the NooBaa DB pod is"
                " hosted even after cordoning and uncordoning the nodes. "
                f"RGW pods hosted on: {rgw_pod_node_names}. "
                f"NooBaa DB pod hosted on: {noobaa_pod_node.name}")

        log.info(
            "RGW and noobaa-db are hosted on same node start the test execution"
        )
        rgw_pod_obj = get_rgw_pods()
        for rgw_pod in rgw_pod_obj:
            pod_node = rgw_pod.get().get("spec").get("nodeName")
            if pod_node == noobaa_pod_node.name:
                # Stop the node
                log.info(f"Stopping node {pod_node} where"
                         f" rgw pod {rgw_pod.name} and NooBaa DB are hosted")
                node_obj = get_node_objs(node_names=[pod_node])
                nodes.stop_nodes(node_obj)

                # Validate the old rgw pod went into Terminating state
                wait_for_resource_state(resource=rgw_pod,
                                        state=constants.STATUS_TERMINATING,
                                        timeout=720)

                # Validate a new rgw pod spun up
                ocp_obj = OCP(kind=constants.POD,
                              namespace=defaults.ROOK_CLUSTER_NAMESPACE)
                ocp_obj.wait_for_resource(
                    condition=constants.STATUS_RUNNING,
                    resource_count=len(rgw_pod_obj),
                    selector=constants.RGW_APP_LABEL,
                )

                # Start the node
                nodes.start_nodes(node_obj)

                # Check the ceph health OK
                ceph_health_check(tries=90, delay=15)

                # Verify all storage pods are running
                wait_for_storage_pods()

                # Create OBC, then read and write objects
                self.create_obc_creation(bucket_factory, mcg_obj,
                                         "Object-key-2")

        # Verify cluster health
        self.sanity_helpers.health_check()
Example #9
    def test_noobaa_db_backup_and_recovery(
        self,
        pvc_factory,
        pod_factory,
        snapshot_factory,
        bucket_factory,
        rgw_bucket_factory,
    ):
        """
        Test case to verify noobaa backup and recovery

        1. Take a snapshot of the db-noobaa-db-0 PVC and restore it to a new PVC.
        2. Scale down the statefulset noobaa-db.
        3. Get the yaml of the current PVC, db-noobaa-db-0, and
           change the parameter persistentVolumeReclaimPolicy to Retain for the restored PVC.
        4. Delete both PVCs; the PV for the original claim db-noobaa-db-0 will be removed.
           The PV for claim db-noobaa-db-0-snapshot-restore will move to 'Released'.
        5. Edit the restored PV again and remove the claimRef section.
           The volume will transition to Available.
        6. Edit the yaml db-noobaa-db-0.yaml and change the setting volumeName to the restored PV.
        7. Scale up the statefulset again and the pod should be running.

        """

        # Initialise variable
        self.noobaa_db_sst_name = "noobaa-db-pg"

        # Get noobaa pods before execution
        noobaa_pods = get_noobaa_pods()

        # Get noobaa PVC before execution
        noobaa_pvc_obj = get_pvc_objs(pvc_names=["db-noobaa-db-pg-0"])
        noobaa_pv_name = noobaa_pvc_obj[0].get("spec").get("spec").get(
            "volumeName")

        # Take snapshot db-noobaa-db-0 PVC
        log.info(f"Creating snapshot of the {noobaa_pvc_obj[0].name} PVC")
        snap_obj = snapshot_factory(
            pvc_obj=noobaa_pvc_obj[0],
            wait=True,
            snapshot_name=f"{noobaa_pvc_obj[0].name}-snapshot",
        )
        log.info(
            f"Successfully created snapshot {snap_obj.name} and in Ready state"
        )

        # Restore it to PVC
        log.info(f"Restoring snapshot {snap_obj.name} to create new PVC")
        sc_name = noobaa_pvc_obj[0].get().get("spec").get("storageClassName")
        pvc_size = (noobaa_pvc_obj[0].get().get("spec").get("resources").get(
            "requests").get("storage"))
        self.restore_pvc_obj = create_restore_pvc(
            sc_name=sc_name,
            snap_name=snap_obj.name,
            namespace=snap_obj.namespace,
            size=pvc_size,
            pvc_name=f"{snap_obj.name}-restore",
            volume_mode=snap_obj.parent_volume_mode,
            access_mode=snap_obj.parent_access_mode,
        )
        wait_for_resource_state(self.restore_pvc_obj, constants.STATUS_BOUND)
        self.restore_pvc_obj.reload()
        log.info(f"Succeesfuly created PVC {self.restore_pvc_obj.name} "
                 f"from snapshot {snap_obj.name}")

        # Scale down the statefulset noobaa-db
        assert modify_statefulset_replica_count(
            statefulset_name=self.noobaa_db_sst_name, replica_count=0
        ), f"Failed to scale down the statefulset {self.noobaa_db_sst_name}"

        # Get the noobaa-db PVC
        pvc_obj = OCP(kind=constants.PVC,
                      namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        noobaa_pvc_yaml = pvc_obj.get(resource_name=noobaa_pvc_obj[0].name)

        # Get the restored noobaa PVC and
        # change the parameter persistentVolumeReclaimPolicy to Retain
        restored_noobaa_pvc_obj = get_pvc_objs(
            pvc_names=[f"{snap_obj.name}-restore"])
        restored_noobaa_pv_name = (restored_noobaa_pvc_obj[0].get("spec").get(
            "spec").get("volumeName"))
        pv_obj = OCP(kind=constants.PV,
                     namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        params = '{"spec":{"persistentVolumeReclaimPolicy":"Retain"}}'
        assert pv_obj.patch(
            resource_name=restored_noobaa_pv_name, params=params), (
                "Failed to change the parameter persistentVolumeReclaimPolicy"
                f" to Retain {restored_noobaa_pv_name}")

        # Delete both PVCs
        delete_pvcs(pvc_objs=[noobaa_pvc_obj[0], restored_noobaa_pvc_obj[0]])

        # Validate original claim db-noobaa-db-0 removed
        assert validate_pv_delete(
            pv_name=noobaa_pv_name
        ), f"PV not deleted, still exist {noobaa_pv_name}"

        # Validate PV for claim db-noobaa-db-0-snapshot-restore is in Released state
        pv_obj.wait_for_resource(condition=constants.STATUS_RELEASED,
                                 resource_name=restored_noobaa_pv_name)

        # Edit the restored PV again and remove the claimRef section
        log.info(
            f"Removing the claimRef section from PV {restored_noobaa_pv_name}")
        params = '[{"op": "remove", "path": "/spec/claimRef"}]'
        pv_obj.patch(resource_name=restored_noobaa_pv_name,
                     params=params,
                     format_type="json")
        log.info(
            f"Successfully removed claimRef section from PVC {restored_noobaa_pv_name}"
        )

        # Validate PV is in Available state
        pv_obj.wait_for_resource(condition=constants.STATUS_AVAILABLE,
                                 resource_name=restored_noobaa_pv_name)

        # Edit the yaml db-noobaa-db-0.yaml and change the
        # setting volumeName to the restored PV
        noobaa_pvc_yaml["spec"]["volumeName"] = restored_noobaa_pv_name
        noobaa_pvc_yaml = OCS(**noobaa_pvc_yaml)
        noobaa_pvc_yaml.create()

        # Validate noobaa PVC is in bound state
        pvc_obj.wait_for_resource(
            condition=constants.STATUS_BOUND,
            resource_name=noobaa_pvc_obj[0].name,
            timeout=120,
        )

        # Scale up the statefulset again
        assert modify_statefulset_replica_count(
            statefulset_name=self.noobaa_db_sst_name, replica_count=1
        ), f"Failed to scale up the statefulset {self.noobaa_db_sst_name}"

        # Validate noobaa pod is up and running
        pod_obj = OCP(kind=constants.POD,
                      namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        pod_obj.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_count=len(noobaa_pods),
            selector=constants.NOOBAA_APP_LABEL,
        )

        # Change the parameter persistentVolumeReclaimPolicy to Delete again
        params = '{"spec":{"persistentVolumeReclaimPolicy":"Delete"}}'
        assert pv_obj.patch(
            resource_name=restored_noobaa_pv_name, params=params), (
                "Failed to change the parameter persistentVolumeReclaimPolicy"
                f" to Delete {restored_noobaa_pv_name}")
        log.info(
            "Changed the parameter persistentVolumeReclaimPolicy to Delete again"
        )

        # Verify all storage pods are running
        wait_for_storage_pods()

        # Creating Resources
        log.info("Creating Resources using sanity helpers")
        self.sanity_helpers.create_resources(pvc_factory, pod_factory,
                                             bucket_factory,
                                             rgw_bucket_factory)
        # Deleting Resources
        self.sanity_helpers.delete_resources()

        # Verify everything running fine
        log.info(
            "Verifying All resources are Running and matches expected result")
        self.sanity_helpers.health_check(tries=120)
Example #10
    def factory(snapshot_factory=snapshot_factory):
        # Get noobaa pods before execution
        noobaa_pods = pod.get_noobaa_pods()

        # Get noobaa PVC before execution
        noobaa_pvc_obj = pvc.get_pvc_objs(pvc_names=["db-noobaa-db-pg-0"])
        noobaa_pv_name = noobaa_pvc_obj[0].get("spec").get("spec").get(
            "volumeName")

        # Take snapshot db-noobaa-db-0 PVC
        logger.info(f"Creating snapshot of the {noobaa_pvc_obj[0].name} PVC")
        snap_obj = snapshot_factory(
            pvc_obj=noobaa_pvc_obj[0],
            wait=True,
            snapshot_name=f"{noobaa_pvc_obj[0].name}-snapshot",
        )
        logger.info(
            f"Successfully created snapshot {snap_obj.name} and in Ready state"
        )

        # Restore it to PVC
        logger.info(f"Restoring snapshot {snap_obj.name} to create new PVC")
        sc_name = noobaa_pvc_obj[0].get().get("spec").get("storageClassName")
        pvc_size = (noobaa_pvc_obj[0].get().get("spec").get("resources").get(
            "requests").get("storage"))
        restore_pvc_obj = pvc.create_restore_pvc(
            sc_name=sc_name,
            snap_name=snap_obj.name,
            namespace=snap_obj.namespace,
            size=pvc_size,
            pvc_name=f"{snap_obj.name}-restore",
            volume_mode=snap_obj.parent_volume_mode,
            access_mode=snap_obj.parent_access_mode,
        )
        restore_pvc_objs.append(restore_pvc_obj)
        wait_for_resource_state(restore_pvc_obj, constants.STATUS_BOUND)
        restore_pvc_obj.reload()
        logger.info(f"Succeesfuly created PVC {restore_pvc_obj.name} "
                    f"from snapshot {snap_obj.name}")

        # Scale down the statefulset noobaa-db
        assert modify_statefulset_replica_count(
            statefulset_name=constants.NOOBAA_DB_STATEFULSET, replica_count=0
        ), f"Failed to scale down the statefulset {constants.NOOBAA_DB_STATEFULSET}"

        # Get the noobaa-db PVC
        pvc_obj = OCP(kind=constants.PVC,
                      namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        noobaa_pvc_yaml = pvc_obj.get(resource_name=noobaa_pvc_obj[0].name)

        # Get the restored noobaa PVC and
        # change the parameter persistentVolumeReclaimPolicy to Retain
        restored_noobaa_pvc_obj = pvc.get_pvc_objs(
            pvc_names=[f"{snap_obj.name}-restore"])
        restored_noobaa_pv_name = (restored_noobaa_pvc_obj[0].get("spec").get(
            "spec").get("volumeName"))
        pv_obj = OCP(kind=constants.PV,
                     namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        params = '{"spec":{"persistentVolumeReclaimPolicy":"Retain"}}'
        assert pv_obj.patch(
            resource_name=restored_noobaa_pv_name, params=params), (
                "Failed to change the parameter persistentVolumeReclaimPolicy"
                f" to Retain {restored_noobaa_pv_name}")

        # Delete both PVCs
        pvc.delete_pvcs(
            pvc_objs=[noobaa_pvc_obj[0], restored_noobaa_pvc_obj[0]])

        # Validate original claim db-noobaa-db-0 removed
        assert validate_pv_delete(
            pv_name=noobaa_pv_name
        ), f"PV not deleted, still exist {noobaa_pv_name}"

        # Validate PV for claim db-noobaa-db-0-snapshot-restore is in Released state
        pv_obj.wait_for_resource(condition=constants.STATUS_RELEASED,
                                 resource_name=restored_noobaa_pv_name)

        # Edit the restored PV again and remove the claimRef section
        logger.info(
            f"Removing the claimRef section from PV {restored_noobaa_pv_name}")
        params = '[{"op": "remove", "path": "/spec/claimRef"}]'
        pv_obj.patch(resource_name=restored_noobaa_pv_name,
                     params=params,
                     format_type="json")
        logger.info(
            f"Successfully removed claimRef section from PVC {restored_noobaa_pv_name}"
        )

        # Validate PV is in Available state
        pv_obj.wait_for_resource(condition=constants.STATUS_AVAILABLE,
                                 resource_name=restored_noobaa_pv_name)

        # Edit the yaml db-noobaa-db-0.yaml and change the
        # setting volumeName to the restored PV
        noobaa_pvc_yaml["spec"]["volumeName"] = restored_noobaa_pv_name
        noobaa_pvc_yaml = OCS(**noobaa_pvc_yaml)
        noobaa_pvc_yaml.create()

        # Validate noobaa PVC is in bound state
        pvc_obj.wait_for_resource(
            condition=constants.STATUS_BOUND,
            resource_name=noobaa_pvc_obj[0].name,
            timeout=120,
        )

        # Scale up the statefulset again
        assert modify_statefulset_replica_count(
            statefulset_name=constants.NOOBAA_DB_STATEFULSET, replica_count=1
        ), f"Failed to scale up the statefulset {constants.NOOBAA_DB_STATEFULSET}"

        # Validate noobaa pod is up and running
        pod_obj = OCP(kind=constants.POD,
                      namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        pod_obj.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_count=len(noobaa_pods),
            selector=constants.NOOBAA_APP_LABEL,
        )

        # Change the parameter persistentVolumeReclaimPolicy to Delete again
        params = '{"spec":{"persistentVolumeReclaimPolicy":"Delete"}}'
        assert pv_obj.patch(
            resource_name=restored_noobaa_pv_name, params=params), (
                "Failed to change the parameter persistentVolumeReclaimPolicy"
                f" to Delete {restored_noobaa_pv_name}")
        logger.info(
            "Changed the parameter persistentVolumeReclaimPolicy to Delete again"
        )