示例#1
0
def setup_persistent_monitoring():
    """
    Change monitoring backend to OCS
    """
    sc = helpers.default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

    # Get the list of monitoring pods
    pods_list = get_all_pods(
        namespace=defaults.OCS_MONITORING_NAMESPACE,
        selector=["prometheus", "alertmanager"],
    )

    # Create configmap cluster-monitoring-config and reconfigure
    # storage class and telemeter server (if the url is specified in a
    # config file)
    create_configmap_cluster_monitoring_pod(
        sc_name=sc.name,
        telemeter_server_url=config.ENV_DATA.get("telemeter_server_url"),
    )

    # Take some time to respin the pod
    waiting_time = 45
    logger.info(f"Waiting {waiting_time} seconds...")
    time.sleep(waiting_time)

    # Validate the pods are respinned and in running state
    retry((CommandFailed, ResourceWrongStatusException), tries=3,
          delay=15)(validate_pods_are_respinned_and_running_state)(pods_list)

    # Validate the pvc is created on monitoring pods
    validate_pvc_created_and_bound_on_monitoring_pods()

    # Validate the pvc are mounted on pods
    retry((CommandFailed, AssertionError), tries=3,
          delay=15)(validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
    def test_running_cluster_mointoring_with_persistent_stoarge(self):
        """
        A test case to configure the persistent volume on monitoring pods
        """

        # Create configmap cluster-monitoring-config
        create_configmap_cluster_monitoring_pod(self.sc_obj.name)

        # Validate the pods are respinned and in running state
        validate_monitoring_pods_are_respinned_and_running_state(
            self.pods_list)

        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()

        # Validate the pvc are mounted on pods
        validate_pvc_are_mounted_on_monitoring_pods(self.pods_list)
示例#3
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.
        """
        ceph_cluster = ocp.OCP(kind='CephCluster', namespace=self.namespace)
        try:
            ceph_cluster.get().get('items')[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")
        self.deploy_ocs_via_operator()
        pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
        cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
        # Check for Ceph pods
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-mon',
                                     resource_count=3,
                                     timeout=600)
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-mgr',
                                     timeout=600)
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-osd',
                                     resource_count=3,
                                     timeout=600)

        # validate ceph mon/osd volumes are backed by pvc
        validate_cluster_on_pvc()

        # validate PDB creation of MON, MDS, OSD pods
        validate_pdb_creation()

        # Creating toolbox pod
        setup_ceph_toolbox()

        assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                     selector='app=rook-ceph-tools',
                                     resource_count=1,
                                     timeout=600)

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data['items'][0]['metadata']['name']

        if helpers.validate_cephfilesystem(cfs_name):
            logger.info("MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error("MDS deployment Failed! Please check logs!")

        # Change monitoring backend to OCS
        if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
                'persistent-monitoring'):

            sc = helpers.default_storage_class(
                interface_type=constants.CEPHBLOCKPOOL)

            # Get the list of monitoring pods
            pods_list = get_all_pods(
                namespace=defaults.OCS_MONITORING_NAMESPACE,
                selector=['prometheus', 'alertmanager'])

            # Create configmap cluster-monitoring-config and reconfigure
            # storage class and telemeter server (if the url is specified in a
            # config file)
            create_configmap_cluster_monitoring_pod(
                sc_name=sc.name,
                telemeter_server_url=config.ENV_DATA.get(
                    "telemeter_server_url"))

            # Take some time to respin the pod
            waiting_time = 45
            logger.info(f"Waiting {waiting_time} seconds...")
            time.sleep(waiting_time)

            # Validate the pods are respinned and in running state
            retry((CommandFailed, ResourceWrongStatusException),
                  tries=3,
                  delay=15)(validate_pods_are_respinned_and_running_state)(
                      pods_list)

            # Validate the pvc is created on monitoring pods
            validate_pvc_created_and_bound_on_monitoring_pods()

            # Validate the pvc are mounted on pods
            retry((CommandFailed, AssertionError), tries=3, delay=15)(
                validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
        elif config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
                "telemeter_server_url"):
            # Create configmap cluster-monitoring-config to reconfigure
            # telemeter server url when 'persistent-monitoring' is False
            create_configmap_cluster_monitoring_pod(
                telemeter_server_url=config.ENV_DATA["telemeter_server_url"])

        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        try:
            ceph_health_check(namespace=self.namespace, tries=30, delay=10)
        except CephHealthException as ex:
            err = str(ex)
            logger.warning(f"Ceph health check failed with {err}")
            if "clock skew detected" in err:
                logger.info(f"Changing NTP on compute nodes to"
                            f" {constants.RH_NTP_CLOCK}")
                update_ntp_compute_nodes()
                assert ceph_health_check(namespace=self.namespace,
                                         tries=60,
                                         delay=10)

        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()
        if check_nodes_specs(min_cpu=constants.MIN_NODE_CPU,
                             min_memory=constants.MIN_NODE_MEMORY):
            logger.info("The cluster specs meet the minimum requirements and "
                        "therefore, NooBaa auto scale will be enabled")
            min_nb_eps = config.DEPLOYMENT.get('min_noobaa_endpoints')
            max_nb_eps = config.DEPLOYMENT.get('max_noobaa_endpoints')
            change_noobaa_endpoints_count(min_nb_eps=min_nb_eps,
                                          max_nb_eps=max_nb_eps)
        else:
            logger.warning(
                "The cluster specs do not meet the minimum requirements"
                " and therefore, NooBaa auto scale will remain disabled")
            change_noobaa_endpoints_count(min_nb_eps=1, max_nb_eps=1)
示例#4
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.
        """
        _templating = templating.Templating()

        ceph_cluster = ocp.OCP(
            kind='CephCluster', namespace=self.namespace
        )
        try:
            ceph_cluster.get().get('items')[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")

        if not self.ocs_operator_deployment:
            create_oc_resource(
                'common.yaml', self.cluster_path, _templating, config.ENV_DATA
            )
            run_cmd(
                f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
                f'"openshift.io/cluster-monitoring=true"'
            )
            run_cmd(
                f"oc policy add-role-to-user view "
                f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
                f"-n {self.namespace}"
            )
            # HACK: If you would like to drop this hack, make sure that you
            # also updated docs and write appropriate unit/integration tests
            # for config processing.
            if config.ENV_DATA.get('monitoring_enabled') in (
                "true", "True", True
            ):
                # RBAC rules for monitoring, based on documentation change in
                # rook:
                # https://github.com/rook/rook/commit/1b6fe840f6ae7372a9675ba727ecc65326708aa8
                # HACK: This should be dropped when OCS is managed by OLM
                apply_oc_resource(
                    'rbac.yaml',
                    self.cluster_path,
                    _templating,
                    config.ENV_DATA,
                    template_dir="monitoring"
                )
            # Increased to 15 seconds as 10 is not enough
            # TODO: do the sampler function and check if resource exist
            wait_time = 15
            logger.info(f"Waiting {wait_time} seconds...")
            time.sleep(wait_time)
            create_oc_resource(
                'operator-openshift.yaml', self.cluster_path,
                _templating, config.ENV_DATA
            )
            logger.info(f"Waiting {wait_time} seconds...")
            time.sleep(wait_time)
            run_cmd(
                f"oc wait --for condition=ready pod "
                f"-l app=rook-ceph-operator "
                f"-n {self.namespace} "
                f"--timeout=120s"
            )
            run_cmd(
                f"oc wait --for condition=ready pod "
                f"-l app=rook-discover "
                f"-n {self.namespace} "
                f"--timeout=120s"
            )
            create_oc_resource(
                'cluster.yaml', self.cluster_path, _templating, config.ENV_DATA
            )
        else:
            self.deploy_ocs_via_operator()

        pod = ocp.OCP(
            kind=constants.POD, namespace=self.namespace
        )
        cfs = ocp.OCP(
            kind=constants.CEPHFILESYSTEM,
            namespace=self.namespace
        )
        # Check for Ceph pods
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-mon',
            resource_count=3, timeout=600
        )
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-mgr',
            timeout=600
        )
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-osd',
            resource_count=3, timeout=600
        )

        # validate ceph mon/osd volumes are backed by pvc
        validate_cluster_on_pvc()

        # Creating toolbox pod
        setup_ceph_toolbox()

        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector='app=rook-ceph-tools', resource_count=1, timeout=600
        )

        if not self.ocs_operator_deployment:
            logger.info(f"Waiting {wait_time} seconds...")
            time.sleep(wait_time)
            # HACK: This should be dropped (including service-monitor.yaml and
            # prometheus-rules.yaml files) when OCS is managed by OLM
            if config.ENV_DATA.get('monitoring_enabled') not in (
                "true", "True", True
            ):
                # HACK: skip creation of rook-ceph-mgr service monitor when
                # monitoring is enabled (if this were not skipped, the step
                # would fail because rook would create the service monitor at
                # this point already)
                create_oc_resource(
                    "service-monitor.yaml", self.cluster_path, _templating,
                    config.ENV_DATA
                )
                # HACK: skip creation of prometheus-rules, rook-ceph is
                # concerned with it's setup now, based on clarification from
                # Umanga Chapagain
                create_oc_resource(
                    "prometheus-rules.yaml", self.cluster_path, _templating,
                    config.ENV_DATA
                )
            logger.info(f"Waiting {wait_time} seconds...")
            time.sleep(wait_time)

            # Create MDS pods for CephFileSystem
            fs_data = templating.load_yaml(constants.CEPHFILESYSTEM_YAML)
            fs_data['metadata']['namespace'] = self.namespace

            ceph_obj = OCS(**fs_data)
            ceph_obj.create()
            assert pod.wait_for_resource(
                condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
                resource_count=2, timeout=600
            )

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data['items'][0]['metadata']['name']

        if helpers.validate_cephfilesystem(cfs_name):
            logger.info(f"MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error(
                f"MDS deployment Failed! Please check logs!"
            )

        if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get('persistent-monitoring'):
            # Create a pool, secrets and sc
            secret_obj = helpers.create_secret(interface_type=constants.CEPHBLOCKPOOL)
            cbj_obj = helpers.create_ceph_block_pool()
            sc_obj = helpers.create_storage_class(
                interface_type=constants.CEPHBLOCKPOOL,
                interface_name=cbj_obj.name,
                secret_name=secret_obj.name
            )

            # Get the list of monitoring pods
            pods_list = get_all_pods(
                namespace=defaults.OCS_MONITORING_NAMESPACE,
                selector=['prometheus', 'alertmanager']
            )

            # Create configmap cluster-monitoring-config
            create_configmap_cluster_monitoring_pod(sc_obj.name)

            # Take some time to respin the pod
            waiting_time = 30
            logger.info(f"Waiting {waiting_time} seconds...")
            time.sleep(waiting_time)

            # Validate the pods are respinned and in running state
            validate_pods_are_respinned_and_running_state(
                pods_list
            )

            # Validate the pvc is created on monitoring pods
            validate_pvc_created_and_bound_on_monitoring_pods()

            # Validate the pvc are mounted on pods
            validate_pvc_are_mounted_on_monitoring_pods(pods_list)

        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        assert ceph_health_check(
            namespace=self.namespace
        )
        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()
示例#5
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.
        """
        ceph_cluster = ocp.OCP(kind='CephCluster', namespace=self.namespace)
        try:
            ceph_cluster.get().get('items')[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")
        self.deploy_ocs_via_operator()
        pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
        cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
        # Check for Ceph pods
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-mon',
                                     resource_count=3,
                                     timeout=600)
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-mgr',
                                     timeout=600)
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-osd',
                                     resource_count=3,
                                     timeout=600)

        # validate ceph mon/osd volumes are backed by pvc
        validate_cluster_on_pvc()

        # validate PDB creation of MON, MDS, OSD pods
        validate_pdb_creation()

        # Creating toolbox pod
        setup_ceph_toolbox()

        assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                     selector='app=rook-ceph-tools',
                                     resource_count=1,
                                     timeout=600)

        # Workaround for https://bugzilla.redhat.com/show_bug.cgi?id=1847098
        if config.DEPLOYMENT.get('local_storage'):
            tools_pod = run_cmd(
                f"oc -n {self.namespace} get pod -l 'app=rook-ceph-tools' "
                f"-o jsonpath='{{.items[0].metadata.name}}'")
            pgs_to_autoscale = [
                'ocs-storagecluster-cephblockpool',
                'ocs-storagecluster-cephfilesystem-data0'
            ]
            for pg in pgs_to_autoscale:
                run_cmd(f"oc -n {self.namespace} exec {tools_pod} -- "
                        f"ceph osd pool set {pg} pg_autoscale_mode on")

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data['items'][0]['metadata']['name']

        if helpers.validate_cephfilesystem(cfs_name):
            logger.info("MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error("MDS deployment Failed! Please check logs!")

        # Change monitoring backend to OCS
        if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
                'persistent-monitoring'):

            sc = helpers.default_storage_class(
                interface_type=constants.CEPHBLOCKPOOL)

            # Get the list of monitoring pods
            pods_list = get_all_pods(
                namespace=defaults.OCS_MONITORING_NAMESPACE,
                selector=['prometheus', 'alertmanager'])

            # Create configmap cluster-monitoring-config and reconfigure
            # storage class and telemeter server (if the url is specified in a
            # config file)
            create_configmap_cluster_monitoring_pod(
                sc_name=sc.name,
                telemeter_server_url=config.ENV_DATA.get(
                    "telemeter_server_url"))

            # Take some time to respin the pod
            waiting_time = 45
            logger.info(f"Waiting {waiting_time} seconds...")
            time.sleep(waiting_time)

            # Validate the pods are respinned and in running state
            retry((CommandFailed, ResourceWrongStatusException),
                  tries=3,
                  delay=15)(validate_pods_are_respinned_and_running_state)(
                      pods_list)

            # Validate the pvc is created on monitoring pods
            validate_pvc_created_and_bound_on_monitoring_pods()

            # Validate the pvc are mounted on pods
            retry((CommandFailed, AssertionError), tries=3, delay=15)(
                validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
        elif config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
                "telemeter_server_url"):
            # Create configmap cluster-monitoring-config to reconfigure
            # telemeter server url when 'persistent-monitoring' is False
            create_configmap_cluster_monitoring_pod(
                telemeter_server_url=config.ENV_DATA["telemeter_server_url"])

        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        assert ceph_health_check(namespace=self.namespace)
        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()
示例#6
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.
        """
        ceph_cluster = ocp.OCP(kind="CephCluster", namespace=self.namespace)
        try:
            ceph_cluster.get().get("items")[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")

        if config.DEPLOYMENT["external_mode"]:
            logger.info("Deploying OCS on external mode RHCS")
            return self.deploy_with_external_mode()
        self.deploy_ocs_via_operator()
        pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
        cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
        # Check for Ceph pods
        assert pod.wait_for_resource(
            condition="Running",
            selector="app=rook-ceph-mon",
            resource_count=3,
            timeout=600,
        )
        assert pod.wait_for_resource(condition="Running",
                                     selector="app=rook-ceph-mgr",
                                     timeout=600)
        assert pod.wait_for_resource(
            condition="Running",
            selector="app=rook-ceph-osd",
            resource_count=3,
            timeout=600,
        )

        # validate ceph mon/osd volumes are backed by pvc
        validate_cluster_on_pvc()

        # validate PDB creation of MON, MDS, OSD pods
        validate_pdb_creation()

        # Creating toolbox pod
        setup_ceph_toolbox()

        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector="app=rook-ceph-tools",
            resource_count=1,
            timeout=600,
        )

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data["items"][0]["metadata"]["name"]

        if helpers.validate_cephfilesystem(cfs_name):
            logger.info("MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error("MDS deployment Failed! Please check logs!")

        # Change monitoring backend to OCS
        if config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
                "persistent-monitoring"):

            sc = helpers.default_storage_class(
                interface_type=constants.CEPHBLOCKPOOL)

            # Get the list of monitoring pods
            pods_list = get_all_pods(
                namespace=defaults.OCS_MONITORING_NAMESPACE,
                selector=["prometheus", "alertmanager"],
            )

            # Create configmap cluster-monitoring-config and reconfigure
            # storage class and telemeter server (if the url is specified in a
            # config file)
            create_configmap_cluster_monitoring_pod(
                sc_name=sc.name,
                telemeter_server_url=config.ENV_DATA.get(
                    "telemeter_server_url"),
            )

            # Take some time to respin the pod
            waiting_time = 45
            logger.info(f"Waiting {waiting_time} seconds...")
            time.sleep(waiting_time)

            # Validate the pods are respinned and in running state
            retry((CommandFailed, ResourceWrongStatusException),
                  tries=3,
                  delay=15)(validate_pods_are_respinned_and_running_state)(
                      pods_list)

            # Validate the pvc is created on monitoring pods
            validate_pvc_created_and_bound_on_monitoring_pods()

            # Validate the pvc are mounted on pods
            retry((CommandFailed, AssertionError), tries=3, delay=15)(
                validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
        elif config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
                "telemeter_server_url"):
            # Create configmap cluster-monitoring-config to reconfigure
            # telemeter server url when 'persistent-monitoring' is False
            create_configmap_cluster_monitoring_pod(
                telemeter_server_url=config.ENV_DATA["telemeter_server_url"])

        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        try:
            ceph_health_check(namespace=self.namespace, tries=30, delay=10)
        except CephHealthException as ex:
            err = str(ex)
            logger.warning(f"Ceph health check failed with {err}")
            if "clock skew detected" in err:
                logger.info(f"Changing NTP on compute nodes to"
                            f" {constants.RH_NTP_CLOCK}")
                if self.platform == constants.VSPHERE_PLATFORM:
                    update_ntp_compute_nodes()
                assert ceph_health_check(namespace=self.namespace,
                                         tries=60,
                                         delay=10)

        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()
示例#7
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.
        """
        set_registry_to_managed_state()
        image = None
        ceph_cluster = ocp.OCP(kind="CephCluster", namespace=self.namespace)
        try:
            ceph_cluster.get().get("items")[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")

        # disconnected installation?
        load_cluster_info()
        if config.DEPLOYMENT.get("disconnected"):
            image = prepare_disconnected_ocs_deployment()

        if config.DEPLOYMENT["external_mode"]:
            self.deploy_with_external_mode()
        else:
            self.deploy_ocs_via_operator(image)
            pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
            cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM,
                          namespace=self.namespace)
            # Check for Ceph pods
            mon_pod_timeout = (900 if self.platform
                               == constants.IBMCLOUD_PLATFORM else 600)
            assert pod.wait_for_resource(
                condition="Running",
                selector="app=rook-ceph-mon",
                resource_count=3,
                timeout=mon_pod_timeout,
            )
            assert pod.wait_for_resource(condition="Running",
                                         selector="app=rook-ceph-mgr",
                                         timeout=600)
            assert pod.wait_for_resource(
                condition="Running",
                selector="app=rook-ceph-osd",
                resource_count=3,
                timeout=600,
            )

            # validate ceph mon/osd volumes are backed by pvc
            validate_cluster_on_pvc()

            # validate PDB creation of MON, MDS, OSD pods
            validate_pdb_creation()

            # Creating toolbox pod
            setup_ceph_toolbox()

            assert pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector="app=rook-ceph-tools",
                resource_count=1,
                timeout=600,
            )

            if not config.COMPONENTS["disable_cephfs"]:
                # Check for CephFilesystem creation in ocp
                cfs_data = cfs.get()
                cfs_name = cfs_data["items"][0]["metadata"]["name"]

                if helpers.validate_cephfilesystem(cfs_name):
                    logger.info("MDS deployment is successful!")
                    defaults.CEPHFILESYSTEM_NAME = cfs_name
                else:
                    logger.error("MDS deployment Failed! Please check logs!")

        # Change monitoring backend to OCS
        if config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
                "persistent-monitoring"):
            setup_persistent_monitoring()
        elif config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
                "telemeter_server_url"):
            # Create configmap cluster-monitoring-config to reconfigure
            # telemeter server url when 'persistent-monitoring' is False
            create_configmap_cluster_monitoring_pod(
                telemeter_server_url=config.ENV_DATA["telemeter_server_url"])

        if not config.COMPONENTS["disable_cephfs"]:
            # Change registry backend to OCS CEPHFS RWX PVC
            registry.change_registry_backend_to_ocs()

        # Verify health of ceph cluster
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        try:
            ceph_health_check(namespace=self.namespace, tries=30, delay=10)
        except CephHealthException as ex:
            err = str(ex)
            logger.warning(f"Ceph health check failed with {err}")
            if "clock skew detected" in err:
                logger.info(f"Changing NTP on compute nodes to"
                            f" {constants.RH_NTP_CLOCK}")
                if self.platform == constants.VSPHERE_PLATFORM:
                    update_ntp_compute_nodes()
                assert ceph_health_check(namespace=self.namespace,
                                         tries=60,
                                         delay=10)

        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()
示例#8
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.
        """
        ceph_cluster = ocp.OCP(
            kind='CephCluster', namespace=self.namespace
        )
        try:
            ceph_cluster.get().get('items')[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")
        self.deploy_ocs_via_operator()
        pod = ocp.OCP(
            kind=constants.POD, namespace=self.namespace
        )
        cfs = ocp.OCP(
            kind=constants.CEPHFILESYSTEM,
            namespace=self.namespace
        )
        # Check for Ceph pods
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-mon',
            resource_count=3, timeout=600
        )
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-mgr',
            timeout=600
        )
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-osd',
            resource_count=3, timeout=600
        )

        # validate ceph mon/osd volumes are backed by pvc
        validate_cluster_on_pvc()

        # Creating toolbox pod
        setup_ceph_toolbox()

        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector='app=rook-ceph-tools', resource_count=1, timeout=600
        )

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data['items'][0]['metadata']['name']

        if helpers.validate_cephfilesystem(cfs_name):
            logger.info(f"MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error(
                f"MDS deployment Failed! Please check logs!"
            )

        # Change monitoring backend to OCS
        if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get('persistent-monitoring'):

            sc = helpers.default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

            # Get the list of monitoring pods
            pods_list = get_all_pods(
                namespace=defaults.OCS_MONITORING_NAMESPACE,
                selector=['prometheus', 'alertmanager']
            )

            # Create configmap cluster-monitoring-config and reconfigure
            # storage class and telemeter server (if the url is specified in a
            # config file)
            create_configmap_cluster_monitoring_pod(
                sc_name=sc.name,
                telemeter_server_url=config.ENV_DATA.get("telemeter_server_url"))

            # Take some time to respin the pod
            waiting_time = 45
            logger.info(f"Waiting {waiting_time} seconds...")
            time.sleep(waiting_time)

            # Validate the pods are respinned and in running state
            validate_pods_are_respinned_and_running_state(
                pods_list
            )

            # Validate the pvc is created on monitoring pods
            validate_pvc_created_and_bound_on_monitoring_pods()

            # Validate the pvc are mounted on pods
            validate_pvc_are_mounted_on_monitoring_pods(pods_list)
        elif config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get("telemeter_server_url"):
            # Create configmap cluster-monitoring-config to reconfigure
            # telemeter server url when 'persistent-monitoring' is False
            create_configmap_cluster_monitoring_pod(
                telemeter_server_url=config.ENV_DATA["telemeter_server_url"])

        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        assert ceph_health_check(
            namespace=self.namespace
        )
        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()