Example #1
def setup_persistent_monitoring():
    """
    Change monitoring backend to OCS
    """
    sc = helpers.default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

    # Get the list of monitoring pods
    pods_list = get_all_pods(
        namespace=defaults.OCS_MONITORING_NAMESPACE,
        selector=["prometheus", "alertmanager"],
    )

    # Create configmap cluster-monitoring-config and reconfigure
    # storage class and telemeter server (if the url is specified in a
    # config file)
    create_configmap_cluster_monitoring_pod(
        sc_name=sc.name,
        telemeter_server_url=config.ENV_DATA.get("telemeter_server_url"),
    )

    # Allow some time for the pods to respin
    waiting_time = 45
    logger.info(f"Waiting {waiting_time} seconds...")
    time.sleep(waiting_time)

    # Validate the pods are respinned and in running state
    retry((CommandFailed, ResourceWrongStatusException), tries=3,
          delay=15)(validate_pods_are_respinned_and_running_state)(pods_list)

    # Validate the PVCs are created and bound on monitoring pods
    validate_pvc_created_and_bound_on_monitoring_pods()

    # Validate the PVCs are mounted on the pods
    retry((CommandFailed, AssertionError), tries=3,
          delay=15)(validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
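The retry(...) calls above use a decorator-factory idiom: retry(exceptions, tries, delay) returns a decorator that is applied to the validation function and immediately invoked with its arguments. A minimal sketch of that idiom, assuming semantics analogous to the ocs-ci retry helper (this stand-in is illustrative, not the real implementation):

import time


def retry(exceptions, tries=3, delay=15):
    # Illustrative stand-in for the retry helper used above: returns a
    # decorator that re-runs func on the given exceptions, sleeping
    # `delay` seconds between attempts and re-raising after `tries` tries
    def decorator(func):
        def wrapper(*args, **kwargs):
            for attempt in range(1, tries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    if attempt == tries:
                        raise
                    time.sleep(delay)
        return wrapper
    return decorator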
Example #2
    def setup(
        self,
        storageclass_factory,
        project_factory,
        snapshot_restore_factory,
        pvc_clone_factory,
        create_pvcs_and_pods,
        pvc_create_sc_type,
        restore_sc_type,
    ):
        """
        Create Storage Class, PVCs and pods

        """
        self.pvc_size = 2

        if "thick" in (pvc_create_sc_type, restore_sc_type):
            # Thick provisioning is applicable only for RBD
            thick_sc = storageclass_factory(
                interface=constants.CEPHBLOCKPOOL,
                new_rbd_pool=False,
                rbd_thick_provision=True,
            )
            access_modes_cephfs = None
            num_of_cephfs_pvc = 0
            thin_sc = default_storage_class(constants.CEPHBLOCKPOOL)
        else:
            thick_sc = None
            access_modes_cephfs = [constants.ACCESS_MODE_RWO]
            num_of_cephfs_pvc = 1
            thin_sc = default_storage_class(constants.CEPHFILESYSTEM)

        sc_dict = {"thin": thin_sc, "thick": thick_sc}
        self.pvc_create_sc = sc_dict[pvc_create_sc_type]
        self.restore_sc = sc_dict[restore_sc_type]

        self.pvcs, self.pods = create_pvcs_and_pods(
            pvc_size=self.pvc_size,
            access_modes_rbd=[constants.ACCESS_MODE_RWO],
            access_modes_cephfs=access_modes_cephfs,
            num_of_rbd_pvc=1,
            num_of_cephfs_pvc=num_of_cephfs_pvc,
            sc_rbd=self.pvc_create_sc,
        )
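The pvc_create_sc_type and restore_sc_type arguments indicate this fixture is driven by pytest parametrization over the "thin"/"thick" storage class types. A hedged sketch of how the combinations might be supplied (the argvalues and the test name are illustrative, not taken from the source):

import pytest


@pytest.mark.parametrize(
    "pvc_create_sc_type,restore_sc_type",
    [
        ("thin", "thin"),
        ("thin", "thick"),
        ("thick", "thin"),
        ("thick", "thick"),
    ],
)
def test_snapshot_restore_sketch(setup, pvc_create_sc_type, restore_sc_type):
    # Hypothetical test body; `setup` is the fixture defined above
    ...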
Example #3
    def storageclass(self, storageclass_factory, reclaim_policy):
        """
        Create storage class if reclaim policy is not "Delete"
        """
        self.reclaim_policy = reclaim_policy
        self.sc_obj = (default_storage_class(constants.CEPHBLOCKPOOL)
                       if reclaim_policy == constants.RECLAIM_POLICY_DELETE
                       else storageclass_factory(
                           interface=constants.CEPHBLOCKPOOL,
                           reclaim_policy=self.reclaim_policy))
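A hedged sketch of how reclaim_policy could reach this fixture through pytest params; the import path follows the ocs-ci convention used in these examples, and RECLAIM_POLICY_RETAIN is assumed to exist alongside RECLAIM_POLICY_DELETE:

import pytest

from ocs_ci.ocs import constants


@pytest.fixture(
    params=[constants.RECLAIM_POLICY_DELETE, constants.RECLAIM_POLICY_RETAIN]
)
def reclaim_policy(request):
    # Each test consuming this fixture runs once per reclaim policy
    return request.param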
Example #4
    def amq_setup(self, amq_factory_fixture):
        """
        Creates an AMQ cluster and runs benchmarks

        """
        sc = default_storage_class(interface_type=constants.CEPHBLOCKPOOL)
        self.amq, self.threads = amq_factory_fixture(sc_name=sc.name)

        # Initialize Sanity instance
        self.sanity_helpers = Sanity()
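The threads returned by amq_factory_fixture are consumed later in the test; assuming they expose a future-like result() as in Example #5, a hypothetical helper to block on them could look like this (the helper name and timeout are illustrative):

def wait_for_benchmarks(threads, timeout=1800):
    # Hypothetical helper: wait for each AMQ benchmark thread to finish,
    # raising if any of them fails or exceeds the timeout
    for thread in threads:
        thread.result(timeout=timeout)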
Example #5
    def test_install_amq_backed_by_ocs(self, interface, test_fixture_amq):
        """
        Create an AMQ cluster and run OpenMessaging benchmarks on it
        """
        # Get sc
        sc = default_storage_class(interface_type=interface)

        # Deploy amq cluster
        test_fixture_amq.setup_amq_cluster(sc.name)

        # Run OpenMessaging benchmarks
        test_fixture_amq.create_messaging_on_amq()

        # Wait for some time for messages to be generated
        waiting_time = 60
        log.info(f"Waiting {waiting_time} seconds for messages to be generated")
        time.sleep(waiting_time)

        # Check messages are sent and received
        threads = test_fixture_amq.run_in_bg()
        for thread in threads:
            thread.result(timeout=1800)
Example #6
    def setup(self, interface_type, reclaim_policy, storageclass_factory):
        """
        Creates storage class with specified interface and reclaim policy.
        Fetches all worker nodes

        Args:
            interface_type (str): The type of the interface
                (e.g. CephBlockPool, CephFileSystem)
            reclaim_policy (str): The type of reclaim policy
                (eg., 'Delete', 'Retain')
            storageclass_factory: A fixture to create new storage class

        Returns:
            tuple: containing the storage class instance and list of worker nodes

        """
        # Create storage class if reclaim policy is not "Delete"
        sc_obj = (default_storage_class(interface_type) if reclaim_policy
                  == constants.RECLAIM_POLICY_DELETE else storageclass_factory(
                      interface=interface_type, reclaim_policy=reclaim_policy))
        worker_nodes_list = node.get_worker_nodes()

        return sc_obj, worker_nodes_list
Example #7
    def create_multi_pvc_pod(self,
                             pods_per_iter=5,
                             io_runtime=3600,
                             start_io=False,
                             pvc_size=None):
        """
        Create PVCs of different types, attach them to pods, and
        optionally start IO.

        Args:
            pods_per_iter (int): Number of PVC-pod pairs to be created per
                PVC type. For example, if 2, then 8 PVCs and pods are
                created, 2 for each of the 4 PVC types.
            io_runtime (int): Fio runtime in seconds
            start_io (bool): If True, start IO; otherwise don't
            pvc_size (str): Size of the PVC, e.g. "20Gi"

        Returns:
            tuple: Lists of all the pod objects and PVC objects created

        """
        rbd_sc = helpers.default_storage_class(constants.CEPHBLOCKPOOL)
        cephfs_sc = helpers.default_storage_class(constants.CEPHFILESYSTEM)
        pvc_size = pvc_size or f"{random.randrange(15, 105, 5)}Gi"
        fio_size = get_size_based_on_cls_usage()
        fio_rate = get_rate_based_on_cls_iops()
        logging.info(f"Create {pods_per_iter * 4} PVCs and PODs")
        # Create PVCs
        cephfs_pvcs = helpers.create_multiple_pvc_parallel(
            sc_obj=cephfs_sc,
            namespace=self.namespace,
            number_of_pvc=pods_per_iter,
            size=pvc_size,
            access_modes=[
                constants.ACCESS_MODE_RWO, constants.ACCESS_MODE_RWX
            ],
        )
        rbd_pvcs = helpers.create_multiple_pvc_parallel(
            sc_obj=rbd_sc,
            namespace=self.namespace,
            number_of_pvc=pods_per_iter,
            size=pvc_size,
            access_modes=[
                constants.ACCESS_MODE_RWO, constants.ACCESS_MODE_RWX
            ],
        )

        # Appending all the pvc_obj and pod_obj to list
        pvc_objs, pod_objs = ([] for i in range(2))
        pvc_objs.extend(cephfs_pvcs + rbd_pvcs)

        # Create pods with above pvc list
        cephfs_pods = helpers.create_pods_parallel(
            cephfs_pvcs,
            self.namespace,
            constants.CEPHFS_INTERFACE,
            pod_dict_path=self.pod_dict_path,
            sa_name=self.sa_name,
            dc_deployment=self.dc_deployment,
            node_selector=self.node_selector,
        )
        rbd_rwo_pvc, rbd_rwx_pvc = ([] for i in range(2))
        for pvc_obj in rbd_pvcs:
            if pvc_obj.get_pvc_access_mode == constants.ACCESS_MODE_RWX:
                rbd_rwx_pvc.append(pvc_obj)
            else:
                rbd_rwo_pvc.append(pvc_obj)
        rbd_rwo_pods = helpers.create_pods_parallel(
            rbd_rwo_pvc,
            self.namespace,
            constants.CEPHBLOCKPOOL,
            pod_dict_path=self.pod_dict_path,
            sa_name=self.sa_name,
            dc_deployment=self.dc_deployment,
            node_selector=self.node_selector,
        )
        rbd_rwx_pods = helpers.create_pods_parallel(
            rbd_rwx_pvc,
            self.namespace,
            constants.CEPHBLOCKPOOL,
            pod_dict_path=self.pod_dict_path,
            sa_name=self.sa_name,
            dc_deployment=self.dc_deployment,
            raw_block_pv=True,
            node_selector=self.node_selector,
        )
        temp_pod_objs = list()
        temp_pod_objs.extend(cephfs_pods + rbd_rwo_pods)

        # Appending all the pod_obj to list
        pod_objs.extend(temp_pod_objs + rbd_rwx_pods)

        # Start IO
        if start_io:
            threads = list()
            for pod_obj in temp_pod_objs:
                process = threading.Thread(
                    target=pod_obj.run_io,
                    kwargs={
                        "storage_type": "fs",
                        "size": fio_size,
                        "runtime": io_runtime,
                        "rate": fio_rate,
                    },
                )
                process.start()
                threads.append(process)
                time.sleep(30)
            for pod_obj in rbd_rwx_pods:
                process = threading.Thread(
                    target=pod_obj.run_io,
                    kwargs={
                        "storage_type": "block",
                        "size": fio_size,
                        "runtime": io_runtime,
                        "rate": fio_rate,
                    },
                )
                process.start()
                threads.append(process)
                time.sleep(30)
            for process in threads:
                process.join()

        return pod_objs, pvc_objs
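The IO section above hand-manages threading.Thread objects and staggers starts with time.sleep(30). A hedged alternative sketch using concurrent.futures, where max_workers throttles concurrency instead of staggered starts (the helper and its parameters are illustrative; io_kwargs would be the same storage_type/size/runtime/rate dict passed above):

from concurrent.futures import ThreadPoolExecutor


def run_io_on_pods(pod_objs, io_kwargs, max_workers=5):
    # Fan pod_obj.run_io out across a thread pool and block until all
    # IO threads complete; exceptions propagate via future.result()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(pod_obj.run_io, **io_kwargs)
            for pod_obj in pod_objs
        ]
        for future in futures:
            future.result()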
Example #8
    def test_rgw_kafka_notifications(self, bucket_factory):
        """
        Test to verify rgw kafka notifications

        """
        # Get sc
        sc = default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

        # Deploy amq cluster
        self.amq.setup_amq_cluster(sc.name)

        # Create topic
        self.kafka_topic = self.amq.create_kafka_topic()

        # Create Kafkadrop pod
        (
            self.kafkadrop_pod,
            self.kafkadrop_svc,
            self.kafkadrop_route,
        ) = self.amq.create_kafkadrop()

        # Get the kafkadrop route
        kafkadrop_host = self.kafkadrop_route.get().get("spec").get("host")

        # Create bucket
        bucketname = bucket_factory(amount=1, interface="RGW-OC")[0].name

        # Get RGW credentials
        rgw_obj = RGW()
        rgw_endpoint, access_key, secret_key = rgw_obj.get_credentials()

        # Clone notify repo
        notify_path = clone_notify()

        # Initialise to put objects
        data = "A random string data to write on created rgw bucket"
        obc_obj = OBC(bucketname)
        s3_resource = boto3.resource(
            "s3",
            verify=retrieve_verification_mode(),
            endpoint_url=rgw_endpoint,
            aws_access_key_id=obc_obj.access_key_id,
            aws_secret_access_key=obc_obj.access_key,
        )
        s3_client = s3_resource.meta.client

        # Initialize notify command to run
        notify_cmd = (
            f"python {notify_path} -e {rgw_endpoint} -a {obc_obj.access_key_id} "
            f"-s {obc_obj.access_key} -b {bucketname} -ke {constants.KAFKA_ENDPOINT} -t {self.kafka_topic.name}"
        )
        log.info(f"Running cmd {notify_cmd}")

        # Put objects to bucket
        assert s3_client.put_object(Bucket=bucketname, Key="key-1",
                                    Body=data), "Failed: Put object: key-1"
        exec_cmd(notify_cmd)

        # Validate from the rgw logs that notifications are sent
        # and no errors are seen
        pattern = "ERROR: failed to create push endpoint"
        rgw_pod_obj = get_rgw_pods()
        rgw_log = get_pod_logs(pod_name=rgw_pod_obj[0].name, container="rgw")
        assert re.search(pattern=pattern, string=rgw_log) is None, (
            f"Error: '{pattern}' found in the rgw logs. "
            f"Check the rgw logs for '{pattern}' and verify that "
            f"rgw bucket notification is working correctly")
        assert s3_client.put_object(Bucket=bucketname, Key="key-2",
                                    Body=data), "Failed: Put object: key-2"
        exec_cmd(notify_cmd)

        # Validate messages are received on the Kafka side using a curl command
        # (a temporary check from the Kafka side; should also be checked from the UI)
        curl_command = (
            f"curl -X GET {kafkadrop_host}/topic/{self.kafka_topic.name} "
            "-H 'content-type: application/vnd.kafka.json.v2+json'")
        json_output = run_cmd(cmd=curl_command)
        new_string = json_output.split()
        messages = new_string[new_string.index("messages</td>") + 1]
        if messages.find("1") == -1:
            raise Exception(
                "Error: Messages are not recieved from Kafka side."
                "RGW bucket notification is not working as expected.")

        # Validate the timestamp events
        ocs_version = config.ENV_DATA["ocs_version"]
        if Version.coerce(ocs_version) >= Version.coerce("4.8"):
            cmd = (
                f"bin/kafka-console-consumer.sh --bootstrap-server {constants.KAFKA_ENDPOINT} "
                f"--topic {self.kafka_topic.name} --from-beginning --timeout-ms 20000"
            )
            pod_list = get_pod_name_by_pattern(
                pattern="my-cluster-zookeeper",
                namespace=constants.AMQ_NAMESPACE)
            zookeeper_obj = get_pod_obj(name=pod_list[0],
                                        namespace=constants.AMQ_NAMESPACE)
            event_obj = zookeeper_obj.exec_cmd_on_pod(command=cmd)
            log.info(f"Event obj: {event_obj}")
            event_time = event_obj.get("Records")[0].get("eventTime")
            format_string = "%Y-%m-%dT%H:%M:%S.%fZ"
            try:
                datetime.strptime(event_time, format_string)
            except ValueError as ef:
                log.error(
                    f"Timestamp event {event_time} doesnt match the pattern {format_string}"
                )
                raise ef

            log.info(
                f"Timestamp event {event_time} matches the pattern {format_string}"
            )
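The message-count check above splits Kafkadrop's HTML output on whitespace and inspects the token after "messages</td>", which is fragile. A hedged alternative that extracts the count with a regex; the markup pattern is an assumption about how Kafkadrop renders the topic page:

import re


def kafka_message_count(kafkadrop_html):
    # Assumes the topic page renders "...messages</td> ... <td>N</td>..."
    match = re.search(r"messages</td>\s*<td[^>]*>(\d+)", kafkadrop_html)
    return int(match.group(1)) if match else 0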
Example #9
    def test_rgw_kafka_notifications(self, bucket_factory):
        """
        Test to verify rgw kafka notifications

        """
        # Get sc
        sc = default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

        # Deploy amq cluster
        self.amq.setup_amq_cluster(sc.name)

        # Create topic
        self.kafka_topic = self.amq.create_kafka_topic()

        # Create Kafkadrop pod
        (
            self.kafkadrop_pod,
            self.kafkadrop_svc,
            self.kafkadrop_route,
        ) = self.amq.create_kafkadrop()

        # Get the kafkadrop route
        kafkadrop_host = self.kafkadrop_route.get().get("spec").get("host")

        # Create bucket
        bucketname = bucket_factory(amount=1, interface="RGW-OC")[0].name

        # Get RGW credentials
        rgw_obj = RGW()
        rgw_endpoint, access_key, secret_key = rgw_obj.get_credentials()

        # Clone notify repo
        notify_path = clone_notify()

        # Initialise to put objects
        data = "A random string data to write on created rgw bucket"
        obc_obj = OBC(bucketname)
        s3_resource = boto3.resource(
            "s3",
            verify=retrieve_verification_mode(),
            endpoint_url=rgw_endpoint,
            aws_access_key_id=obc_obj.access_key_id,
            aws_secret_access_key=obc_obj.access_key,
        )
        s3_client = s3_resource.meta.client

        # Initialize notify command to run
        notify_cmd = (
            f"python {notify_path} -e {rgw_endpoint} -a {obc_obj.access_key_id} "
            f"-s {obc_obj.access_key} -b {bucketname} -ke {constants.KAFKA_ENDPOINT} -t {self.kafka_topic.name}"
        )
        log.info(f"Running cmd {notify_cmd}")

        # Put objects to bucket
        assert s3_client.put_object(Bucket=bucketname, Key="key-1",
                                    Body=data), "Failed: Put object: key-1"
        exec_cmd(notify_cmd)

        # Validate from the rgw logs that notifications are sent
        # and no errors are seen
        pattern = "ERROR: failed to create push endpoint"
        rgw_pod_obj = get_rgw_pods()
        rgw_log = get_pod_logs(pod_name=rgw_pod_obj[0].name, container="rgw")
        assert re.search(pattern=pattern, string=rgw_log) is None, (
            f"Error: '{pattern}' found in the rgw logs. "
            f"Check the rgw logs for '{pattern}' and verify that "
            f"rgw bucket notification is working correctly")
        assert s3_client.put_object(Bucket=bucketname, Key="key-2",
                                    Body=data), "Failed: Put object: key-2"
        exec_cmd(notify_cmd)

        # Validate messages are received on the Kafka side using a curl command
        # (a temporary check from the Kafka side; should also be checked from the UI)
        curl_command = (
            f"curl -X GET {kafkadrop_host}/topic/{self.kafka_topic.name} "
            "-H 'content-type: application/vnd.kafka.json.v2+json'")
        json_output = run_cmd(cmd=curl_command)
        new_string = json_output.split()
        messages = new_string[new_string.index("messages</td>") + 1]
        if messages.find("1") == -1:
            raise Exception(
                "Error: Messages are not recieved from Kafka side."
                "RGW bucket notification is not working as expected.")
Example #10
    def base_setup(self, interface, pvc_factory, pod_factory):
        """
        A setup phase for the test:
        get all the Ceph pods information,
        create a max-size PVC and pod, and run IO
        """
        # Setting the io_size_gb to 40% of the total PVC capacity
        ceph_pod = Pod.get_ceph_tools_pod()
        external = config.DEPLOYMENT["external_mode"]
        if external:
            ocp_obj = ocp.OCP()
            if interface == constants.CEPHBLOCKPOOL:
                resource_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD
            elif interface == constants.CEPHFILESYSTEM:
                resource_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_CEPHFS
            cmd = f"get sc {resource_name} -o yaml"
            pool_data = ocp_obj.exec_oc_cmd(cmd)
            pool = pool_data["parameters"]["pool"]

        else:
            pool = (constants.DEFAULT_BLOCKPOOL if interface
                    == constants.CEPHBLOCKPOOL else constants.DATA_POOL)

        ceph_replica = ceph_pod.exec_ceph_cmd(
            ceph_cmd=f"ceph osd pool get {pool} size")
        replica = ceph_replica["size"]
        ceph_status = ceph_pod.exec_ceph_cmd(ceph_cmd="ceph df")
        ceph_capacity = (int(ceph_status["stats"]["total_bytes"]) / replica /
                         constants.GB)
        pvc_size_gb = int(ceph_capacity * 0.5)
        io_size_gb = int(pvc_size_gb * 0.4)
        io_size_gb = min(io_size_gb, 400)  # cap the FIO data size at 400G

        pod_objs = get_all_pods(
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            selector=[
                "noobaa", "rook-ceph-osd-prepare", "rook-ceph-drain-canary"
            ],
            exclude_selector=True,
        )

        # Create a max-size PVC and an app pod, and run IO
        self.sc = default_storage_class(interface_type=interface)

        self.pvc_obj = pvc_factory(
            interface=interface,
            storageclass=self.sc,
            size=pvc_size_gb,
        )

        self.pod_obj = pod_factory(interface=interface, pvc=self.pvc_obj)

        log.info(f"Running FIO to fill PVC size: {io_size_gb}G")
        self.pod_obj.run_io("fs",
                            size=f"{io_size_gb}G",
                            io_direction="write",
                            runtime=480)

        log.info("Waiting for IO results")
        self.pod_obj.get_fio_results()

        return pod_objs
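A worked instance of the sizing math in base_setup, with illustrative numbers (constants.GB is taken to be 1024 ** 3):

# Worked example of the capacity calculation above (illustrative values)
total_bytes = 300 * 1024 ** 3   # raw capacity reported by "ceph df"
replica = 3                     # pool size from "ceph osd pool get <pool> size"
GB = 1024 ** 3
ceph_capacity = int(total_bytes) / replica / GB   # 100.0 GiB usable
pvc_size_gb = int(ceph_capacity * 0.5)            # 50  -> PVC size
io_size_gb = min(int(pvc_size_gb * 0.4), 400)     # 20  -> FIO data size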
Example #11
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.
        """
        ceph_cluster = ocp.OCP(kind="CephCluster", namespace=self.namespace)
        try:
            ceph_cluster.get().get("items")[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")

        if config.DEPLOYMENT["external_mode"]:
            logger.info("Deploying OCS on external mode RHCS")
            return self.deploy_with_external_mode()
        self.deploy_ocs_via_operator()
        pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
        cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
        # Check for Ceph pods
        assert pod.wait_for_resource(
            condition="Running",
            selector="app=rook-ceph-mon",
            resource_count=3,
            timeout=600,
        )
        assert pod.wait_for_resource(condition="Running",
                                     selector="app=rook-ceph-mgr",
                                     timeout=600)
        assert pod.wait_for_resource(
            condition="Running",
            selector="app=rook-ceph-osd",
            resource_count=3,
            timeout=600,
        )

        # validate ceph mon/osd volumes are backed by pvc
        validate_cluster_on_pvc()

        # validate PDB creation of MON, MDS, OSD pods
        validate_pdb_creation()

        # Creating toolbox pod
        setup_ceph_toolbox()

        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector="app=rook-ceph-tools",
            resource_count=1,
            timeout=600,
        )

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data["items"][0]["metadata"]["name"]

        if helpers.validate_cephfilesystem(cfs_name):
            logger.info("MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error("MDS deployment Failed! Please check logs!")

        # Change monitoring backend to OCS
        if config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
                "persistent-monitoring"):

            sc = helpers.default_storage_class(
                interface_type=constants.CEPHBLOCKPOOL)

            # Get the list of monitoring pods
            pods_list = get_all_pods(
                namespace=defaults.OCS_MONITORING_NAMESPACE,
                selector=["prometheus", "alertmanager"],
            )

            # Create configmap cluster-monitoring-config and reconfigure
            # storage class and telemeter server (if the url is specified in a
            # config file)
            create_configmap_cluster_monitoring_pod(
                sc_name=sc.name,
                telemeter_server_url=config.ENV_DATA.get(
                    "telemeter_server_url"),
            )

            # Allow some time for the pods to respin
            waiting_time = 45
            logger.info(f"Waiting {waiting_time} seconds...")
            time.sleep(waiting_time)

            # Validate the pods are respinned and in running state
            retry((CommandFailed, ResourceWrongStatusException),
                  tries=3,
                  delay=15)(validate_pods_are_respinned_and_running_state)(
                      pods_list)

            # Validate the PVCs are created and bound on monitoring pods
            validate_pvc_created_and_bound_on_monitoring_pods()

            # Validate the PVCs are mounted on the pods
            retry((CommandFailed, AssertionError), tries=3, delay=15)(
                validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
        elif config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
                "telemeter_server_url"):
            # Create configmap cluster-monitoring-config to reconfigure
            # telemeter server url when 'persistent-monitoring' is False
            create_configmap_cluster_monitoring_pod(
                telemeter_server_url=config.ENV_DATA["telemeter_server_url"])

        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        try:
            ceph_health_check(namespace=self.namespace, tries=30, delay=10)
        except CephHealthException as ex:
            err = str(ex)
            logger.warning(f"Ceph health check failed with {err}")
            if "clock skew detected" in err:
                logger.info(f"Changing NTP on compute nodes to"
                            f" {constants.RH_NTP_CLOCK}")
                if self.platform == constants.VSPHERE_PLATFORM:
                    update_ntp_compute_nodes()
                assert ceph_health_check(namespace=self.namespace,
                                         tries=60,
                                         delay=10)

        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()
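The monitoring branch above is gated by a few ENV_DATA keys; a hedged sketch of how they might appear in a run configuration (the values, and the URL in particular, are illustrative):

ENV_DATA = {
    "monitoring_enabled": True,        # reconfigure the monitoring stack
    "persistent-monitoring": True,     # back Prometheus/Alertmanager with OCS PVCs
    "telemeter_server_url": "https://telemeter.example.test",  # hypothetical URL
}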