Example #1
    def deploy_ocp(self, log_cli_level='DEBUG'):
        """
        Deployment specific to OCP cluster on vSphere platform

        Args:
            log_cli_level (str): openshift installer's log level
                (default: "DEBUG")

        """
        super(VSPHEREUPI, self).deploy_ocp(log_cli_level)
        if config.ENV_DATA.get('scale_up'):
            logger.info("Adding extra nodes to cluster")
            self.add_nodes()

        # remove RHCOS compute nodes
        if (
                config.ENV_DATA.get('scale_up')
                and not config.ENV_DATA.get('mixed_cluster')
        ):
            rhcos_nodes = get_typed_worker_nodes()
            logger.info(
                f"RHCOS compute nodes to delete: "
                f"{[node.name for node in rhcos_nodes]}"
            )
            logger.info("Removing RHCOS compute nodes from a cluster")
            remove_nodes(rhcos_nodes)
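
The scale-up branch above is driven entirely by configuration flags; `self.add_nodes()`, `get_typed_worker_nodes()` and `remove_nodes()` do the actual work. A minimal sketch of the `ENV_DATA` entries that select this path (the values and the import path are illustrative, not taken from a real ocs-ci configuration):

    # Hedged sketch: flag names come from the example above, values are made up.
    from ocs_ci.framework import config  # assumed import path

    config.ENV_DATA["scale_up"] = True        # add extra worker nodes after the OCP install
    config.ENV_DATA["mixed_cluster"] = False  # False -> the original RHCOS workers get removed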
Example #2
    def deploy_ocp(self, log_cli_level='DEBUG'):
        """
        Deployment specific to OCP cluster on vSphere platform

        Args:
            log_cli_level (str): openshift installer's log level
                (default: "DEBUG")

        """
        super(VSPHEREUPI, self).deploy_ocp(log_cli_level)
        if config.ENV_DATA.get('scale_up'):
            logger.info("Adding extra nodes to cluster")
            self.add_nodes()

        # remove RHCOS compute nodes
        if (config.ENV_DATA.get('scale_up')
                and not config.ENV_DATA.get('mixed_cluster')):
            rhcos_nodes = get_typed_worker_nodes()
            logger.info(f"RHCOS compute nodes to delete: "
                        f"{[node.name for node in rhcos_nodes]}")
            logger.info("Removing RHCOS compute nodes from a cluster")
            remove_nodes(rhcos_nodes)

        # get datastore type and configure chrony for all nodes ONLY if
        # datastore type is vsan
        datastore_type = self.vsphere.get_datastore_type_by_name(
            self.datastore, self.datacenter)
        if datastore_type != constants.VMFS:
            configure_chrony_and_wait_for_machineconfig_status(node_type="all",
                                                               timeout=1800)
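
These `deploy_ocp` overrides are normally driven by the framework's deployment entry point rather than called directly; a minimal, hypothetical invocation could look like the following (the import path and the argument-free construction of `VSPHEREUPI` are assumptions):

    # Hypothetical usage sketch, not the real ocs-ci entry point.
    from ocs_ci.deployment.vmware import VSPHEREUPI  # assumed import path

    deployer = VSPHEREUPI()
    deployer.deploy_ocp(log_cli_level="DEBUG")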
Example #3
    def deploy_ocp(self, log_cli_level="DEBUG"):
        """
        Deployment specific to OCP cluster on vSphere platform

        Args:
            log_cli_level (str): openshift installer's log level
                (default: "DEBUG")

        """
        cluster_name_parts = config.ENV_DATA.get("cluster_name").split("-")
        prefix = cluster_name_parts[0]
        if not (
            prefix.startswith(tuple(constants.PRODUCTION_JOBS_PREFIX))
            or config.DEPLOYMENT.get("force_deploy_multiple_clusters")
        ):
            if self.check_cluster_existence(prefix):
                raise exceptions.SameNamePrefixClusterAlreadyExistsException(
                    f"Cluster with name prefix {prefix} already exists. "
                    f"Please destroy the existing cluster for a new cluster "
                    f"deployment"
                )
        super(VSPHEREUPI, self).deploy_ocp(log_cli_level)
        if config.ENV_DATA.get("scale_up"):
            logger.info("Adding extra nodes to cluster")
            self.add_nodes()

        # remove RHCOS compute nodes
        if config.ENV_DATA.get("scale_up") and not config.ENV_DATA.get("mixed_cluster"):
            rhcos_nodes = get_typed_worker_nodes()
            logger.info(
                f"RHCOS compute nodes to delete: "
                f"{[node.name for node in rhcos_nodes]}"
            )
            logger.info("Removing RHCOS compute nodes from a cluster")
            remove_nodes(rhcos_nodes)

        if config.DEPLOYMENT.get("thick_sc"):
            sc_data = templating.load_yaml(constants.VSPHERE_THICK_STORAGECLASS_YAML)
            sc_data_yaml = tempfile.NamedTemporaryFile(
                mode="w+", prefix="storageclass", delete=False
            )
            if config.DEPLOYMENT.get("eager_zeroed_thick_sc"):
                sc_data["parameters"]["diskformat"] = "eagerzeroedthick"
            else:
                sc_data["parameters"]["diskformat"] = "zeroedthick"
            templating.dump_data_to_temp_yaml(sc_data, sc_data_yaml.name)
            run_cmd(f"oc create -f {sc_data_yaml.name}")
            self.DEFAULT_STORAGECLASS = "thick"
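
The thick StorageClass step loads a YAML template, flips its `diskformat` parameter, dumps it to a temporary file and creates the object with `oc create`. A rough sketch of what the loaded `sc_data` dictionary might look like for the in-tree vSphere provisioner (the real template behind `constants.VSPHERE_THICK_STORAGECLASS_YAML` may differ):

    # Hypothetical shape of sc_data after templating.load_yaml(); field values assumed.
    sc_data = {
        "apiVersion": "storage.k8s.io/v1",
        "kind": "StorageClass",
        "metadata": {"name": "thick"},
        "provisioner": "kubernetes.io/vsphere-volume",
        "parameters": {"diskformat": "zeroedthick"},  # "eagerzeroedthick" when eager_zeroed_thick_sc is set
    }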
Example #4
    def deploy_ocp(self, log_cli_level="DEBUG"):
        """
        Deployment specific to OCP cluster on vSphere platform

        Args:
            log_cli_level (str): openshift installer's log level
                (default: "DEBUG")

        """
        cluster_name_parts = config.ENV_DATA.get("cluster_name").split("-")
        prefix = cluster_name_parts[0]
        if not (
            prefix.startswith(tuple(constants.PRODUCTION_JOBS_PREFIX))
            or config.DEPLOYMENT.get("force_deploy_multiple_clusters")
        ):
            if self.check_cluster_existence(prefix):
                raise exceptions.SameNamePrefixClusterAlreadyExistsException(
                    f"Cluster with name prefix {prefix} already exists. "
                    f"Please destroy the existing cluster for a new cluster "
                    f"deployment"
                )
        super(VSPHEREUPI, self).deploy_ocp(log_cli_level)
        if config.ENV_DATA.get("scale_up"):
            logger.info("Adding extra nodes to cluster")
            self.add_nodes()

        # remove RHCOS compute nodes
        if config.ENV_DATA.get("scale_up") and not config.ENV_DATA.get("mixed_cluster"):
            rhcos_nodes = get_typed_worker_nodes()
            logger.info(
                f"RHCOS compute nodes to delete: "
                f"{[node.name for node in rhcos_nodes]}"
            )
            logger.info("Removing RHCOS compute nodes from a cluster")
            remove_nodes(rhcos_nodes)

        # get datastore type and configure chrony for all nodes ONLY if
        # datastore type is vsan
        datastore_type = self.vsphere.get_datastore_type_by_name(
            self.datastore, self.datacenter
        )
        if datastore_type != constants.VMFS:
            configure_chrony_and_wait_for_machineconfig_status(
                node_type="all", timeout=1800
            )
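
The prefix guard at the top of Examples 3 and 4 takes the first dash-separated token of the cluster name and refuses to deploy when a cluster with that prefix already exists, unless the prefix marks a production job or `force_deploy_multiple_clusters` is set. A small, self-contained illustration of how the prefix is derived (the cluster name and the prefix tuple are made up):

    # Illustrative values; the contents of constants.PRODUCTION_JOBS_PREFIX are assumed.
    cluster_name = "jnk-vsphere-upi-1az-rhcos-3m-3w"
    prefix = cluster_name.split("-")[0]              # -> "jnk"
    is_production_job = prefix.startswith(("jnk",))  # stand-in for the real prefix tuple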
Example #5
    def test_simultaneous_drain_of_two_ocs_nodes(
        self,
        pvc_factory,
        pod_factory,
        dc_pod_factory,
        interface,
        bucket_factory,
        rgw_bucket_factory,
    ):
        """
        OCS-2128/OCS-2129:
        - Create PVCs and start IO on DC based app pods
        - Add one extra node in two of the AZs and label the nodes
          with OCS storage label
        - Maintenance (mark as unschedulable and drain) 2 worker nodes
          simultaneously
        - Confirm that OCS and DC pods are in running state
        - Remove the drained (unschedulable) nodes
        - Check cluster functionality by creating resources
          (pools, storageclasses, PVCs, pods - both CephFS and RBD)
        - Check cluster and Ceph health

        """
        # Get OSD running nodes
        osd_running_worker_nodes = get_osd_running_nodes()
        log.info(f"OSDs are running on nodes {osd_running_worker_nodes}")

        # Label osd nodes with fedora app
        label_worker_node(osd_running_worker_nodes,
                          label_key="dc",
                          label_value="fedora")
        log.info("Successfully labeled worker nodes with {dc:fedora}")

        # Create DC app pods
        log.info("Creating DC based app pods and starting IO in background")
        interface = (constants.CEPHBLOCKPOOL
                     if interface == "rbd" else constants.CEPHFILESYSTEM)
        dc_pod_obj = []
        for i in range(2):
            dc_pod = dc_pod_factory(interface=interface,
                                    node_selector={"dc": "fedora"})
            pod.run_io_in_bg(dc_pod, fedora_dc=True)
            dc_pod_obj.append(dc_pod)

        # Get the machine name using the node name
        machine_names = [
            machine.get_machine_from_node_name(osd_running_worker_node)
            for osd_running_worker_node in osd_running_worker_nodes[:2]
        ]
        log.info(f"{osd_running_worker_nodes} associated "
                 f"machine are {machine_names}")

        # Get the machineset name using machine name
        machineset_names = [
            machine.get_machineset_from_machine_name(machine_name)
            for machine_name in machine_names
        ]
        log.info(f"{osd_running_worker_nodes} associated machineset "
                 f"is {machineset_names}")

        # Add a new node and label it
        add_new_node_and_label_it(machineset_names[0])
        add_new_node_and_label_it(machineset_names[1])

        # Drain 2 nodes
        drain_nodes(osd_running_worker_nodes[:2])

        # Check that the pods are in Running state
        all_pod_obj = pod.get_all_pods(wait=True)
        for pod_obj in all_pod_obj:
            if ("-1-deploy" or "ocs-deviceset") not in pod_obj.name:
                try:
                    helpers.wait_for_resource_state(
                        resource=pod_obj,
                        state=constants.STATUS_RUNNING,
                        timeout=200)
                except ResourceWrongStatusException:
                    # 'rook-ceph-crashcollector' on the failed node gets stuck
                    # in Pending state; BZ 1810014 tracks it. As a workaround,
                    # ignore the 'rook-ceph-crashcollector' pod health check
                    # and delete its deployment so that the pod disappears.
                    # Revert this workaround once the BZ is fixed.
                    if "rook-ceph-crashcollector" in pod_obj.name:
                        ocp_obj = ocp.OCP(
                            namespace=defaults.ROOK_CLUSTER_NAMESPACE)
                        pod_name = pod_obj.name
                        deployment_name = "-".join(pod_name.split("-")[:-2])
                        command = f"delete deployment {deployment_name}"
                        ocp_obj.exec_oc_cmd(command=command)
                        log.info(f"Deleted deployment for pod {pod_obj.name}")

        # DC app pods from the drained nodes are automatically recreated on
        # other running nodes in the same AZ. Wait for all DC app pods to
        # reach Running state.
        pod.wait_for_dc_app_pods_to_reach_running_state(dc_pod_obj,
                                                        timeout=1200)
        log.info("All the dc pods reached running state")

        # Remove the drained (unschedulable) nodes
        # When the drain is attempted on a setup with more than 3 workers,
        # the drained nodes are removed after the drain completes so that
        # the cluster keeps 3 worker nodes.
        log.info(f"Removing drained nodes {osd_running_worker_nodes[:2]}")
        remove_node_objs = get_node_objs(osd_running_worker_nodes[:2])
        remove_nodes(remove_node_objs)

        # Check basic cluster functionality by creating resources
        # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
        # run IO and delete the resources
        self.sanity_helpers.create_resources(pvc_factory, pod_factory,
                                             bucket_factory,
                                             rgw_bucket_factory)
        self.sanity_helpers.delete_resources()

        # Perform cluster and Ceph health checks
        self.sanity_helpers.health_check()
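
One detail in the pod-name filter above is worth calling out: a tempting shortcut such as `("-1-deploy" or "ocs-deviceset") not in name` silently tests only the first string, because `or` between two non-empty strings returns its first operand; checking each excluded substring separately avoids that. A self-contained demonstration:

    # `or` between two non-empty strings evaluates to the first string.
    name = "rook-ceph-osd-prepare-ocs-deviceset-0-data-0abcde"
    print(("-1-deploy" or "ocs-deviceset") not in name)                # True - only "-1-deploy" is tested
    print(all(s not in name for s in ("-1-deploy", "ocs-deviceset")))  # False - both substrings are tested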