Example #1
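A minimal import preamble follows so the listing is closer to self-contained. Only the standard-library imports are certain from the calls in the class; the project-level names (Deployment, config, constants, VSPHEREUtil, Terraform, OCPINSTALLRHEL, Templating, RDMDiskNotFound and the helper utilities) come from the ocs-ci package, and their exact module paths are an assumption, so they are only noted in a comment rather than spelled out.

import json
import logging
import os
import time

# Project-level imports are assumed to come from the ocs-ci package layout:
# Deployment, config, constants, VSPHEREUtil, Terraform, OCPINSTALLRHEL,
# Templating, RDMDiskNotFound and the helper functions used below.

logger = logging.getLogger(__name__)
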
class VSPHEREBASE(Deployment):
    def __init__(self):
        """
        Base class for both IPI and UPI deployments
        """
        super(VSPHEREBASE, self).__init__()
        self.region = config.ENV_DATA['region']
        self.server = config.ENV_DATA['vsphere_server']
        self.user = config.ENV_DATA['vsphere_user']
        self.password = config.ENV_DATA['vsphere_password']
        self.cluster = config.ENV_DATA['vsphere_cluster']
        self.datacenter = config.ENV_DATA['vsphere_datacenter']
        self.datastore = config.ENV_DATA['vsphere_datastore']
        self.vsphere = VSPHEREUtil(self.server, self.user, self.password)
        self.upi_repo_path = os.path.join(
            constants.EXTERNAL_DIR,
            'installer'
        )
        self.upi_scale_up_repo_path = os.path.join(
            constants.EXTERNAL_DIR,
            'openshift-misc'
        )
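        # configure terraform logging: log level and a log file location
        # under the cluster path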
        os.environ['TF_LOG'] = config.ENV_DATA.get('TF_LOG_LEVEL', "TRACE")
        os.environ['TF_LOG_PATH'] = os.path.join(
            config.ENV_DATA.get('cluster_path'),
            config.ENV_DATA.get('TF_LOG_FILE')
        )

        self.wait_time = 90

    def attach_disk(self, size=100):
        """
        Add a new disk to all the worker nodes

        Args:
            size (int): Size of disk in GB (default: 100)

        """
        vms = self.vsphere.get_all_vms_in_pool(
            config.ENV_DATA.get("cluster_name"),
            self.datacenter,
            self.cluster
        )
        # Add disks to all worker nodes
        for vm in vms:
            if "compute" in vm.name:
                self.vsphere.add_disks(
                    config.ENV_DATA.get("extra_disks", 1),
                    vm,
                    size,
                    constants.VM_DISK_TYPE
                )

    def add_nodes(self):
        """
        Add new nodes to the cluster
        """
        # create separate directory for scale-up terraform data
        scaleup_terraform_data_dir = os.path.join(
            self.cluster_path,
            constants.TERRAFORM_DATA_DIR,
            constants.SCALEUP_TERRAFORM_DATA_DIR
        )
        create_directory_path(scaleup_terraform_data_dir)
        logger.info(
            f"scale-up terraform data directory: {scaleup_terraform_data_dir}"
        )

        # git clone repo from openshift-misc
        clone_repo(
            constants.VSPHERE_SCALEUP_REPO, self.upi_scale_up_repo_path
        )

        # modify scale-up repo
        self.modify_scaleup_repo()

        config.ENV_DATA['vsphere_resource_pool'] = config.ENV_DATA.get(
            "cluster_name"
        )

        # sync guest time with host
        if config.ENV_DATA.get('sync_time_with_host'):
            sync_time_with_host(constants.SCALEUP_VSPHERE_MACHINE_CONF, True)

        # get the RHCOS worker list
        self.rhcos_ips = get_node_ips()
        logger.info(f"RHCOS IP's: {json.dumps(self.rhcos_ips)}")

        # generate terraform variable for scaling nodes
        self.generate_terraform_vars_for_scaleup()

        # Add nodes using terraform
        scaleup_terraform = Terraform(constants.SCALEUP_VSPHERE_DIR)
        previous_dir = os.getcwd()
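        # run terraform from the scale-up data directory so the state file is
        # created there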
        os.chdir(scaleup_terraform_data_dir)
        scaleup_terraform.initialize()
        scaleup_terraform.apply(self.scale_up_terraform_var)
        scaleup_terraform_tfstate = os.path.join(
            scaleup_terraform_data_dir,
            "terraform.tfstate"
        )
        out = scaleup_terraform.output(
            scaleup_terraform_tfstate,
            "rhel_worker"
        )
        rhel_worker_nodes = json.loads(out)['value']
        logger.info(f"RHEL worker nodes: {rhel_worker_nodes}")
        os.chdir(previous_dir)

        # Install OCP on rhel nodes
        rhel_install = OCPINSTALLRHEL(rhel_worker_nodes)
        rhel_install.prepare_rhel_nodes()
        rhel_install.execute_ansible_playbook()

        # Give the new nodes some time to settle down
        time.sleep(self.wait_time)

        # wait for nodes to be in READY state
        wait_for_nodes_status(timeout=300)

    def generate_terraform_vars_for_scaleup(self):
        """
        Generates the terraform variables file for scaling nodes
        """
        logger.info("Generating terraform variables for scaling nodes")
        _templating = Templating()
        scale_up_terraform_var_template = "scale_up_terraform.tfvars.j2"
        scale_up_terraform_var_template_path = os.path.join(
            "ocp-deployment", scale_up_terraform_var_template
        )
        scale_up_terraform_config_str = _templating.render_template(
            scale_up_terraform_var_template_path, config.ENV_DATA
        )
        scale_up_terraform_var_yaml = os.path.join(
            self.cluster_path,
            constants.TERRAFORM_DATA_DIR,
            constants.SCALEUP_TERRAFORM_DATA_DIR,
            "scale_up_terraform.tfvars.yaml"
        )
        with open(scale_up_terraform_var_yaml, "w") as f:
            f.write(scale_up_terraform_config_str)

        self.scale_up_terraform_var = convert_yaml2tfvars(
            scale_up_terraform_var_yaml
        )
        logger.info(
            f"scale-up terraform variable file: {self.scale_up_terraform_var}"
        )

        # append RHCOS ip list to terraform variable file
        with open(self.scale_up_terraform_var, "a+") as fd:
            fd.write(f"rhcos_list = {json.dumps(self.rhcos_ips)}")

    def modify_scaleup_repo(self):
        """
        Modify the scale-up repo. To simplify the user experience, remove the
        AWS access and secret keys and the corresponding variables from the
        relevant locations in the scale-up repo
        """
        # remove access and secret key from constants.SCALEUP_VSPHERE_MAIN
        access_key = 'access_key       = "${var.aws_access_key}"'
        secret_key = 'secret_key       = "${var.aws_secret_key}"'
        replace_content_in_file(
            constants.SCALEUP_VSPHERE_MAIN,
            f"{access_key}",
            " "
        )
        replace_content_in_file(
            constants.SCALEUP_VSPHERE_MAIN,
            f"{secret_key}",
            " "
        )

        # remove access and secret key from constants.SCALEUP_VSPHERE_ROUTE53
        route53_access_key = 'access_key = "${var.access_key}"'
        route53_secret_key = 'secret_key = "${var.secret_key}"'
        replace_content_in_file(
            constants.SCALEUP_VSPHERE_ROUTE53,
            f"{route53_access_key}",
            " "
        )
        replace_content_in_file(
            constants.SCALEUP_VSPHERE_ROUTE53,
            f"{route53_secret_key}",
            " "
        )

        replace_content_in_file(
            constants.SCALEUP_VSPHERE_ROUTE53,
            "us-east-1",
            f"{config.ENV_DATA.get('region')}"
        )

        # remove access and secret variables from scale-up repo
        remove_keys_from_tf_variable_file(
            constants.SCALEUP_VSPHERE_VARIABLES,
            ['aws_access_key', 'aws_secret_key'])
        remove_keys_from_tf_variable_file(
            constants.SCALEUP_VSPHERE_ROUTE53_VARIABLES,
            ['access_key', 'secret_key']
        )

        # change root disk size
        change_vm_root_disk_size(constants.SCALEUP_VSPHERE_MACHINE_CONF)

    def delete_disks(self):
        """
        Delete the extra disks from all the worker nodes
        """
        vms = self.get_compute_vms(self.datacenter, self.cluster)
        if vms:
            for vm in vms:
                self.vsphere.remove_disks(vm)
        else:
            logger.debug("NO Resource Pool or VMs exists")

    def get_compute_vms(self, dc, cluster):
        """
        Gets the compute VMs from the resource pool

        Args:
            dc (str): Datacenter name
            cluster (str): Cluster name

        Returns:
            list: VM instances

        """
        if self.vsphere.is_resource_pool_exist(
            config.ENV_DATA['cluster_name'],
            self.datacenter,
            self.cluster
        ):
            vms = self.vsphere.get_all_vms_in_pool(
                config.ENV_DATA.get("cluster_name"),
                dc,
                cluster
            )
            return [
                vm for vm in vms if "compute" in vm.name or "rhel" in vm.name
            ]

    def add_rdm_disks(self):
        """
        Attaches an RDM disk to each compute node

        Raises:
            RDMDiskNotFound: In case no disks are found on the host

        """
        logger.info("Adding RDM disk to all compute nodes")
        datastore_type = self.vsphere.get_datastore_type_by_name(
            self.datastore,
            self.datacenter
        )

        compute_vms = self.get_compute_vms(self.datacenter, self.cluster)
        for vm in compute_vms:
            host = self.vsphere.get_host(vm)
            logger.info(f"{vm.name} belongs to host {host.name}")
            devices_available = self.vsphere.available_storage_devices(
                host,
                datastore_type=datastore_type
            )
            if not devices_available:
                raise RDMDiskNotFound

            # Erase the partition on the disk before adding to node
            device = devices_available[0]
            self.vsphere.erase_partition(host, device)

            # Attach RDM disk to node
            self.attach_rdm_disk(vm, device)

    def attach_rdm_disk(self, vm, device_name):
        """
        Attaches an RDM disk to the VM

        Args:
            vm (vim.VirtualMachine): VM instance
            device_name (str): Device name to add to VM.
                e.g:"/vmfs/devices/disks/naa.600304801b540c0125ef160f3048faba"

        """
        self.vsphere.add_rdm_disk(vm, device_name)

    def post_destroy_checks(self):
        """
        Post-destroy checks on the cluster
        """
        pool = config.ENV_DATA['cluster_name']
        if self.vsphere.is_resource_pool_exist(
                pool,
                self.datacenter,
                self.cluster
        ):
            logger.warning(
                f"Resource pool {pool} exists even after destroying cluster"
            )
            self.vsphere.destroy_pool(pool, self.datacenter, self.cluster)
        else:
            logger.info(
                f"Resource pool {pool} does not exist in "
                f"cluster {self.cluster}"
            )

        # destroy the folder in templates
        self.vsphere.destroy_folder(pool, self.cluster, self.datacenter)
Example #2
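# NOTE: Example #2 assumes the same import preamble as Example #1, plus
# rmtree from shutil, Version (with .coerce) from the semantic_version
# package, and the project-level VSpherePreChecks, VSPHEREHELPERS and
# get_ocp_version helpers; these module paths are assumptions based on the
# ocs-ci layout and may differ.
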
class VSPHEREBASE(Deployment):

    # default storage class for the StorageCluster CRD on the VMware platform
    DEFAULT_STORAGECLASS = "thin"

    def __init__(self):
        """
        Base class for both IPI and UPI deployments
        """
        super(VSPHEREBASE, self).__init__()
        self.region = config.ENV_DATA["region"]
        self.server = config.ENV_DATA["vsphere_server"]
        self.user = config.ENV_DATA["vsphere_user"]
        self.password = config.ENV_DATA["vsphere_password"]
        self.cluster = config.ENV_DATA["vsphere_cluster"]
        self.datacenter = config.ENV_DATA["vsphere_datacenter"]
        self.datastore = config.ENV_DATA["vsphere_datastore"]
        self.vsphere = VSPHEREUtil(self.server, self.user, self.password)
        self.upi_repo_path = os.path.join(constants.EXTERNAL_DIR, "installer")
        self.upi_scale_up_repo_path = os.path.join(constants.EXTERNAL_DIR,
                                                   "openshift-misc")
        self.cluster_launcher_repo_path = os.path.join(constants.EXTERNAL_DIR,
                                                       "cluster-launcher")
        os.environ["TF_LOG"] = config.ENV_DATA.get("TF_LOG_LEVEL", "TRACE")
        os.environ["TF_LOG_PATH"] = os.path.join(
            config.ENV_DATA.get("cluster_path"),
            config.ENV_DATA.get("TF_LOG_FILE"))

        # pre-checks for the vSphere environment
        # skip pre-checks for destroying cluster
        teardown = config.RUN["cli_params"].get("teardown")
        if not teardown:
            vsphere_prechecks = VSpherePreChecks()
            vsphere_prechecks.get_all_checks()

        self.ocp_version = get_ocp_version()

        self.wait_time = 90

    def attach_disk(self, size=100, disk_type=constants.VM_DISK_TYPE):
        """
        Add a new disk to all the worker nodes

        Args:
            size (int): Size of disk in GB (default: 100)
            disk_type (str): Disk type (default: constants.VM_DISK_TYPE)

        """
        vms = self.vsphere.get_all_vms_in_pool(
            config.ENV_DATA.get("cluster_name"), self.datacenter, self.cluster)
        # Add disks to all worker nodes
        for vm in vms:
            if "compute" in vm.name:
                self.vsphere.add_disks(config.ENV_DATA.get("extra_disks", 1),
                                       vm, size, disk_type)

    def add_nodes(self):
        """
        Add new nodes to the cluster
        """
        # create separate directory for scale-up terraform data
        scaleup_terraform_data_dir = os.path.join(
            self.cluster_path,
            constants.TERRAFORM_DATA_DIR,
            constants.SCALEUP_TERRAFORM_DATA_DIR,
        )
        create_directory_path(scaleup_terraform_data_dir)
        logger.info(
            f"scale-up terraform data directory: {scaleup_terraform_data_dir}")

        # git clone repo from openshift-misc
        clone_repo(constants.VSPHERE_SCALEUP_REPO, self.upi_scale_up_repo_path)

        # git clone repo from cluster-launcher
        clone_repo(constants.VSPHERE_CLUSTER_LAUNCHER,
                   self.cluster_launcher_repo_path)

        helpers = VSPHEREHELPERS()
        helpers.modify_scaleup_repo()

        config.ENV_DATA["vsphere_resource_pool"] = config.ENV_DATA.get(
            "cluster_name")

        # sync guest time with host
        sync_time_with_host_file = constants.SCALEUP_VSPHERE_MACHINE_CONF
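        # with the folder structure based repos, the machine conf comes from
        # the cluster-launcher repo under the matching OCP (aos) version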
        if config.ENV_DATA["folder_structure"]:
            sync_time_with_host_file = os.path.join(
                constants.CLUSTER_LAUNCHER_VSPHERE_DIR,
                f"aos-{get_ocp_version(seperator='_')}",
                constants.CLUSTER_LAUNCHER_MACHINE_CONF,
            )
        if config.ENV_DATA.get("sync_time_with_host"):
            sync_time_with_host(sync_time_with_host_file, True)

        # get the RHCOS worker list
        rhcos_ips = get_node_ips()
        logger.info(f"RHCOS IP's: {json.dumps(rhcos_ips)}")

        # generate terraform variable for scaling nodes
        self.scale_up_terraform_var = helpers.generate_terraform_vars_for_scaleup(
            rhcos_ips)

        # choose the vsphere_dir based on OCP version
        # generate cluster_info and config yaml files
        # for OCP version greater than 4.4
        vsphere_dir = constants.SCALEUP_VSPHERE_DIR
        rhel_module = "rhel-worker"
        if Version.coerce(self.ocp_version) >= Version.coerce("4.5"):
            vsphere_dir = os.path.join(
                constants.CLUSTER_LAUNCHER_VSPHERE_DIR,
                f"aos-{get_ocp_version('_')}",
                "vsphere",
            )
            helpers.generate_cluster_info()
            helpers.generate_config_yaml()
            rhel_module = "RHEL_WORKER_LIST"

        # Add nodes using terraform
        scaleup_terraform = Terraform(vsphere_dir)
        previous_dir = os.getcwd()
        os.chdir(scaleup_terraform_data_dir)
        scaleup_terraform.initialize()
        scaleup_terraform.apply(self.scale_up_terraform_var)
        scaleup_terraform_tfstate = os.path.join(scaleup_terraform_data_dir,
                                                 "terraform.tfstate")
        out = scaleup_terraform.output(scaleup_terraform_tfstate, rhel_module)
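        # with the folder structure based repo the output is a plain comma
        # separated string of workers; otherwise it is a JSON object whose
        # 'value' key holds the worker list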
        if config.ENV_DATA["folder_structure"]:
            rhel_worker_nodes = out.strip().replace('"', "").split(",")
        else:
            rhel_worker_nodes = json.loads(out)["value"]

        logger.info(f"RHEL worker nodes: {rhel_worker_nodes}")
        os.chdir(previous_dir)

        # Install OCP on rhel nodes
        rhel_install = OCPINSTALLRHEL(rhel_worker_nodes)
        rhel_install.prepare_rhel_nodes()
        rhel_install.execute_ansible_playbook()

        # Give the new nodes some time to settle down
        time.sleep(self.wait_time)

        # wait for nodes to be in READY state
        wait_for_nodes_status(timeout=300)

    def delete_disks(self):
        """
        Delete the extra disks from all the worker nodes
        """
        vms = self.get_compute_vms(self.datacenter, self.cluster)
        if vms:
            for vm in vms:
                self.vsphere.remove_disks(vm)
        else:
            logger.debug("NO Resource Pool or VMs exists")

    def get_compute_vms(self, dc, cluster):
        """
        Gets the compute VMs from the resource pool

        Args:
            dc (str): Datacenter name
            cluster (str): Cluster name

        Returns:
            list: VM instances

        """
        if self.vsphere.is_resource_pool_exist(config.ENV_DATA["cluster_name"],
                                               self.datacenter, self.cluster):
            vms = self.vsphere.get_all_vms_in_pool(
                config.ENV_DATA.get("cluster_name"), dc, cluster)
            return [
                vm for vm in vms if "compute" in vm.name or "rhel" in vm.name
            ]

    def add_rdm_disks(self):
        """
        Attaches an RDM disk to each compute node

        Raises:
            RDMDiskNotFound: In case no disks are found on the host

        """
        logger.info("Adding RDM disk to all compute nodes")
        datastore_type = self.vsphere.get_datastore_type_by_name(
            self.datastore, self.datacenter)

        compute_vms = self.get_compute_vms(self.datacenter, self.cluster)
        for vm in compute_vms:
            host = self.vsphere.get_host(vm)
            logger.info(f"{vm.name} belongs to host {host.name}")
            devices_available = self.vsphere.available_storage_devices(
                host, datastore_type=datastore_type)
            if not devices_available:
                raise RDMDiskNotFound

            # Erase the partition on the disk before adding to node
            device = devices_available[0]
            self.vsphere.erase_partition(host, device)

            # Attach RDM disk to node
            self.attach_rdm_disk(vm, device)

    def attach_rdm_disk(self, vm, device_name):
        """
        Attaches an RDM disk to the VM

        Args:
            vm (vim.VirtualMachine): VM instance
            device_name (str): Device name to add to VM.
                e.g:"/vmfs/devices/disks/naa.600304801b540c0125ef160f3048faba"

        """
        self.vsphere.add_rdm_disk(vm, device_name)

    def post_destroy_checks(self):
        """
        Post-destroy checks on the cluster
        """
        pool = config.ENV_DATA["cluster_name"]
        if self.vsphere.is_resource_pool_exist(pool, self.datacenter,
                                               self.cluster):
            logger.warning(
                f"Resource pool {pool} exists even after destroying cluster")
            self.vsphere.destroy_pool(pool, self.datacenter, self.cluster)
        else:
            logger.info(f"Resource pool {pool} does not exist in "
                        f"cluster {self.cluster}")

        # destroy the folder in templates
        self.vsphere.destroy_folder(pool, self.cluster, self.datacenter)

        # remove the .terraform directory (this is only to reclaim space)
        terraform_plugins_dir = os.path.join(
            config.ENV_DATA["cluster_path"],
            constants.TERRAFORM_DATA_DIR,
            constants.TERRAFORM_PLUGINS_DIR,
        )
        rmtree(terraform_plugins_dir, ignore_errors=True)

    def check_cluster_existence(self, cluster_name_prefix):
        """
        Check cluster existence according to cluster name prefix

        Args:
            cluster_name_prefix (str): The cluster name prefix to look for

        Returns:
            bool: True if a cluster with the same name prefix already exists,
                False otherwise

        """
        cluster_name_pattern = cluster_name_prefix
        rp_exist = self.vsphere.is_resource_pool_prefix_exist(
            cluster_name_pattern, self.datacenter, self.cluster)
        if rp_exist:
            logger.error(
                f"Resource pool with the prefix of {cluster_name_prefix} was found"
            )
            return True
        else:
            return False