def deploy_ocp(self, log_cli_level="DEBUG"):
    """
    Deployment specific to OCP cluster on vSphere platform.

    Runs the generic OCP deployment, then optionally scales the cluster
    up with extra (RHEL) nodes, removes the original RHCOS compute nodes
    when a non-mixed scaled-up cluster was requested, and configures
    chrony (NTP) on all nodes when the backing datastore is not VMFS.

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")

    """
    # Python 3 zero-argument form; equivalent to super(VSPHEREUPI, self)
    super().deploy_ocp(log_cli_level)
    if config.ENV_DATA.get("scale_up"):
        logger.info("Adding extra nodes to cluster")
        self.add_nodes()

    # remove RHCOS compute nodes (only when the scaled-up cluster is not
    # meant to keep a mix of RHCOS and RHEL workers)
    if config.ENV_DATA.get("scale_up") and not config.ENV_DATA.get("mixed_cluster"):
        rhcos_nodes = get_typed_worker_nodes()
        logger.info(
            f"RHCOS compute nodes to delete: "
            f"{[node.name for node in rhcos_nodes]}"
        )
        logger.info("Removing RHCOS compute nodes from a cluster")
        remove_nodes(rhcos_nodes)

    # get datastore type and configure chrony for all nodes ONLY if
    # datastore type is vsan (i.e. anything other than VMFS)
    datastore_type = self.vsphere.get_datastore_type_by_name(
        self.datastore, self.datacenter
    )
    if datastore_type != constants.VMFS:
        configure_chrony_and_wait_for_machineconfig_status(
            node_type="all", timeout=1800
        )
def deploy_ocp(self, log_cli_level="DEBUG"):
    """
    Deployment specific to OCP cluster on vSphere platform.

    Refuses to deploy when another cluster with the same name prefix
    already exists (unless this is a production job or multiple-cluster
    deployment is explicitly forced), then runs the generic OCP
    deployment, optional scale-up, RHCOS worker removal and chrony
    configuration for non-VMFS datastores.

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")

    Raises:
        SameNamePrefixClusterAlreadyExistsException: when a cluster with
            the same name prefix already exists

    """
    cluster_name_parts = config.ENV_DATA.get("cluster_name").split("-")
    prefix = cluster_name_parts[0]
    # Guard against clobbering an existing cluster sharing this prefix;
    # production jobs and forced multi-cluster deployments skip the check.
    if not (
        prefix.startswith(tuple(constants.PRODUCTION_JOBS_PREFIX))
        or config.DEPLOYMENT.get("force_deploy_multiple_clusters")
    ):
        if self.check_cluster_existence(prefix):
            raise exceptions.SameNamePrefixClusterAlreadyExistsException(
                f"Cluster with name prefix {prefix} already exists. "
                f"Please destroy the existing cluster for a new cluster "
                f"deployment"
            )
    # Python 3 zero-argument form; equivalent to super(VSPHEREUPI, self)
    super().deploy_ocp(log_cli_level)
    if config.ENV_DATA.get("scale_up"):
        logger.info("Adding extra nodes to cluster")
        self.add_nodes()

    # remove RHCOS compute nodes (only when the scaled-up cluster is not
    # meant to keep a mix of RHCOS and RHEL workers)
    if config.ENV_DATA.get("scale_up") and not config.ENV_DATA.get("mixed_cluster"):
        rhcos_nodes = get_typed_worker_nodes()
        logger.info(
            f"RHCOS compute nodes to delete: "
            f"{[node.name for node in rhcos_nodes]}"
        )
        logger.info("Removing RHCOS compute nodes from a cluster")
        remove_nodes(rhcos_nodes)

    # get datastore type and configure chrony for all nodes ONLY if
    # datastore type is vsan (i.e. anything other than VMFS)
    datastore_type = self.vsphere.get_datastore_type_by_name(
        self.datastore, self.datacenter
    )
    if datastore_type != constants.VMFS:
        configure_chrony_and_wait_for_machineconfig_status(
            node_type="all", timeout=1800
        )
def deploy(self, log_cli_level="DEBUG"):
    """
    Deploy the OCP cluster on the Bare Metal platform.

    Flow: upload dnsmasq/PXE configuration to the helper node, PXE-boot
    every managed machine over IPMI, wait for bootstrap completion,
    approve node CSRs, patch image-registry storage, wait for install
    completion, then clean disks and configure chrony (NTP) on all nodes.

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")

    """
    # Uploading pxe files
    logger.info("Deploying OCP cluster for Bare Metal platform")
    logger.info(
        f"Openshift-installer will be using log level:{log_cli_level}")
    # Push the shared dnsmasq config to the helper node that serves
    # DHCP/TFTP for the PXE boot
    upload_file(
        self.host,
        constants.COMMON_CONF_FILE,
        os.path.join(
            self.helper_node_details["bm_dnsmasq_dir"],
            "dnsmasq.common.conf"),
        self.user,
        key_file=self.private_key,
    )
    logger.info("Uploading PXE files")
    ocp_version = get_ocp_version()
    float_ocp_version = float(ocp_version)
    # One PXE config per machine, keyed by MAC address using the
    # pxelinux.cfg/01-<mac-with-dashes> naming convention
    for machine in self.mgmt_details:
        if self.mgmt_details[machine].get(
                "cluster_name") or self.mgmt_details[machine].get(
                "extra_node"):
            pxe_file_path = self.create_pxe_files(
                ocp_version=float_ocp_version,
                role=self.mgmt_details[machine].get("role"),
            )
            upload_file(
                server=self.host,
                localpath=pxe_file_path,
                remotepath=f"{self.helper_node_details['bm_tftp_dir']}"
                f"/pxelinux.cfg/01-{self.mgmt_details[machine]['mac'].replace(':', '-')}",
                user=self.user,
                key_file=self.private_key,
            )
    # Applying Permission
    cmd = f"chmod 755 -R {self.helper_node_details['bm_tftp_dir']}"
    self.helper_node_handler.exec_cmd(cmd=cmd)
    # Applying Permission
    cmd = f"chmod 755 -R {self.helper_node_details['bm_path_to_upload']}"
    self.helper_node_handler.exec_cmd(cmd=cmd)
    # Restarting dnsmasq service so the new config takes effect
    cmd = "systemctl restart dnsmasq"
    assert self.helper_node_handler.exec_cmd(
        cmd=cmd), "Failed to restart dnsmasq service"
    # Rebooting Machine with pxe boot
    for machine in self.mgmt_details:
        if (self.mgmt_details[machine].get("cluster_name") ==
                constants.BM_DEFAULT_CLUSTER_NAME):
            # credentials passed as secrets so run_cmd masks them in logs
            secrets = [
                self.mgmt_details[machine]["mgmt_username"],
                self.mgmt_details[machine]["mgmt_password"],
            ]
            # Changes boot priority to pxe
            cmd = (
                f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} "
                f"-P {self.mgmt_details[machine]['mgmt_password']} "
                f"-H {self.mgmt_details[machine]['mgmt_console']} chassis bootdev pxe"
            )
            run_cmd(cmd=cmd, secrets=secrets)
            sleep(2)
            # Power On Machine: "power cycle" fails if the host is off,
            # hence the "|| ... power on" fallback in the same command
            cmd = (
                f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} "
                f"-P {self.mgmt_details[machine]['mgmt_password']} "
                f"-H {self.mgmt_details[machine]['mgmt_console']} chassis power cycle || "
                f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} "
                f"-P {self.mgmt_details[machine]['mgmt_password']} "
                f"-H {self.mgmt_details[machine]['mgmt_console']} chassis power on"
            )
            run_cmd(cmd=cmd, secrets=secrets)
    logger.info("waiting for bootstrap to complete")
    try:
        run_cmd(
            f"{self.installer} wait-for bootstrap-complete "
            f"--dir {self.cluster_path} "
            f"--log-level {log_cli_level}",
            timeout=3600,
        )
    except CommandFailed as e:
        # Best-effort log collection before re-raising when the installer
        # output suggests gathering bootstrap debug data
        if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
            try:
                gather_bootstrap()
            except Exception as ex:
                logger.error(ex)
        raise e
    OCP.set_kubeconfig(self.kubeconfig)
    wait_for_all_nodes_csr_and_approve()
    # wait for image registry to show-up
    co = "image-registry"
    wait_for_co(co)
    # patch image registry to null
    self.configure_storage_for_image_registry(self.kubeconfig)
    # wait for install to complete
    logger.info("waiting for install to complete")
    run_cmd(
        f"{self.installer} wait-for install-complete "
        f"--dir {self.cluster_path} "
        f"--log-level {log_cli_level}",
        timeout=1800,
    )
    # Approving CSRs here in-case if any exists
    approve_pending_csr()
    self.test_cluster()
    logger.info("Performing Disk cleanup")
    clean_disk()
    # We need NTP for OCS cluster to become clean
    configure_chrony_and_wait_for_machineconfig_status(node_type="all")
def deploy(self, log_cli_level="DEBUG"):
    """
    Deploy the OCP cluster on the Bare Metal platform with AWS Route 53
    DNS record management.

    Flow: upload dnsmasq/PXE configuration to the helper node, PXE-boot
    the bootstrap/master/worker machines (capped by the configured
    replica counts), create api/api-int/*.apps DNS A records pointing at
    the booted machines, wait for bootstrap, approve CSRs, patch
    image-registry storage, wait for install completion, remove the
    bootstrap IP from the api records, then clean disks and configure
    chrony (NTP) on all nodes.

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")

    """
    # Uploading pxe files
    # counters to stop PXE-booting machines beyond the configured replicas
    master_count = 0
    worker_count = 0
    logger.info("Deploying OCP cluster for Bare Metal platform")
    logger.info(
        f"Openshift-installer will be using log level:{log_cli_level}")
    # Push the shared dnsmasq config to the helper node that serves
    # DHCP/TFTP for the PXE boot
    upload_file(
        self.host,
        constants.COMMON_CONF_FILE,
        os.path.join(
            self.helper_node_details["bm_dnsmasq_dir"],
            "dnsmasq.common.conf"),
        self.user,
        key_file=self.private_key,
    )
    logger.info("Uploading PXE files")
    ocp_version = get_ocp_version()
    float_ocp_version = float(ocp_version)
    # One PXE config per machine, keyed by MAC address using the
    # pxelinux.cfg/01-<mac-with-dashes> naming convention
    for machine in self.mgmt_details:
        if self.mgmt_details[machine].get(
                "cluster_name") or self.mgmt_details[machine].get(
                "extra_node"):
            pxe_file_path = self.create_pxe_files(
                ocp_version=float_ocp_version,
                role=self.mgmt_details[machine].get("role"),
            )
            upload_file(
                server=self.host,
                localpath=pxe_file_path,
                remotepath=f"{self.helper_node_details['bm_tftp_dir']}"
                f"/pxelinux.cfg/01-{self.mgmt_details[machine]['mac'].replace(':', '-')}",
                user=self.user,
                key_file=self.private_key,
            )
    # Applying Permission
    cmd = f"chmod 755 -R {self.helper_node_details['bm_tftp_dir']}"
    self.helper_node_handler.exec_cmd(cmd=cmd)
    # Applying Permission
    cmd = f"chmod 755 -R {self.helper_node_details['bm_path_to_upload']}"
    self.helper_node_handler.exec_cmd(cmd=cmd)
    # Restarting dnsmasq service so the new config takes effect
    cmd = "systemctl restart dnsmasq"
    assert self.helper_node_handler.exec_cmd(
        cmd=cmd), "Failed to restart dnsmasq service"
    # Rebooting Machine with pxe boot
    api_record_ip_list = []
    apps_record_ip_list = []
    response_list = []
    cluster_name = f"{constants.BM_DEFAULT_CLUSTER_NAME}"
    # Drop stale records from a previous run; the hosted zone itself is
    # kept (delete_zone=False)
    self.aws.delete_hosted_zone(cluster_name=cluster_name, delete_zone=False)
    for machine in self.mgmt_details:
        if (self.mgmt_details[machine].get("cluster_name") ==
                constants.BM_DEFAULT_CLUSTER_NAME):
            if (self.mgmt_details[machine]["role"] ==
                    constants.BOOTSTRAP_MACHINE):
                self.set_pxe_boot_and_reboot(machine)
                # NOTE(review): bootstrap_ip is only bound when the
                # inventory contains a bootstrap machine; the record
                # deletions after install rely on it — confirm the
                # inventory always includes one
                bootstrap_ip = self.mgmt_details[machine]["ip"]
                api_record_ip_list.append(
                    self.mgmt_details[machine]["ip"])
            elif (self.mgmt_details[machine]["role"] ==
                    constants.MASTER_MACHINE and
                    master_count < config.ENV_DATA["master_replicas"]):
                self.set_pxe_boot_and_reboot(machine)
                api_record_ip_list.append(
                    self.mgmt_details[machine]["ip"])
                master_count += 1
            elif (self.mgmt_details[machine]["role"] ==
                    constants.WORKER_MACHINE and
                    worker_count < config.ENV_DATA["worker_replicas"]):
                self.set_pxe_boot_and_reboot(machine)
                apps_record_ip_list.append(
                    self.mgmt_details[machine]["ip"])
                worker_count += 1
    logger.info("Configuring DNS records")
    zone_id = self.aws.get_hosted_zone_id(cluster_name=cluster_name)
    # Compact cluster: with no dedicated workers, apps traffic is served
    # by the control-plane nodes
    if config.ENV_DATA["worker_replicas"] == 0:
        apps_record_ip_list = api_record_ip_list
    for ip in api_record_ip_list:
        response_list.append(
            self.aws.update_hosted_zone_record(
                zone_id=zone_id,
                record_name=f"api-int.{cluster_name}",
                data=ip,
                type="A",
                operation_type="Add",
            ))
        response_list.append(
            self.aws.update_hosted_zone_record(
                zone_id=zone_id,
                record_name=f"api.{cluster_name}",
                data=ip,
                type="A",
                operation_type="Add",
            ))
    for ip in apps_record_ip_list:
        response_list.append(
            self.aws.update_hosted_zone_record(
                zone_id=zone_id,
                record_name=f"*.apps.{cluster_name}",
                data=ip,
                type="A",
                operation_type="Add",
            ))
    logger.info("Waiting for Record Response")
    self.aws.wait_for_record_set(response_list=response_list)
    logger.info("Records Created Successfully")
    logger.info("waiting for bootstrap to complete")
    try:
        run_cmd(
            f"{self.installer} wait-for bootstrap-complete "
            f"--dir {self.cluster_path} "
            f"--log-level {log_cli_level}",
            timeout=3600,
        )
    except CommandFailed as e:
        # Best-effort log collection before re-raising when the installer
        # output suggests gathering bootstrap debug data
        if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
            try:
                gather_bootstrap()
            except Exception as ex:
                logger.error(ex)
        raise e
    OCP.set_kubeconfig(self.kubeconfig)
    wait_for_all_nodes_csr_and_approve()
    # wait for image registry to show-up
    co = "image-registry"
    wait_for_co(co)
    # patch image registry to null
    self.configure_storage_for_image_registry(self.kubeconfig)
    # wait for install to complete
    logger.info("waiting for install to complete")
    run_cmd(
        f"{self.installer} wait-for install-complete "
        f"--dir {self.cluster_path} "
        f"--log-level {log_cli_level}",
        timeout=1800,
    )
    # The bootstrap node is done once install completes; take its IP
    # out of the api/api-int records
    logger.info("Removing Bootstrap Ip for DNS Records")
    self.aws.update_hosted_zone_record(
        zone_id=zone_id,
        record_name=f"api-int.{cluster_name}",
        data=bootstrap_ip,
        type="A",
        operation_type="Delete",
    )
    self.aws.update_hosted_zone_record(
        zone_id=zone_id,
        record_name=f"api.{cluster_name}",
        data=bootstrap_ip,
        type="A",
        operation_type="Delete",
    )
    # Approving CSRs here in-case if any exists
    approve_pending_csr()
    self.test_cluster()
    logger.info("Performing Disk cleanup")
    clean_disk()
    # We need NTP for OCS cluster to become clean
    # Scale the chrony machineconfig timeout with the larger node pool
    # (400s per node)
    worker_timeout = 400 * config.ENV_DATA["worker_replicas"]
    master_timeout = 400 * config.ENV_DATA["master_replicas"]
    if master_timeout <= worker_timeout:
        chrony_timeout = worker_timeout
    else:
        chrony_timeout = master_timeout
    configure_chrony_and_wait_for_machineconfig_status(
        node_type="all", timeout=chrony_timeout)