def deploy_ocs_via_operator(self, image=None):
    """
    Method for deploy OCS via OCS operator.

    Drives the whole operator-based install: optional UI deployment,
    namespace/OLM setup, operator subscription, CSV readiness, and finally
    building and creating the StorageCluster CR with all platform/config
    specific tweaks (LSO, arbiter, IBM Cloud, encryption, KMS, ...).

    Args:
        image (str): Image of ocs registry.

    """
    ui_deployment = config.DEPLOYMENT.get("ui_deployment")
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    arbiter_deployment = config.DEPLOYMENT.get("arbiter_deployment")

    if ui_deployment:
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploy via UI
        return
    else:
        logger.info("Deployment of OCS via OCS operator")
        self.label_and_taint_nodes()

    # LSO deployments need local storage prepared before the operator install
    if config.DEPLOYMENT.get("local_storage"):
        setup_local_storage(storageclass=self.DEFAULT_STORAGECLASS_LSO)

    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        ibmcloud.add_deployment_dependencies()
        # non-live IBM Cloud deployments pull images via a dedicated secret
        if not live_deployment:
            create_ocs_secret(self.namespace)
            create_ocs_secret(constants.MARKETPLACE_NAMESPACE)
    if not live_deployment:
        self.create_ocs_operator_source(image)
    self.subscribe_ocs()
    operator_selector = get_selector_for_ocs_operator()
    subscription_plan_approval = config.DEPLOYMENT.get(
        "subscription_plan_approval")
    package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=operator_selector,
        subscription_plan_approval=subscription_plan_approval,
    )
    package_manifest.wait_for_resource(timeout=300)
    channel = config.DEPLOYMENT.get("ocs_csv_channel")
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    if (config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
            and not live_deployment):
        # On non-live IBM Cloud the operator pods need the image-pull secret
        # linked to their service accounts while the CSV is still installing,
        # then a pod restart so they pick it up.
        csv.wait_for_phase("Installing", timeout=720)
        logger.info("Sleeping for 30 seconds before applying SA")
        time.sleep(30)
        link_all_sa_and_secret(constants.OCS_SECRET, self.namespace)
        logger.info("Deleting all pods in openshift-storage namespace")
        exec_cmd(f"oc delete pod --all -n {self.namespace}")
    csv.wait_for_phase("Succeeded", timeout=720)
    ocp_version = float(get_ocp_version())
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        # IBM Cloud mounts kubelet under /var/data/kubelet; patch the rook
        # operator configmap so the CSI driver uses the right path.
        config_map = ocp.OCP(
            kind="configmap",
            namespace=self.namespace,
            resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
        )
        config_map.get(retry=10, wait=5)
        config_map_patch = (
            '\'{"data": {"ROOK_CSI_KUBELET_DIR_PATH": "/var/data/kubelet"}}\''
        )
        logger.info("Patching config map to change KUBLET DIR PATH")
        exec_cmd(
            f"oc patch configmap -n {self.namespace} "
            f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}")
        if config.DEPLOYMENT.get("create_ibm_cos_secret", True):
            logger.info("Creating secret for IBM Cloud Object Storage")
            with open(constants.IBM_COS_SECRET_YAML, "r") as cos_secret_fd:
                cos_secret_data = yaml.load(cos_secret_fd,
                                            Loader=yaml.SafeLoader)
            key_id = config.AUTH["ibmcloud"]["ibm_cos_access_key_id"]
            key_secret = config.AUTH["ibmcloud"][
                "ibm_cos_secret_access_key"]
            cos_secret_data["data"]["IBM_COS_ACCESS_KEY_ID"] = key_id
            cos_secret_data["data"][
                "IBM_COS_SECRET_ACCESS_KEY"] = key_secret
            cos_secret_data_yaml = tempfile.NamedTemporaryFile(
                mode="w+", prefix="cos_secret", delete=False)
            templating.dump_data_to_temp_yaml(cos_secret_data,
                                              cos_secret_data_yaml.name)
            exec_cmd(f"oc create -f {cos_secret_data_yaml.name}")

    # Modify the CSV with custom values if required
    if all(key in config.DEPLOYMENT
           for key in ("csv_change_from", "csv_change_to")):
        modify_csv(
            csv=csv_name,
            replace_from=config.DEPLOYMENT["csv_change_from"],
            replace_to=config.DEPLOYMENT["csv_change_to"],
        )

    # create custom storage class for StorageCluster CR if necessary
    if self.CUSTOM_STORAGE_CLASS_PATH is not None:
        with open(self.CUSTOM_STORAGE_CLASS_PATH, "r") as custom_sc_fo:
            custom_sc = yaml.load(custom_sc_fo, Loader=yaml.SafeLoader)
        # set value of DEFAULT_STORAGECLASS to match the custom storage cls
        self.DEFAULT_STORAGECLASS = custom_sc["metadata"]["name"]
        run_cmd(f"oc create -f {self.CUSTOM_STORAGE_CLASS_PATH}")

    # creating StorageCluster
    if config.DEPLOYMENT.get("kms_deployment"):
        # KMS (e.g. vault) must be up before the StorageCluster references it
        kms = KMS.get_kms_deployment()
        kms.deploy()
    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)

    if arbiter_deployment:
        cluster_data["spec"]["arbiter"] = {}
        cluster_data["spec"]["nodeTopologies"] = {}
        cluster_data["spec"]["arbiter"]["enable"] = True
        cluster_data["spec"]["nodeTopologies"][
            "arbiterLocation"] = self.get_arbiter_location()
        cluster_data["spec"]["storageDeviceSets"][0][
            "replica"] = config.DEPLOYMENT.get(
                "ocs_operator_nodes_to_label", 4)

    cluster_data["metadata"]["name"] = config.ENV_DATA[
        "storage_cluster_name"]

    deviceset_data = cluster_data["spec"]["storageDeviceSets"][0]
    device_size = int(
        config.ENV_DATA.get("device_size", defaults.DEVICE_SIZE))

    logger.info(
        "Flexible scaling is available from version 4.7 on LSO cluster with less than 3 zones"
    )
    # NOTE: ocs_version is a string here (for Version.coerce); it is
    # re-read as a float further below for numeric comparisons.
    ocs_version = config.ENV_DATA["ocs_version"]
    zone_num = get_az_count()
    if (config.DEPLOYMENT.get("local_storage")
            and Version.coerce(ocs_version) >= Version.coerce("4.7")
            and zone_num < 3):
        cluster_data["spec"]["flexibleScaling"] = True
        # https://bugzilla.redhat.com/show_bug.cgi?id=1921023
        cluster_data["spec"]["storageDeviceSets"][0]["count"] = 3
        cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 1

    # set size of request for storage
    if self.platform.lower() == constants.BAREMETAL_PLATFORM:
        # on baremetal request the smallest available PV size
        pv_size_list = helpers.get_pv_size(
            storageclass=self.DEFAULT_STORAGECLASS_LSO)
        pv_size_list.sort()
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"] = f"{pv_size_list[0]}"
    else:
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"] = f"{device_size}Gi"

    # set storage class to OCS default on current platform
    if self.DEFAULT_STORAGECLASS:
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"] = self.DEFAULT_STORAGECLASS

    ocs_version = float(config.ENV_DATA["ocs_version"])

    # StorageCluster tweaks for LSO
    if config.DEPLOYMENT.get("local_storage"):
        cluster_data["spec"]["manageNodes"] = False
        cluster_data["spec"]["monDataDirHostPath"] = "/var/lib/rook"
        deviceset_data["name"] = constants.DEFAULT_DEVICESET_LSO_PVC_NAME
        deviceset_data["portable"] = False
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"] = self.DEFAULT_STORAGECLASS_LSO
        lso_type = config.DEPLOYMENT.get("type")
        if (self.platform.lower() == constants.AWS_PLATFORM
                and not lso_type == constants.AWS_EBS):
            deviceset_data["count"] = 2
        if ocs_version >= 4.5:
            deviceset_data["resources"] = {
                "limits": {
                    "cpu": 2,
                    "memory": "5Gi"
                },
                "requests": {
                    "cpu": 1,
                    "memory": "5Gi"
                },
            }
        if (ocp_version >= 4.6) and (ocs_version >= 4.6):
            cluster_data["metadata"]["annotations"] = {
                "cluster.ocs.openshift.io/local-devices": "true"
            }

    # Allow lower instance requests and limits for OCS deployment
    # The resources we need to change can be found here:
    # https://github.com/openshift/ocs-operator/blob/release-4.5/pkg/deploy-manager/storagecluster.go#L88-L116
    if config.DEPLOYMENT.get("allow_lower_instance_requirements"):
        none_resources = {"Requests": None, "Limits": None}
        deviceset_data["resources"] = deepcopy(none_resources)
        resources = [
            "mon",
            "mds",
            "rgw",
            "mgr",
            "noobaa-core",
            "noobaa-db",
        ]
        if ocs_version >= 4.5:
            resources.append("noobaa-endpoint")
        cluster_data["spec"]["resources"] = {
            resource: deepcopy(none_resources)
            for resource in resources
        }
        if ocs_version >= 4.5:
            # noobaa-endpoint keeps explicit (small) limits even in
            # lowered-requirements mode
            cluster_data["spec"]["resources"]["noobaa-endpoint"] = {
                "limits": {
                    "cpu": 1,
                    "memory": "500Mi"
                },
                "requests": {
                    "cpu": 1,
                    "memory": "500Mi"
                },
            }
    else:
        local_storage = config.DEPLOYMENT.get("local_storage")
        platform = config.ENV_DATA.get("platform", "").lower()
        if local_storage and platform == "aws":
            # bump resources for LSO-on-AWS deployments
            resources = {
                "mds": {
                    "limits": {
                        "cpu": 3,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
            }
            if ocs_version < 4.5:
                resources["noobaa-core"] = {
                    "limits": {
                        "cpu": 2,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
                resources["noobaa-db"] = {
                    "limits": {
                        "cpu": 2,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
            cluster_data["spec"]["resources"] = resources

    # Enable host network if enabled in config (this requires all the
    # rules to be enabled on the underlying platform).
    if config.DEPLOYMENT.get("host_network"):
        cluster_data["spec"]["hostNetwork"] = True

    cluster_data["spec"]["storageDeviceSets"] = [deviceset_data]

    if self.platform == constants.IBMCLOUD_PLATFORM:
        mon_pvc_template = {
            "spec": {
                "accessModes": ["ReadWriteOnce"],
                "resources": {
                    "requests": {
                        "storage": "20Gi"
                    }
                },
                "storageClassName": self.DEFAULT_STORAGECLASS,
                "volumeMode": "Filesystem",
            }
        }
        cluster_data["spec"]["monPVCTemplate"] = mon_pvc_template
        # Need to check if it's needed for ibm cloud to set manageNodes
        cluster_data["spec"]["manageNodes"] = False

    if config.ENV_DATA.get("encryption_at_rest"):
        if ocs_version < 4.6:
            error_message = "Encryption at REST can be enabled only on OCS >= 4.6!"
            logger.error(error_message)
            raise UnsupportedFeatureError(error_message)
        logger.info("Enabling encryption at REST!")
        cluster_data["spec"]["encryption"] = {
            "enable": True,
        }
        if config.DEPLOYMENT.get("kms_deployment"):
            cluster_data["spec"]["encryption"]["kms"] = {
                "enable": True,
            }

    if config.DEPLOYMENT.get("ceph_debug"):
        # with debug enabled, stop the operator from reconciling ceph config
        # so the debug settings are not reverted
        setup_ceph_debug()
        cluster_data["spec"]["managedResources"] = {
            "cephConfig": {
                "reconcileStrategy": "ignore"
            }
        }

    cluster_data_yaml = tempfile.NamedTemporaryFile(
        mode="w+", prefix="cluster_storage", delete=False)
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
    run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=1200)
    if config.DEPLOYMENT["infra_nodes"]:
        _ocp = ocp.OCP(kind="node")
        _ocp.exec_oc_cmd(
            command=f"annotate namespace {defaults.ROOK_CLUSTER_NAMESPACE} "
            f"{constants.NODE_SELECTOR_ANNOTATION}")
def __init__(self, driver):
    """Initialize the page object and resolve PVC locators for the running OCP version."""
    super().__init__(driver)
    self.pvc_loc = locators[get_ocp_version()]["pvc"]
class TestLvmMultiClone(ManageTest):
    """
    Test multi clone and restore for LVM

    """

    ocp_version = get_ocp_version()
    # size (GiB) of each PVC created by the test
    pvc_size = 100
    access_mode = constants.ACCESS_MODE_RWO
    # number of PVC/pod/clone sets exercised in parallel
    pvc_num = 5

    @tier1
    @acceptance
    @skipif_lvm_not_installed
    @skipif_ocs_version("<4.11")
    def test_create_multi_clone_from_pvc(
        self,
        volume_mode,
        volume_binding_mode,
        project_factory,
        lvm_storageclass_factory,
        pvc_clone_factory,
        pvc_factory,
        pod_factory,
    ):
        """
        Test creating multiple clones from PVCs in parallel:

        * Create 5 PVC
        * Create 5 POD
        * Run IO
        * Create 5 clones
        * Create 5 pvc from clone
        * Attach 5 pod
        * Run IO

        For filesystem volume mode, md5 sums are compared between origin and
        restored pods (must match after clone, must differ after fresh IO on
        the restored pods).
        """
        lvm = LVM(fstrim=True, fail_on_thin_pool_not_empty=True)
        logger.info(f"LVMCluster version is {lvm.get_lvm_version()}")
        logger.info(
            f"Lvm thin-pool overprovisionRation is {lvm.get_lvm_thin_pool_config_overprovision_ratio()}"
        )
        logger.info(
            f"Lvm thin-pool sizePercent is {lvm.get_lvm_thin_pool_config_size_percent()}"
        )

        proj_obj = project_factory()
        sc_obj = lvm_storageclass_factory(volume_binding_mode)

        # With WaitForFirstConsumer binding, PVCs stay Pending until a pod
        # consumes them; with Immediate binding they go straight to Bound.
        status = constants.STATUS_PENDING
        if volume_binding_mode == constants.IMMEDIATE_VOLUMEBINDINGMODE:
            status = constants.STATUS_BOUND

        # Create all PVCs in parallel
        futures = []
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.pvc_num)
        for exec_num in range(0, self.pvc_num):
            futures.append(
                executor.submit(
                    pvc_factory,
                    project=proj_obj,
                    interface=None,
                    storageclass=sc_obj,
                    size=self.pvc_size,
                    status=status,
                    access_mode=self.access_mode,
                    volume_mode=volume_mode,
                ))
        pvc_objs = []
        for future in concurrent.futures.as_completed(futures):
            pvc_objs.append(future.result())

        # Attach a pod to each PVC in parallel
        block = False
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.pvc_num)
        if volume_mode == constants.VOLUME_MODE_BLOCK:
            block = True
        futures_pods = []
        pods_objs = []
        for pvc in pvc_objs:
            futures_pods.append(
                executor.submit(pod_factory, pvc=pvc, raw_block_pv=block))
        for future_pod in concurrent.futures.as_completed(futures_pods):
            pods_objs.append(future_pod.result())

        storage_type = "fs"
        block = False
        if volume_mode == constants.VOLUME_MODE_BLOCK:
            storage_type = "block"
            block = True

        # Start fio on every pod in parallel
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.pvc_num)
        futures_fio = []
        for pod in pods_objs:
            futures_fio.append(
                executor.submit(
                    pod.run_io,
                    storage_type,
                    size="5g",
                    rate="1500m",
                    runtime=0,
                    invalidate=0,
                    buffer_compress_percentage=60,
                    buffer_pattern="0xdeadface",
                    bs="1024K",
                    jobs=1,
                    readwrite="readwrite",
                ))
        for _ in concurrent.futures.as_completed(futures_fio):
            logger.info("Some pod submitted FIO")
        concurrent.futures.wait(futures_fio)

        # Collect fio results in parallel.
        # NOTE: submit the bound method itself, not its call result —
        # executor.submit(pod.get_fio_results()) would run the call
        # synchronously and hand the pool a non-callable task.
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.pvc_num)
        futures_results = []
        for pod in pods_objs:
            futures_results.append(executor.submit(pod.get_fio_results))
        for _ in concurrent.futures.as_completed(futures_results):
            logger.info("Just waiting for fio jobs results")
        concurrent.futures.wait(futures_results)

        # Checksum the written files (filesystem mode only)
        origin_pods_md5 = []
        if not block:
            executor = concurrent.futures.ThreadPoolExecutor(
                max_workers=self.pvc_num)
            futures_md5 = []
            for pod in pods_objs:
                futures_md5.append(
                    executor.submit(
                        cal_md5sum,
                        pod_obj=pod,
                        file_name="fio-rand-readwrite",
                        block=block,
                    ))
            for future_md5 in concurrent.futures.as_completed(futures_md5):
                origin_pods_md5.append(future_md5.result())

        logger.info("Creating clones from pvc objects")
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.pvc_num)
        futures_clone = []
        clone_objs = []
        for pvc in pvc_objs:
            futures_clone.append(
                executor.submit(
                    pvc_clone_factory,
                    pvc_obj=pvc,
                    status=status,
                    volume_mode=volume_mode,
                ))
        for future_clone in concurrent.futures.as_completed(futures_clone):
            clone_objs.append(future_clone.result())
        concurrent.futures.wait(futures_clone)

        logger.info("Attaching pods to pvcs restores")
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.pvc_num)
        futures_restored_pods = []
        restored_pods_objs = []
        for pvc in clone_objs:
            futures_restored_pods.append(
                executor.submit(pod_factory, pvc=pvc, raw_block_pv=block))
        for future_restored_pod in concurrent.futures.as_completed(
                futures_restored_pods):
            restored_pods_objs.append(future_restored_pod.result())
        concurrent.futures.wait(futures_restored_pods)
        # short settle time before reading data back from restored pods
        time.sleep(10)

        # Verify clone data matches the origin (filesystem mode only)
        if not block:
            executor = concurrent.futures.ThreadPoolExecutor(
                max_workers=self.pvc_num)
            futures_restored_pods_md5 = []
            restored_pods_md5 = []
            for restored_pod in restored_pods_objs:
                futures_restored_pods_md5.append(
                    executor.submit(
                        cal_md5sum,
                        pod_obj=restored_pod,
                        file_name="fio-rand-readwrite",
                        block=block,
                    ))
            for future_restored_pod_md5 in concurrent.futures.as_completed(
                    futures_restored_pods_md5):
                restored_pods_md5.append(future_restored_pod_md5.result())
            for pod_num in range(0, self.pvc_num):
                if origin_pods_md5[pod_num] != restored_pods_md5[pod_num]:
                    raise Md5CheckFailed(
                        f"origin pod {pods_objs[pod_num]} md5 value {origin_pods_md5[pod_num]} "
                        f"is not the same as restored pod {restored_pods_objs[pod_num]} md5 "
                        f"value {restored_pods_md5[pod_num]}")

        # Run fresh IO on the restored pods
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.pvc_num)
        futures_restored_pods_fio = []
        for restored_pod in restored_pods_objs:
            futures_restored_pods_fio.append(
                executor.submit(
                    restored_pod.run_io,
                    storage_type,
                    size="1g",
                    rate="1500m",
                    runtime=0,
                    invalidate=0,
                    buffer_compress_percentage=60,
                    buffer_pattern="0xdeadface",
                    bs="1024K",
                    jobs=1,
                    readwrite="readwrite",
                ))
        for _ in concurrent.futures.as_completed(futures_restored_pods_fio):
            logger.info("Waiting for all fio pods submission")
        concurrent.futures.wait(futures_restored_pods_fio)

        # Collect fio results from restored pods (bound method, see above)
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.pvc_num)
        futures_restored_pods_fio_results = []
        for restored_pod in restored_pods_objs:
            futures_restored_pods_fio_results.append(
                executor.submit(restored_pod.get_fio_results))
        for _ in concurrent.futures.as_completed(
                futures_restored_pods_fio_results):
            logger.info("Finished waiting for some pod")
        concurrent.futures.wait(futures_restored_pods_fio_results)

        # After fresh IO the restored data must now DIFFER from the origin
        if not block:
            executor = concurrent.futures.ThreadPoolExecutor(
                max_workers=self.pvc_num)
            futures_restored_pods_md5_after_fio = []
            restored_pods_md5_after_fio = []
            for restored_pod in restored_pods_objs:
                futures_restored_pods_md5_after_fio.append(
                    executor.submit(
                        cal_md5sum,
                        pod_obj=restored_pod,
                        file_name="fio-rand-readwrite",
                        block=block,
                    ))
            for future_restored_pods_md5_after_fio in concurrent.futures.as_completed(
                    futures_restored_pods_md5_after_fio):
                restored_pods_md5_after_fio.append(
                    future_restored_pods_md5_after_fio.result())
            for pod_num in range(0, self.pvc_num):
                if restored_pods_md5_after_fio[pod_num] == origin_pods_md5[
                        pod_num]:
                    raise Md5CheckFailed(
                        f"origin pod {pods_objs[pod_num].name} md5 value {origin_pods_md5[pod_num]} "
                        f"is not suppose to be the same as restored pod {restored_pods_objs[pod_num].name} md5 "
                        f"value {restored_pods_md5_after_fio[pod_num]}")
def __init__(self, driver):
    """Set up the validation page object: resolve the OCP version and its locators."""
    super().__init__(driver)
    self.ocp_version = get_ocp_version()
    self.err_list = []
    self.validation_loc = locators[self.ocp_version]["validation"]
def deploy(self, log_cli_level='DEBUG'):
    """
    Deployment specific to OCP cluster on this platform

    Runs terraform apply for the vSphere infrastructure, waits for
    bootstrap, removes the bootstrap node, approves node CSRs, and waits
    for the installation to complete.

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")

    """
    logger.info("Deploying OCP cluster for vSphere platform")
    logger.info(
        f"Openshift-installer will be using loglevel:{log_cli_level}")
    os.chdir(self.terraform_data_dir)
    self.terraform.initialize()
    self.terraform.apply(self.terraform_var)
    os.chdir(self.previous_dir)
    logger.info("waiting for bootstrap to complete")
    try:
        run_cmd(
            f"{self.installer} wait-for bootstrap-complete "
            f"--dir {self.cluster_path} "
            f"--log-level {log_cli_level}",
            timeout=3600)
    except CommandFailed as e:
        # best-effort must-gather on bootstrap failure, then re-raise
        if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
            try:
                gather_bootstrap()
            except Exception as ex:
                logger.error(ex)
        raise e
    if self.folder_structure:
        # comment bootstrap module
        comment_bootstrap_in_lb_module()

        # remove bootstrap IP in load balancer and
        # restart haproxy
        lb = LoadBalancer()
        lb.remove_boostrap_in_proxy()
        lb.restart_haproxy()

    # remove bootstrap node
    if not config.DEPLOYMENT['preserve_bootstrap_node']:
        logger.info("removing bootstrap node")
        os.chdir(self.terraform_data_dir)
        if self.folder_structure:
            self.terraform.destroy_module(self.terraform_var,
                                          constants.BOOTSTRAP_MODULE)
        else:
            self.terraform.apply(self.terraform_var,
                                 bootstrap_complete=True)
        os.chdir(self.previous_dir)

    OCP.set_kubeconfig(self.kubeconfig)

    # wait for all nodes to generate CSR
    # From OCP version 4.4 and above, we have to approve CSR manually
    # for all the nodes
    ocp_version = get_ocp_version()
    if Version.coerce(ocp_version) >= Version.coerce('4.4'):
        wait_for_all_nodes_csr_and_approve(timeout=1200, sleep=30)

    # wait for image registry to show-up
    co = "image-registry"
    wait_for_co(co)

    # patch image registry to null
    self.configure_storage_for_image_registry(self.kubeconfig)

    # wait for install to complete
    logger.info("waiting for install to complete")
    run_cmd(
        f"{self.installer} wait-for install-complete "
        f"--dir {self.cluster_path} "
        f"--log-level {log_cli_level}",
        timeout=1800)

    # Approving CSRs here in-case if any exists
    approve_pending_csr()

    self.test_cluster()
def destroy_cluster(self, log_level="DEBUG"):
    """
    Destroy OCP cluster specific to vSphere UPI

    Determines the terraform version from the deployment's terraform.log
    (falling back to the configured version for partially deployed
    clusters), removes extra disks and scale-up nodes, then runs
    terraform destroy from the cloned installer repo.

    Args:
        log_level (str): log level openshift-installer (default: DEBUG)

    """
    previous_dir = os.getcwd()

    # Download terraform binary based on terraform version
    # in terraform.log
    terraform_log_path = os.path.join(config.ENV_DATA.get('cluster_path'),
                                      config.ENV_DATA.get('TF_LOG_FILE'))

    # check for terraform.log, this check is for partially
    # deployed clusters
    try:
        with open(terraform_log_path, 'r') as fd:
            logger.debug(
                f"Reading terraform version from {terraform_log_path}")
            version_line = fd.readline()
            # version is the last whitespace-separated token on line 1
            terraform_version = version_line.split()[-1]
    except FileNotFoundError:
        logger.debug(f"{terraform_log_path} file not found")
        terraform_version = config.DEPLOYMENT['terraform_version']

    terraform_installer = get_terraform(version=terraform_version)
    config.ENV_DATA['terraform_installer'] = terraform_installer

    # delete the extra disks
    self.delete_disks()

    # check whether cluster has scale-up nodes
    scale_up_terraform_data_dir = os.path.join(
        self.cluster_path, constants.TERRAFORM_DATA_DIR,
        constants.SCALEUP_TERRAFORM_DATA_DIR)
    scale_up_terraform_var = os.path.join(scale_up_terraform_data_dir,
                                          "scale_up_terraform.tfvars")
    if os.path.exists(scale_up_terraform_var):
        os.chdir(scale_up_terraform_data_dir)
        self.destroy_scaleup_nodes(scale_up_terraform_data_dir,
                                   scale_up_terraform_var)
        os.chdir(previous_dir)

    terraform_data_dir = os.path.join(self.cluster_path,
                                      constants.TERRAFORM_DATA_DIR)
    upi_repo_path = os.path.join(
        constants.EXTERNAL_DIR,
        'installer',
    )
    tfvars = os.path.join(config.ENV_DATA.get('cluster_path'),
                          constants.TERRAFORM_DATA_DIR,
                          constants.TERRAFORM_VARS)

    clone_openshift_installer()
    # restore the original main.json if a backup exists
    if (os.path.exists(f"{constants.VSPHERE_MAIN}.backup")
            and os.path.exists(f"{constants.VSPHERE_MAIN}.json")):
        os.rename(f"{constants.VSPHERE_MAIN}.json",
                  f"{constants.VSPHERE_MAIN}.json.backup")

    # getting OCP version here since we run destroy job as
    # separate job in jenkins
    ocp_version = get_ocp_version()
    self.folder_structure = False
    if Version.coerce(ocp_version) >= Version.coerce('4.5'):
        set_aws_region()
        self.folder_structure = True

    # terraform initialization and destroy cluster
    terraform = Terraform(os.path.join(upi_repo_path, "upi/vsphere/"))
    os.chdir(terraform_data_dir)
    terraform.initialize(upgrade=True)
    terraform.destroy(tfvars, refresh=(not self.folder_structure))
    os.chdir(previous_dir)

    # post destroy checks
    self.post_destroy_checks()
def __init__(self, driver):
    """Initialize the page object with a 30s explicit wait and OCS/MCG-store locators."""
    super().__init__(driver)
    self.wait = WebDriverWait(self.driver, 30)
    version_locators = locators[get_ocp_version()]
    self.ocs_loc = version_locators["ocs_operator"]
    self.mcg_stores = version_locators["mcg_stores"]
def __init__(self, driver):
    """Initialize the page object with OCS-operator and bucketclass locators."""
    super().__init__(driver)
    version_locators = locators[get_ocp_version()]
    self.ocs_loc = version_locators["ocs_operator"]
    self.bucketclass = version_locators["bucketclass"]
def __init__(self, driver):
    """Initialize the page-navigation object with locators for the running OCP version."""
    super().__init__(driver)
    self.page_nav = locators[get_ocp_version()]["page"]
def add_capacity(osd_size_capacity_requested,
                 add_extra_disk_to_existing_worker=True):
    """
    Add storage capacity to the cluster

    Args:
        osd_size_capacity_requested(int): Requested osd size capacity
        add_extra_disk_to_existing_worker(bool): Add Disk if True

    Returns:
        int: New storage device set count after the capacity addition

    Note:
    "StoragedeviceSets->count" represents the set of 3 OSDs.
    That is, if there are 3 OSDs in the system then count will be 1.
    If there are 6 OSDs then count is 2 and so on.
    By changing this value,we can add extra devices to the cluster.
    For example, if we want to expand the cluster by 3 more osds in a cluster
    that already has 3 osds, we can set count as 2. So, with each increase
    of count by 1, we get 3 OSDs extra added to the cluster.
    This is how we are going to 'add capacity' via automation.
    As we know that OCS has 3 way replica. That is, same data is placed in 3
    OSDs. Because of this, the total usable capacity for apps from 3 OSDs
    will be the size of one OSD (all osds are of same size).
    If we want to add more capacity to the cluster then we need to add 3
    OSDs of same size as that of the original OSD. add_capacity needs to
    accept the 'capacity_to_add' as an argument.
    From this we need to arrive at storagedeviceSets -> count and then
    "Patch" this count to get the required capacity to add.
    To do so, we use following formula:
    storageDeviceSets->count = (capacity reqested / osd capacity ) +
        existing count storageDeviceSets

    """
    lvpresent = None
    lv_set_present = None
    osd_size_existing = get_osd_size()
    device_sets_required = int(osd_size_capacity_requested /
                               osd_size_existing)
    old_storage_devices_sets_count = get_deviceset_count()
    new_storage_devices_sets_count = int(device_sets_required +
                                         old_storage_devices_sets_count)
    is_lso = config.DEPLOYMENT.get("local_storage")
    if is_lso:
        # figure out whether the LSO cluster uses LocalVolume or
        # LocalVolumeSet resources
        lv_lvs_data = localstorage.check_local_volume_local_volume_set()
        if lv_lvs_data.get("localvolume"):
            lvpresent = True
        elif lv_lvs_data.get("localvolumeset"):
            lv_set_present = True
        else:
            log.info(lv_lvs_data)
            raise ResourceNotFoundError(
                "No LocalVolume and LocalVolume Set found")
    ocp_version = get_ocp_version()
    platform = config.ENV_DATA.get("platform", "").lower()
    if (ocp_version == "4.7"
            and (platform == constants.AWS_PLATFORM
                 or platform == constants.VSPHERE_PLATFORM)
            and (not is_lso)):
        # use the module logger, not the root logger, so the message goes
        # through the framework's logging configuration
        log.info("Add capacity via UI")
        setup_ui = login_ui()
        add_ui_obj = AddReplaceDeviceUI(setup_ui)
        add_ui_obj.add_capacity_ui()
        close_browser(setup_ui)
    else:
        if lvpresent:
            ocp_obj = OCP(
                kind="localvolume",
                namespace=config.ENV_DATA["local_storage_namespace"])
            localvolume_data = ocp_obj.get(resource_name="local-block")
            device_list = localvolume_data["spec"]["storageClassDevices"][0][
                "devicePaths"]
            final_device_list = localstorage.get_new_device_paths(
                device_sets_required, osd_size_capacity_requested)
            device_list.sort()
            final_device_list.sort()
            # if the device lists are identical, no new devices were found
            if device_list == final_device_list:
                raise ResourceNotFoundError("No Extra device found")
            param = f"""[{{ "op": "replace", "path": "/spec/storageClassDevices/0/devicePaths", "value": {final_device_list}}}]"""
            log.info(f"Final device list : {final_device_list}")
            lvcr = localstorage.get_local_volume_cr()
            log.info("Patching Local Volume CR...")
            lvcr.patch(
                resource_name=lvcr.get()["items"][0]["metadata"]["name"],
                params=param.strip("\n"),
                format_type="json",
            )
            localstorage.check_pvs_created(
                int(len(final_device_list) / new_storage_devices_sets_count))
        if lv_set_present:
            if check_pvs_present_for_ocs_expansion():
                log.info("Found Extra PV")
            else:
                if (platform == constants.VSPHERE_PLATFORM
                        and add_extra_disk_to_existing_worker):
                    log.info("No Extra PV found")
                    log.info(
                        "Adding Extra Disk to existing VSphere Worker nodes")
                    add_disk_for_vsphere_platform()
                else:
                    raise PVNotSufficientException(
                        f"No Extra PV found in {constants.OPERATOR_NODE_LABEL}"
                    )
        sc = get_storage_cluster()
        # adding the storage capacity to the cluster
        params = f"""[{{ "op": "replace", "path": "/spec/storageDeviceSets/0/count", "value": {new_storage_devices_sets_count}}}]"""
        sc.patch(
            resource_name=sc.get()["items"][0]["metadata"]["name"],
            params=params.strip("\n"),
            format_type="json",
        )
    return new_storage_devices_sets_count
def destroy_cluster(self, log_level="DEBUG"):
    """
    Destroy OCP cluster specific to vSphere UPI

    Determines the terraform version from the deployment's terraform.log
    (falling back to the configured version for partially deployed
    clusters), removes extra disks and scale-up nodes, handles the
    terraform ignition provider and upgraded-cluster installer branch for
    OCP >= 4.6, then runs terraform destroy.

    Args:
        log_level (str): log level openshift-installer (default: DEBUG)

    """
    previous_dir = os.getcwd()

    # Download terraform binary based on terraform version
    # in terraform.log
    terraform_log_path = os.path.join(config.ENV_DATA.get("cluster_path"),
                                      config.ENV_DATA.get("TF_LOG_FILE"))

    # check for terraform.log, this check is for partially
    # deployed clusters
    try:
        with open(terraform_log_path, "r") as fd:
            logger.debug(
                f"Reading terraform version from {terraform_log_path}")
            version_line = fd.readline()
            # version is the last whitespace-separated token on line 1
            terraform_version = version_line.split()[-1]
    except FileNotFoundError:
        logger.debug(f"{terraform_log_path} file not found")
        terraform_version = config.DEPLOYMENT["terraform_version"]

    terraform_installer = get_terraform(version=terraform_version)
    config.ENV_DATA["terraform_installer"] = terraform_installer

    # getting OCP version here since we run destroy job as
    # separate job in jenkins
    ocp_version = get_ocp_version()
    self.folder_structure = False
    if Version.coerce(ocp_version) >= Version.coerce("4.5"):
        set_aws_region()
        self.folder_structure = True
        config.ENV_DATA["folder_structure"] = self.folder_structure

    # delete the extra disks
    self.delete_disks()

    # check whether cluster has scale-up nodes
    scale_up_terraform_data_dir = os.path.join(
        self.cluster_path,
        constants.TERRAFORM_DATA_DIR,
        constants.SCALEUP_TERRAFORM_DATA_DIR,
    )
    scale_up_terraform_var = os.path.join(scale_up_terraform_data_dir,
                                          "scale_up_terraform.tfvars")
    if os.path.exists(scale_up_terraform_var):
        os.chdir(scale_up_terraform_data_dir)
        self.destroy_scaleup_nodes(scale_up_terraform_data_dir,
                                   scale_up_terraform_var)
        os.chdir(previous_dir)

    terraform_data_dir = os.path.join(self.cluster_path,
                                      constants.TERRAFORM_DATA_DIR)
    upi_repo_path = os.path.join(
        constants.EXTERNAL_DIR,
        "installer",
    )
    tfvars = os.path.join(
        config.ENV_DATA.get("cluster_path"),
        constants.TERRAFORM_DATA_DIR,
        constants.TERRAFORM_VARS,
    )

    clone_openshift_installer()
    # restore the original main.json if a backup exists
    if os.path.exists(
            f"{constants.VSPHERE_MAIN}.backup") and os.path.exists(
                f"{constants.VSPHERE_MAIN}.json"):
        os.rename(
            f"{constants.VSPHERE_MAIN}.json",
            f"{constants.VSPHERE_MAIN}.json.backup",
        )

    # terraform initialization and destroy cluster
    terraform = Terraform(os.path.join(upi_repo_path, "upi/vsphere/"))
    os.chdir(terraform_data_dir)
    if Version.coerce(ocp_version) >= Version.coerce("4.6"):
        # Download terraform ignition provider. For OCP upgrade clusters,
        # ignition provider doesn't exist, so downloading in destroy job
        # as well
        terraform_plugins_path = ".terraform/plugins/linux_amd64/"
        terraform_ignition_provider_path = os.path.join(
            terraform_data_dir,
            terraform_plugins_path,
            "terraform-provider-ignition",
        )

        # check the upgrade history of cluster and checkout to the
        # original installer release. This is due to the issue of not
        # supporting terraform state of OCP 4.5 in installer
        # release of 4.6 branch. More details in
        # https://github.com/red-hat-storage/ocs-ci/issues/2941
        is_cluster_upgraded = False
        try:
            upgrade_history = get_ocp_upgrade_history()
            if len(upgrade_history) > 1:
                is_cluster_upgraded = True
                original_installed_ocp_version = upgrade_history[-1]
                installer_release_branch = (
                    f"release-{original_installed_ocp_version[0:3]}")
                clone_repo(
                    constants.VSPHERE_INSTALLER_REPO,
                    upi_repo_path,
                    installer_release_branch,
                )
        except Exception as ex:
            # best effort: fall through with is_cluster_upgraded=False
            logger.error(ex)

        if not (os.path.exists(terraform_ignition_provider_path)
                or is_cluster_upgraded):
            get_terraform_ignition_provider(terraform_data_dir)
            terraform.initialize()
    else:
        terraform.initialize(upgrade=True)
    terraform.destroy(tfvars, refresh=(not self.folder_structure))
    os.chdir(previous_dir)

    # post destroy checks
    self.post_destroy_checks()
def prepare_disconnected_ocs_deployment():
    """
    Prepare disconnected ocs deployment:
    - get related images from OCS operator bundle csv
    - mirror related images to mirror registry
    - create imageContentSourcePolicy for the mirrored images
    - disable the default OperatorSources

    Returns:
        str: OCS registry image prepared for disconnected installation (with
            sha256 digest) or None (for live deployment)

    """
    logger.info("Prepare for disconnected OCS installation")
    # Live deployment: prune and mirror the redhat-operators index image,
    # apply the generated ICSP and a mirrored CatalogSource, then return
    # None (no OCS registry image needed for live).
    if config.DEPLOYMENT.get("live_deployment"):
        get_opm_tool()

        pull_secret_path = os.path.join(constants.TOP_DIR, "data",
                                        "pull-secret")
        ocp_version = get_ocp_version()
        index_image = f"{config.DEPLOYMENT['cs_redhat_operators_image']}:v{ocp_version}"
        mirrored_index_image = (
            f"{config.DEPLOYMENT['mirror_registry']}/{constants.MIRRORED_INDEX_IMAGE_NAMESPACE}/"
            f"{constants.MIRRORED_INDEX_IMAGE_NAME}:v{ocp_version}"
        )
        # prune an index image down to only the required packages
        logger.info(
            f"Prune index image {index_image} -> {mirrored_index_image} "
            f"(packages: {', '.join(constants.DISCON_CL_REQUIRED_PACKAGES)})"
        )
        cmd = (
            f"opm index prune -f {index_image} "
            f"-p {','.join(constants.DISCON_CL_REQUIRED_PACKAGES)} "
            f"-t {mirrored_index_image}"
        )
        # opm tool doesn't have an --authfile parameter, we have to supply
        # the auth file through this env variable
        os.environ["REGISTRY_AUTH_FILE"] = pull_secret_path
        exec_cmd(cmd)

        # login to mirror registry
        login_to_mirror_registry(pull_secret_path)

        # push pruned index image to mirror registry
        logger.info(
            f"Push pruned index image to mirror registry: {mirrored_index_image}"
        )
        cmd = f"podman push --authfile {pull_secret_path} --tls-verify=false {mirrored_index_image}"
        exec_cmd(cmd)

        # mirror related images (this might take very long time)
        logger.info(
            f"Mirror images related to index image: {mirrored_index_image}")
        cmd = (
            f"oc adm catalog mirror {mirrored_index_image} -a {pull_secret_path} --insecure "
            f"{config.DEPLOYMENT['mirror_registry']} --index-filter-by-os='.*'"
        )
        oc_acm_result = exec_cmd(cmd, timeout=7200)

        # Find the line announcing where the mirroring manifests were
        # written; for-else raises if it never appears in stdout.
        for line in oc_acm_result.stdout.decode("utf-8").splitlines():
            if "wrote mirroring manifests to" in line:
                break
        else:
            raise NotFoundError(
                "Manifests directory not printed to stdout of 'oc adm catalog mirror ...' command."
            )
        # `line` still holds the matched line after the loop break.
        mirroring_manifests_dir = line.replace(
            "wrote mirroring manifests to ", "")
        logger.debug(
            f"Mirrored manifests directory: {mirroring_manifests_dir}")

        # create ImageContentSourcePolicy
        icsp_file = os.path.join(
            f"{mirroring_manifests_dir}",
            "imageContentSourcePolicy.yaml",
        )
        exec_cmd(f"oc apply -f {icsp_file}")

        # Disable the default OperatorSources
        exec_cmd(
            """oc patch OperatorHub cluster --type json """
            """-p '[{"op": "add", "path": "/spec/disableAllDefaultSources", "value": true}]'"""
        )

        # create redhat-operators CatalogSource pointing at the mirrored
        # index image (delete=False: the manifest file must outlive this fd)
        catalog_source_data = templating.load_yaml(
            constants.CATALOG_SOURCE_YAML)
        catalog_source_manifest = tempfile.NamedTemporaryFile(
            mode="w+", prefix="catalog_source_manifest", delete=False
        )
        catalog_source_data["spec"]["image"] = f"{mirrored_index_image}"
        catalog_source_data["metadata"]["name"] = "redhat-operators"
        catalog_source_data["spec"]["displayName"] = (
            "Red Hat Operators - Mirrored")
        templating.dump_data_to_temp_yaml(
            catalog_source_data, catalog_source_manifest.name
        )
        exec_cmd(f"oc apply -f {catalog_source_manifest.name}")
        catalog_source = CatalogSource(
            resource_name="redhat-operators",
            namespace=constants.MARKETPLACE_NAMESPACE,
        )
        # Wait for catalog source is ready
        catalog_source.wait_for_state("READY")

        return

    if config.DEPLOYMENT.get("stage_rh_osbs"):
        raise NotImplementedError(
            "Disconnected installation from stage is not implemented!"
        )

    # Non-live deployment: resolve the OCS registry image and tag from
    # config (falling back to the latest downstream tag for DS builds).
    ocs_registry_image = config.DEPLOYMENT.get("ocs_registry_image", "")
    logger.debug(f"ocs-registry-image: {ocs_registry_image}")
    ocs_registry_image_and_tag = ocs_registry_image.split(":")
    ocs_registry_image = ocs_registry_image_and_tag[0]
    image_tag = (
        ocs_registry_image_and_tag[1]
        if len(ocs_registry_image_and_tag) == 2
        else None
    )
    if not image_tag and config.REPORTING.get("us_ds") == "DS":
        image_tag = get_latest_ds_olm_tag(
            upgrade=False,
            latest_tag=config.DEPLOYMENT.get("default_latest_tag", "latest"),
        )
        ocs_registry_image = f"{config.DEPLOYMENT['default_ocs_registry_image'].split(':')[0]}:{image_tag}"
    bundle_image = f"{constants.OCS_OPERATOR_BUNDLE_IMAGE}:{image_tag}"
    logger.debug(f"ocs-operator-bundle image: {bundle_image}")

    # Extract the ocs-operator image from the first container of the first
    # deployment in the bundle CSV (chained .get()s tolerate missing keys).
    csv_yaml = get_csv_from_image(bundle_image)
    ocs_operator_image = (
        csv_yaml.get("spec", {})
        .get("install", {})
        .get("spec", {})
        .get("deployments", [{}])[0]
        .get("spec", {})
        .get("template", {})
        .get("spec", {})
        .get("containers", [{}])[0]
        .get("image")
    )
    logger.debug(f"ocs-operator-image: {ocs_operator_image}")

    # prepare list related images (bundle, registry and operator images and
    # all images from relatedImages section from csv)
    ocs_related_images = []
    ocs_related_images.append(get_image_with_digest(bundle_image))
    ocs_registry_image_with_digest = get_image_with_digest(ocs_registry_image)
    ocs_related_images.append(ocs_registry_image_with_digest)
    ocs_related_images.append(get_image_with_digest(ocs_operator_image))
    ocs_related_images += [
        image["image"] for image in csv_yaml.get("spec").get("relatedImages")
    ]
    logger.debug(f"OCS Related Images: {ocs_related_images}")

    mirror_registry = config.DEPLOYMENT["mirror_registry"]
    # prepare images mapping file for mirroring: "source=mirror" lines,
    # where the mirror keeps the repo path (between first '/' and '@').
    mapping_file_content = [
        f"{image}={mirror_registry}{image[image.index('/'):image.index('@')]}\n"
        for image in ocs_related_images
    ]
    logger.debug(f"Mapping file content: {mapping_file_content}")
    name = "ocs-images"
    mapping_file = os.path.join(config.ENV_DATA["cluster_path"],
                                f"{name}-mapping.txt")
    # write mapping file to disk
    with open(mapping_file, "w") as f:
        f.writelines(mapping_file_content)

    # prepare ImageContentSourcePolicy for OCS images
    with open(constants.TEMPLATE_IMAGE_CONTENT_SOURCE_POLICY_YAML) as f:
        ocs_icsp = yaml.safe_load(f)
    ocs_icsp["metadata"]["name"] = name
    ocs_icsp["spec"]["repositoryDigestMirrors"] = []
    for image in ocs_related_images:
        ocs_icsp["spec"]["repositoryDigestMirrors"].append(
            {
                "mirrors": [
                    f"{mirror_registry}{image[image.index('/'):image.index('@')]}"
                ],
                "source": image[: image.index("@")],
            }
        )
    logger.debug(f"OCS imageContentSourcePolicy: {yaml.safe_dump(ocs_icsp)}")
    ocs_icsp_file = os.path.join(
        config.ENV_DATA["cluster_path"],
        f"{name}-imageContentSourcePolicy.yaml"
    )
    with open(ocs_icsp_file, "w+") as fs:
        yaml.safe_dump(ocs_icsp, fs)

    # create ImageContentSourcePolicy
    exec_cmd(f"oc apply -f {ocs_icsp_file}")

    # mirror images based on mapping file
    with prepare_customized_pull_secret(ocs_related_images) as authfile_fo:
        login_to_mirror_registry(authfile_fo.name)

        exec_cmd(
            f"oc image mirror --filter-by-os='.*' -f {mapping_file} --insecure "
            f"--registry-config={authfile_fo.name} --max-per-registry=2",
            timeout=3600,
        )

    # Disable the default OperatorSources
    exec_cmd(
        """oc patch OperatorHub cluster --type json """
        """-p '[{"op": "add", "path": "/spec/disableAllDefaultSources", "value": true}]'"""
    )

    # wait for newly created imageContentSourcePolicy is applied on all nodes
    wait_for_machineconfigpool_status("all")

    return ocs_registry_image_with_digest
def __init__(self):
    """
    Initialize the ROSA OCP deployment helper.

    Resolves the target OCP version and the cloud region from the
    framework configuration and stores them on the instance.
    """
    # Zero-argument super() is the Python 3 idiom; equivalent to the
    # legacy super(ROSAOCP, self) form used previously (the file is
    # Python-3-only — it uses f-strings throughout).
    super().__init__()
    self.ocp_version = get_ocp_version()
    self.region = config.ENV_DATA["region"]