    def setUpClass(cls):
        """Initialize all the variables necessary for test cases.

        Reads glusto's global config (``g.config``) and publishes the OCP,
        storage-project, heketi and gluster-server settings as class
        attributes, then sanity-checks the environment (heketi reachable,
        storage project switchable) before any test runs.

        Raises:
            ExecutionError: if the base64 command fails on the master node
                or the oc project cannot be switched.
            ConfigError: if the heketi server does not respond.
        """
        super(BaseClass, cls).setUpClass()

        # Initializes OCP config variables
        cls.ocp_servers_info = g.config['ocp_servers']
        cls.ocp_master_node = list(g.config['ocp_servers']['master'].keys())
        cls.ocp_master_node_info = g.config['ocp_servers']['master']
        cls.ocp_client = list(g.config['ocp_servers']['client'].keys())
        cls.ocp_client_info = g.config['ocp_servers']['client']
        cls.ocp_nodes = list(g.config['ocp_servers']['nodes'].keys())
        cls.ocp_nodes_info = g.config['ocp_servers']['nodes']

        # Initializes storage project config variables.
        # The legacy "cns" section takes precedence over "openshift";
        # likewise 'storage_project_name' wins over the older
        # 'setup.cns_project_name' key.
        openshift_config = g.config.get("cns", g.config.get("openshift"))
        cls.storage_project_name = openshift_config.get(
            'storage_project_name',
            openshift_config.get('setup', {}).get('cns_project_name'))

        # Initializes heketi config variables
        heketi_config = openshift_config['heketi_config']
        cls.heketi_dc_name = heketi_config['heketi_dc_name']
        cls.heketi_service_name = heketi_config['heketi_service_name']
        cls.heketi_client_node = heketi_config['heketi_client_node']
        cls.heketi_server_url = heketi_config['heketi_server_url']
        cls.heketi_cli_user = heketi_config['heketi_cli_user']
        cls.heketi_cli_key = heketi_config['heketi_cli_key']

        cls.gluster_servers = list(g.config['gluster_servers'].keys())
        cls.gluster_servers_info = g.config['gluster_servers']

        # 'storage_class1' is the current key name; 'file_storage_class'
        # is kept as a fallback for older config files.
        cls.storage_classes = openshift_config['dynamic_provisioning'][
            'storage_classes']
        cls.sc = cls.storage_classes.get(
            'storage_class1', cls.storage_classes.get('file_storage_class'))

        # Pre-compute the base64-encoded heketi key on the master node so
        # tests can drop it straight into an OCP secret's data field.
        cmd = "echo -n %s | base64" % cls.heketi_cli_key
        ret, out, err = g.run(cls.ocp_master_node[0], cmd, "root")
        if ret != 0:
            raise ExecutionError("failed to execute cmd %s on %s out: %s "
                                 "err: %s" % (
                                     cmd, cls.ocp_master_node[0], out, err))
        cls.secret_data_key = out.strip()

        # Checks if heketi server is alive
        if not hello_heketi(cls.heketi_client_node, cls.heketi_server_url):
            raise ConfigError("Heketi server %s is not alive"
                              % cls.heketi_server_url)

        # Switch to the storage project
        if not switch_oc_project(
                cls.ocp_master_node[0], cls.storage_project_name):
            raise ExecutionError("Failed to switch oc project on node %s"
                                 % cls.ocp_master_node[0])

        # A single run id (timestamp) is shared by every test class in
        # one execution; only generate it once.
        if 'glustotest_run_id' not in g.config:
            g.config['glustotest_run_id'] = (
                datetime.datetime.now().strftime('%H_%M_%d_%m_%Y'))
        cls.glustotest_run_id = g.config['glustotest_run_id']
        msg = "Setupclass: %s : %s" % (cls.__name__, cls.glustotest_run_id)
        g.log.info(msg)
def test_pvc_resize_while_ios_are_running(self): """Re-size PVC while IO's are running""" # Create an SC, PVC and app pod sc_name = self.create_storage_class(create_vol_name_prefix=True, allow_volume_expansion=True) pvc_name = self.create_and_wait_for_pvc(sc_name=sc_name, pvc_size=1) dc_name, pod_name = self.create_dc_with_pvc(pvc_name) # Run io on the pod for 5 minutes in background cmd_io = ('timeout 5m bash -c -- "while true; do oc exec {} dd ' 'if=/dev/urandom of=/mnt/f1 bs=100K count=2000; ' 'done"'.format(pod_name)) proc = g.run_async(host=self.node, command=cmd_io) # Resize PVC while io's are running and validate resize operation resize_pvc(self.node, pvc_name, 2) verify_pvc_size(self.node, pvc_name, 2) pv_name = get_pv_name_from_pvc(self.node, pvc_name) verify_pv_size(self.node, pv_name, 2) # Check if timeout command and ios are successful ret, _, err = proc.async_communicate() msg = "command terminated with exit code" if ret != 124 or msg in str(err): raise ExecutionError("Failed to run io, error {}".format(str(err)))
    def test_pv_resize_with_prefix_for_name_and_size(
            self, create_vol_name_prefix=False, valid_size=True):
        """Validate PV resize with and without name prefix.

        Args:
            create_vol_name_prefix (bool): when True, verify the heketi
                volume got the storage class's name prefix.
            valid_size (bool): when True, resize to a valid size (2Gi) and
                expect success; when False, request an invalid size and
                expect the resize to be rejected with the size unchanged.
        """
        dir_path = "/mnt/"
        node = self.ocp_client[0]

        # Create PVC
        self.create_storage_class(
            allow_volume_expansion=True,
            create_vol_name_prefix=create_vol_name_prefix)
        pvc_name = self.create_and_wait_for_pvc()

        # Create DC with POD and attached PVC to it.
        dc_name = oc_create_app_dc_with_io(node, pvc_name)
        self.addCleanup(oc_delete, node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, node, dc_name, 0)
        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)

        if create_vol_name_prefix:
            ret = heketi_ops.verify_volume_name_prefix(
                node, self.sc['volumenameprefix'],
                self.sc['secretnamespace'],
                pvc_name, self.heketi_server_url)
            self.assertTrue(ret, "verify volnameprefix failed")

        # Fill ~100MB so the volume has data before the resize
        cmd = ("dd if=/dev/urandom of=%sfile "
               "bs=100K count=1000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0, "Failed to execute command %s on %s" % (
            cmd, node))
        pv_name = get_pv_name_from_pvc(node, pvc_name)

        # If resize size is invalid then size should not change
        if valid_size:
            # Writing ~1GB into the 1Gi volume should exhaust it.
            # NOTE(review): this block assumes oc_rsh raises
            # AssertionError when the dd fails (ENOSPC); if dd somehow
            # succeeds the explicit ExecutionError below escapes the
            # assertRaises context and fails the test with a clear
            # message -- confirm oc_rsh's failure behavior.
            cmd = ("dd if=/dev/urandom of=%sfile2 "
                   "bs=100K count=10000") % dir_path
            with self.assertRaises(AssertionError):
                ret, out, err = oc_rsh(node, pod_name, cmd)
                msg = ("Command '%s' was expected to fail on '%s' node. "
                       "But it returned following: ret is '%s', err is '%s' "
                       "and out is '%s'" % (cmd, node, ret, err, out))
                raise ExecutionError(msg)
            pvc_size = 2
            resize_pvc(node, pvc_name, pvc_size)
            verify_pvc_size(node, pvc_name, pvc_size)
            verify_pv_size(node, pv_name, pvc_size)
        else:
            # A non-numeric size must be rejected, leaving PVC/PV at 1Gi
            invalid_pvc_size = 'ten'
            with self.assertRaises(AssertionError):
                resize_pvc(node, pvc_name, invalid_pvc_size)
            verify_pvc_size(node, pvc_name, 1)
            verify_pv_size(node, pv_name, 1)

        # Respawn the pod and make sure the (possibly resized) volume is
        # still writable from the fresh pod
        oc_delete(node, 'pod', pod_name)
        wait_for_resource_absence(node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)
        cmd = ("dd if=/dev/urandom of=%sfile_new "
               "bs=50K count=10000") % dir_path
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0, "Failed to execute command %s on %s" % (
            cmd, node))
def test_delete_heketidb_volume(self): """Method to test heketidb volume deletion via heketi-cli.""" for i in range(0, 2): volume_info = heketi_ops.heketi_volume_create( self.heketi_client_node, self.heketi_server_url, 10, json=True) self.addCleanup(heketi_ops.heketi_volume_delete, self.heketi_client_node, self.heketi_server_url, volume_info["id"]) volume_list_info = heketi_ops.heketi_volume_list( self.heketi_client_node, self.heketi_server_url, json=True) self.assertTrue(volume_list_info["volumes"], "Heketi volume list empty.") for volume_id in volume_list_info["volumes"]: volume_info = heketi_ops.heketi_volume_info( self.heketi_client_node, self.heketi_server_url, volume_id, json=True) if volume_info["name"] == "heketidbstorage": self.assertRaises(AssertionError, heketi_ops.heketi_volume_delete, self.heketi_client_node, self.heketi_server_url, volume_id) return raise ExecutionError( "Warning: heketidbstorage doesn't exist in list of volumes")
def test_dynamic_provisioning_glusterfile_glusterpod_failure(self): """Create glusterblock PVC when gluster pod is down.""" # Check that we work with containerized Gluster if not self.is_containerized_gluster(): self.skipTest("Only containerized Gluster clusters are supported.") mount_path = "/mnt" datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id()) # Create secret and storage class self.create_storage_class() # Create PVC pvc_name = self.create_and_wait_for_pvc() # Create app POD with attached volume pod_name = oc_create_tiny_pod_with_volume(self.node, pvc_name, "test-pvc-mount-on-app-pod", mount_path=mount_path) self.addCleanup(wait_for_resource_absence, self.node, 'pod', pod_name) self.addCleanup(oc_delete, self.node, 'pod', pod_name) # Wait for app POD be up and running wait_for_pod_be_ready(self.node, pod_name, timeout=60, wait_step=2) # Run IO in background io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % ( pod_name, datafile_path) async_io = g.run_async(self.node, io_cmd, "root") # Pick up one of the hosts which stores PV brick (4+ nodes case) gluster_pod_data = get_gluster_pod_names_by_pvc_name( self.node, pvc_name)[0] # Delete glusterfs POD from chosen host and wait for spawn of new one oc_delete(self.node, 'pod', gluster_pod_data["pod_name"]) cmd = ("oc get pods -o wide | grep glusterfs | grep %s | " "grep -v Terminating | awk '{print $1}'") % ( gluster_pod_data["host_name"]) for w in Waiter(600, 15): out = self.cmd_run(cmd) new_gluster_pod_name = out.strip().split("\n")[0].strip() if not new_gluster_pod_name: continue else: break if w.expired: error_msg = "exceeded timeout, new gluster pod not created" g.log.error(error_msg) raise ExecutionError(error_msg) new_gluster_pod_name = out.strip().split("\n")[0].strip() g.log.info("new gluster pod name is %s" % new_gluster_pod_name) wait_for_pod_be_ready(self.node, new_gluster_pod_name) # Check that async IO was not interrupted ret, out, err = async_io.async_communicate() 
self.assertEqual(ret, 0, "IO %s failed on %s" % (io_cmd, self.node))
def test_delete_heketidb_volume(self): """ Method to test heketidb volume deletion via heketi-cli """ heketidbexists = False msg = "Error: Cannot delete volume containing the Heketi database" for i in range(0, 2): volume_info = heketi_ops.heketi_volume_create( self.heketi_client_node, self.heketi_server_url, 10, json=True) self.addCleanup( heketi_ops.heketi_volume_delete, self.heketi_client_node, self.heketi_server_url, volume_info["id"]) volume_list_info = heketi_ops.heketi_volume_list( self.heketi_client_node, self.heketi_server_url, json=True) if volume_list_info["volumes"] == []: raise ExecutionError("Heketi volume list empty") for volume_id in volume_list_info["volumes"]: volume_info = heketi_ops.heketi_volume_info( self.heketi_client_node, self.heketi_server_url, volume_id, json=True) if volume_info["name"] == "heketidbstorage": heketidbexists = True delete_ret, delete_output, delete_error = ( heketi_ops.heketi_volume_delete( self.heketi_client_node, self.heketi_server_url, volume_id, raw_cli_output=True)) self.assertNotEqual(delete_ret, 0, "Return code not 0") self.assertEqual( delete_error.strip(), msg, "Invalid reason for heketidb deletion failure") if not heketidbexists: raise ExecutionError( "Warning: heketidbstorage doesn't exist in list of volumes")
    def _node_reboot(self):
        """Reboot the first gluster server node and wait for recovery.

        Triggers a reboot over ssh, waits for the node to come back,
        then verifies the gluster pod and its services are healthy again.

        Raises:
            AssertionError: if the reboot command's exit status is
                unexpected.
            ExecutionError: if the node is unreachable after 600 seconds.
        """
        storage_hostname = (g.config["gluster_servers"]
                            [self.gluster_servers[0]]["storage"])
        cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
        ret, out, err = g.run(storage_hostname, cmd)
        self.addCleanup(self._wait_for_gluster_pod_to_be_ready)
        # The ssh connection is torn down by the reboot, so the expected
        # exit status is 255 (connection lost), not 0
        if ret != 255:
            err_msg = "failed to reboot host %s error: %s" % (
                storage_hostname, err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        # Drop glusto's cached ssh connection; it points at the dying host
        try:
            g.ssh_close_connection(storage_hostname)
        except Exception as e:
            g.log.error("failed to close connection with host %s"
                        " with error: %s" % (storage_hostname, e))
            raise

        # added sleep as node will restart after 3 sec
        time.sleep(3)

        # Probe with rpyc until the node answers again (up to 600s);
        # connection attempts raise while the node is still down
        for w in Waiter(timeout=600, interval=10):
            try:
                if g.rpyc_get_connection(storage_hostname, user="******"):
                    g.rpyc_close_connection(storage_hostname, user="******")
                    break
            except Exception as err:
                g.log.info("exception while getting connection: '%s'" % err)
        if w.expired:
            error_msg = ("exceeded timeout 600 sec, node '%s' is "
                         "not reachable" % storage_hostname)
            g.log.error(error_msg)
            raise ExecutionError(error_msg)

        # wait for the gluster pod to be in 'Running' state
        self._wait_for_gluster_pod_to_be_ready()

        # glusterd and gluster-blockd service should be up and running
        service_names = ("glusterd", "gluster-blockd", "tcmu-runner")
        for gluster_pod in self.gluster_pod_list:
            for service in service_names:
                g.log.info("gluster_pod - '%s' : gluster_service '%s'" % (
                    gluster_pod, service))
                check_service_status_on_pod(
                    self.oc_node, gluster_pod, service, "running"
                )
def _wait_for_gluster_pod_to_be_ready(self): for gluster_pod in self.gluster_pod_list: for w in Waiter(timeout=600, interval=10): try: success = wait_for_pod_be_ready( self.oc_node, gluster_pod, timeout=1, wait_step=1 ) if success: break except ExecutionError as e: g.log.info("exception %s while validating gluster " "pod %s" % (e, gluster_pod)) if w.expired: error_msg = ("exceeded timeout 600 sec, pod '%s' is " "not in 'running' state" % gluster_pod) g.log.error(error_msg) raise ExecutionError(error_msg)
def reboot_gluster_node_and_wait_for_services(self): gluster_node_ip = ( g.config["gluster_servers"][self.gluster_servers[0]]["storage"]) gluster_pod = list( filter(lambda pod: (pod["pod_host_ip"] == gluster_node_ip), get_ocp_gluster_pod_details(self.oc_node))) if not gluster_pod: raise ExecutionError("Gluster pod Host IP '%s' not matched." % gluster_node_ip) gluster_pod = gluster_pod[0]["pod_name"] self.addCleanup(wait_for_pod_be_ready, self.oc_node, gluster_pod) node_reboot_by_command(gluster_node_ip, timeout=600, wait_step=10) # wait for the gluster pod to be in 'Running' state wait_for_pod_be_ready(self.oc_node, gluster_pod) # glusterd and gluster-blockd service should be up and running services = (("glusterd", "running"), ("gluster-blockd", "running"), ("tcmu-runner", "running"), ("gluster-block-target", "exited")) for service, state in services: check_service_status_on_pod(self.oc_node, gluster_pod, service, "active", state)
def validate_multipath_pod(hostname, podname, hacount, mpath):
    """Validate multipath for given app-pod.

    Expects exactly one path in 'active' state and hacount - 1 paths in
    'enabled' (passive) state on the node running the pod.

    Args:
        hostname (str): ocp master node name
        podname (str): app-pod name for which we need to validate
                       multipath. ex : nginx1
        hacount (int): multipath count or HA count. ex: 3
        mpath (str): multipath value to check

    Returns:
        bool: True if successful, otherwise raises exception

    Raises:
        ExecutionError: if the pod's node name cannot be fetched.
        AssertionError: if the active/passive path counts do not match.
    """
    # Fetch the name of the node the pod is scheduled on (not its IP);
    # the previous error message wrongly said "ip"
    pod_nodename_list = oc_get_custom_resource(
        hostname, 'pod', custom=':.spec.nodeName', name=podname)
    if not pod_nodename_list:
        raise ExecutionError(
            "Failed to get node name for pod '{}' from hostname {}".format(
                podname, hostname))
    pod_nodename = pod_nodename_list[0]
    active_node_count, enable_node_count = (1, hacount - 1)

    # Exactly one path must be active
    cmd = "multipath -ll %s | grep 'status=active' | wc -l" % mpath
    active_count = int(cmd_run(cmd, pod_nodename))
    assert active_node_count == active_count, (
        "Active node count on %s for %s is %s and not %s" % (
            pod_nodename, podname, active_count, active_node_count))

    # The remaining hacount - 1 paths must be enabled (passive)
    cmd = "multipath -ll %s | grep 'status=enabled' | wc -l" % mpath
    enable_count = int(cmd_run(cmd, pod_nodename))
    assert enable_node_count == enable_count, (
        "Passive node count on %s for %s is %s and not %s" % (
            pod_nodename, podname, enable_count, enable_node_count))

    g.log.info("Validation of multipath for %s is successful" % podname)
    return True
def enable_pvc_resize(master_node):
    '''
     This function edits the /etc/origin/master/master-config.yaml
     file - to enable pv_resize feature
     and restarts atomic-openshift service on master node

     Args:
         master_node (str): hostname of masternode on which
                            want to edit the master-config.yaml file

     Returns:
         bool: True if successful, otherwise raise Exception

     Raises:
         NotSupportedException: on openshift versions older than 3.9.
         ExecutionError: on download/upload/restart failures or if the
             API does not come back within 120 seconds.
    '''
    version = get_openshift_version()
    # NOTE(review): plain string comparison assumes "3.x" style versions;
    # it would misorder "3.10" vs "3.9" -- confirm what
    # get_openshift_version() actually returns.
    if version < "3.9":
        msg = ("pv resize is not available in openshift "
               "version %s " % version)
        g.log.error(msg)
        raise NotSupportedException(msg)

    # Work on a local temp copy of the remote config file; the finally
    # below guarantees it is removed on every path (it previously leaked
    # on errors and on the early "already edited" return)
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        temp_filename = temp.name
    try:
        try:
            g.download(master_node, MASTER_CONFIG_FILEPATH, temp_filename)
        except Exception as e:
            # fixed: the two halves of this message previously joined as
            # "due toexception" (missing space)
            err_msg = (
                "Failed to download '{}' from master node '{}' due to "
                "exception\n{}".format(
                    MASTER_CONFIG_FILEPATH, master_node, six.text_type(e)))
            raise ExecutionError(err_msg)

        with open(temp_filename, 'r') as f:
            data = yaml.load(f, Loader=yaml.FullLoader)
        dict_add = data['admissionConfig']['pluginConfig']
        if "PersistentVolumeClaimResize" in dict_add:
            g.log.info("master-config.yaml file is already edited")
            return True
        dict_add['PersistentVolumeClaimResize'] = {
            'configuration': {
                'apiVersion': 'v1',
                'disable': 'false',
                'kind': 'DefaultAdmissionConfig'}}
        data['admissionConfig']['pluginConfig'] = dict_add

        # Turn on the ExpandPersistentVolumes feature gate for both the
        # API server and the controllers
        kube_config = data['kubernetesMasterConfig']
        for key in ('apiServerArguments', 'controllerArguments'):
            kube_config[key] = (
                kube_config.get(key)
                if isinstance(kube_config.get(key), dict) else {})
            value = ['ExpandPersistentVolumes=true']
            kube_config[key]['feature-gates'] = value

        with open(temp_filename, 'w+') as f:
            yaml.dump(data, f, default_flow_style=False)

        try:
            g.upload(master_node, temp_filename, MASTER_CONFIG_FILEPATH)
        except Exception as e:
            # fixed: format args were swapped here (the node name was
            # printed as the file and vice versa), and the missing space
            # produced "due toexception"
            err_msg = (
                "Failed to upload '{}' to master node '{}' due to "
                "exception\n{}".format(
                    MASTER_CONFIG_FILEPATH, master_node, six.text_type(e)))
            raise ExecutionError(err_msg)
    finally:
        os.unlink(temp_filename)

    # Restart the master services so the new admission config is picked up
    if version == "3.9":
        cmd = ("systemctl restart atomic-openshift-master-api "
               "atomic-openshift-master-controllers")
    else:
        cmd = ("/usr/local/bin/master-restart api && "
               "/usr/local/bin/master-restart controllers")
    ret, out, err = g.run(master_node, cmd, "root")
    if ret != 0:
        err_msg = "Failed to execute cmd %s on %s\nout: %s\nerr: %s" % (
            cmd, master_node, out, err)
        g.log.error(err_msg)
        raise ExecutionError(err_msg)

    # Wait for API service to be ready after the restart
    for w in waiter.Waiter(timeout=120, interval=1):
        try:
            cmd_run("oc get nodes", master_node)
            return True
        except AssertionError:
            continue
    err_msg = "Exceeded 120s timeout waiting for OCP API to start responding."
    g.log.error(err_msg)
    raise ExecutionError(err_msg)
def enable_pvc_resize(master_node):
    '''
     This function edits the /etc/origin/master/master-config.yaml
     file - to enable pv_resize feature
     and restarts atomic-openshift service on master node

     Args:
         master_node (str): hostname of masternode on which
                            want to edit the master-config.yaml file

     Returns:
         bool: True if successful, otherwise raise Exception

     Raises:
         NotSupportedException: on openshift versions older than 3.9.
         ExecutionError: on rpyc/edit/restart failures.
    '''
    version = get_openshift_version()
    # NOTE(review): plain string comparison assumes "3.x" style versions;
    # confirm get_openshift_version() semantics for e.g. "3.10".
    if version < "3.9":
        msg = ("pv resize is not available in openshift "
               "version %s " % version)
        g.log.error(msg)
        raise NotSupportedException(msg)

    # Edit the remote config file in place over an rpyc connection
    try:
        conn = g.rpyc_get_connection(master_node, user="******")
        if conn is None:
            err_msg = ("Failed to get rpyc connection of node %s"
                       % master_node)
            g.log.error(err_msg)
            raise ExecutionError(err_msg)

        with conn.builtin.open(MASTER_CONFIG_FILEPATH, 'r') as f:
            # fixed: yaml.load() without an explicit Loader is deprecated
            # (PyYAML >= 5.1) and unsafe; FullLoader matches the sibling
            # implementation of this function
            data = yaml.load(f, Loader=yaml.FullLoader)
        dict_add = data['admissionConfig']['pluginConfig']
        if "PersistentVolumeClaimResize" in dict_add:
            g.log.info("master-config.yaml file is already edited")
            return True
        dict_add['PersistentVolumeClaimResize'] = {
            'configuration': {
                'apiVersion': 'v1',
                'disable': 'false',
                'kind': 'DefaultAdmissionConfig'
            }
        }
        data['admissionConfig']['pluginConfig'] = dict_add

        # Turn on the ExpandPersistentVolumes feature gate for both the
        # API server and the controllers
        kube_config = data['kubernetesMasterConfig']
        for key in ('apiServerArguments', 'controllerArguments'):
            kube_config[key] = (kube_config.get(key) if isinstance(
                kube_config.get(key), dict) else {})
            value = ['ExpandPersistentVolumes=true']
            kube_config[key]['feature-gates'] = value

        with conn.builtin.open(MASTER_CONFIG_FILEPATH, 'w+') as f:
            yaml.dump(data, f, default_flow_style=False)
    except Exception as err:
        raise ExecutionError("failed to edit master-config.yaml file "
                             "%s on %s" % (err, master_node))
    finally:
        g.rpyc_close_connection(master_node, user="******")
    g.log.info("successfully edited master-config.yaml file "
               "%s" % master_node)

    # Restart the master services so the new admission config is picked up
    if version == "3.9":
        cmd = ("systemctl restart atomic-openshift-master-api "
               "atomic-openshift-master-controllers")
    else:
        cmd = ("/usr/local/bin/master-restart api && "
               "/usr/local/bin/master-restart controllers")
    ret, out, err = g.run(master_node, cmd, "root")
    if ret != 0:
        err_msg = "Failed to execute cmd %s on %s\nout: %s\nerr: %s" % (
            cmd, master_node, out, err)
        g.log.error(err_msg)
        raise ExecutionError(err_msg)
    return True