def create_and_wait_for_pvcs(self, pvc_size=1, pvc_name_prefix="autotests-pvc", pvc_amount=1, sc_name=None, timeout=120, wait_step=3): node = self.ocp_client[0] # Create storage class if not specified if not sc_name: if getattr(self, "sc_name", ""): sc_name = self.sc_name else: sc_name = self.create_storage_class() # Create PVCs pvc_names = [] for i in range(pvc_amount): pvc_name = oc_create_pvc( node, sc_name, pvc_name_prefix=pvc_name_prefix, pvc_size=pvc_size) pvc_names.append(pvc_name) self.addCleanup( wait_for_resource_absence, node, 'pvc', pvc_name) # Wait for PVCs to be in bound state try: for pvc_name in pvc_names: verify_pvc_status_is_bound(node, pvc_name, timeout, wait_step) finally: if get_openshift_version() < "3.9": reclaim_policy = "Delete" else: reclaim_policy = oc_get_custom_resource( node, 'sc', ':.reclaimPolicy', sc_name)[0] for pvc_name in pvc_names: if reclaim_policy == 'Retain': pv_name = get_pv_name_from_pvc(node, pvc_name) self.addCleanup(oc_delete, node, 'pv', pv_name, raise_on_absence=False) custom = (r':.metadata.annotations."gluster\.kubernetes' r'\.io\/heketi\-volume\-id"') vol_id = oc_get_custom_resource( node, 'pv', custom, pv_name)[0] if self.sc.get('provisioner') == "kubernetes.io/glusterfs": self.addCleanup(heketi_volume_delete, self.heketi_client_node, self.heketi_server_url, vol_id, raise_on_error=False) else: self.addCleanup(heketi_blockvolume_delete, self.heketi_client_node, self.heketi_server_url, vol_id, raise_on_error=False) self.addCleanup(oc_delete, node, 'pvc', pvc_name, raise_on_absence=False) return pvc_names
def dynamic_provisioning_glusterfile(self, create_vol_name_prefix):
    """Provision a glusterfile PVC dynamically and verify it end to end.

    Creates SC + PVC, attaches the PVC to an app DC, verifies the heketi
    volume naming (with or without a custom prefix), and checks basic I/O
    on the mounted volume.

    Args:
        create_vol_name_prefix (bool): when True, the storage class is
            created with a custom volume-name prefix which is then
            verified on the provisioned volume.
    """
    # Create secret and storage class
    self.create_storage_class(
        create_vol_name_prefix=create_vol_name_prefix)

    # Create PVC
    pvc_name = self.create_and_wait_for_pvc()

    # Create DC with POD and attached PVC to it.
    dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
    self.addCleanup(oc_delete, self.node, 'dc', dc_name)
    self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)

    pod_name = get_pod_name_from_dc(self.node, dc_name)
    wait_for_pod_be_ready(self.node, pod_name)

    # Verify Heketi volume name for prefix presence if provided
    if create_vol_name_prefix:
        ret = verify_volume_name_prefix(self.node,
                                        self.sc['volumenameprefix'],
                                        self.sc['secretnamespace'],
                                        pvc_name, self.sc['resturl'])
        self.assertTrue(ret, "verify volnameprefix failed")
    else:
        # Get the volume name and volume id from PV
        pv_name = get_pv_name_from_pvc(self.ocp_client[0], pvc_name)
        custom = [
            r':spec.glusterfs.path',
            r':metadata.annotations.'
            r'"gluster\.kubernetes\.io\/heketi-volume-id"'
        ]
        pv_vol_name, vol_id = oc_get_custom_resource(
            self.ocp_client[0], 'pv', custom, pv_name)

        # check if the pv_volume_name is present in heketi
        # Check if volume name is "vol_"+volumeid or not
        heketi_vol_name = heketi_volume_info(
            self.ocp_client[0], self.heketi_server_url, vol_id,
            json=True)['name']
        self.assertEqual(pv_vol_name, heketi_vol_name,
                         'Volume with vol_id = %s not found'
                         'in heketidb' % vol_id)
        self.assertEqual(heketi_vol_name, 'vol_' + vol_id,
                         'Volume with vol_id = %s have a'
                         'custom perfix' % vol_id)
        # Confirm the volume is visible on the gluster side too.
        out = cmd_run_on_gluster_pod_or_node(self.ocp_master_node[0],
                                             "gluster volume list")
        self.assertIn(pv_vol_name, out,
                      "Volume with id %s does not exist" % vol_id)

    # Make sure we are able to work with files on the mounted volume
    filepath = "/mnt/file_for_testing_io.log"
    for cmd in ("dd if=/dev/urandom of=%s bs=1K count=100",
                "ls -lrt %s",
                "rm -rf %s"):
        cmd = cmd % filepath
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0,
            "Failed to execute '%s' command on %s" % (cmd, self.node))
def get_block_hosting_volume_by_pvc_name(self, pvc_name):
    """Return the name of the block hosting volume backing a PVC.

    Args:
        pvc_name (str): name of the PVC whose block hosting volume
            should be looked up.
    """
    # Resolve the PVC to the name of the PV it is bound to.
    bound_pv = oc_get_custom_resource(
        self.oc_node, 'pvc', ':.spec.volumeName', name=pvc_name)[0]

    # Read the heketi block-volume id annotation off that PV.
    block_vol_id = oc_get_custom_resource(
        self.oc_node, 'pv',
        r':.metadata.annotations."gluster\.org\/volume\-id"',
        name=bound_pv)[0]

    # Ask heketi which hosting volume carries that block volume.
    return get_block_hosting_volume_name(
        self.heketi_client_node, self.heketi_server_url, block_vol_id)
def test_dynamic_provisioning_glusterblock_reclaim_policy_retain(self):
    """Validate retain policy for gluster-block after PVC deletion"""
    if get_openshift_version() < "3.9":
        self.skipTest(
            "'Reclaim' feature is not supported in OCP older than 3.9")

    self.create_storage_class(reclaim_policy='Retain')
    self.create_and_wait_for_pvc()

    dc_name = oc_create_app_dc_with_io(self.node, self.pvc_name)

    try:
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)
    finally:
        # Always tear the DC down, even if the pod never became ready.
        scale_dc_pod_amount_and_wait(self.node, dc_name, pod_amount=0)
        oc_delete(self.node, 'dc', dc_name)

    # get the name of volume
    pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)

    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"',
              r':.spec.persistentVolumeReclaimPolicy']
    vol_id, reclaim_policy = oc_get_custom_resource(
        self.node, 'pv', custom, pv_name)

    # checking the retainPolicy of pvc
    self.assertEqual(reclaim_policy, 'Retain')

    # delete the pvc
    oc_delete(self.node, 'pvc', self.pvc_name)

    # check if pv is also deleted or not
    # NOTE(review): the wait targets the PVC while the comment mentions
    # the PV — presumably wait_for_resource_absence('pvc', ...) also
    # tracks the bound PV, which survives under Retain and makes the
    # wait time out as expected here. Confirm against the helper.
    with self.assertRaises(ExecutionError):
        wait_for_resource_absence(
            self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

    # getting the blockvol list
    blocklist = heketi_blockvolume_list(self.heketi_client_node,
                                        self.heketi_server_url)
    self.assertIn(vol_id, blocklist)

    # With Retain, heketi volume and PV must be removed manually.
    heketi_blockvolume_delete(self.heketi_client_node,
                              self.heketi_server_url, vol_id)
    blocklist = heketi_blockvolume_list(self.heketi_client_node,
                                        self.heketi_server_url)
    self.assertNotIn(vol_id, blocklist)
    oc_delete(self.node, 'pv', pv_name)
    wait_for_resource_absence(self.node, 'pv', pv_name)
def test_dynamic_provisioning_glusterfile_reclaim_policy_retain(self):
    """Validate retain policy for glusterfs after deletion of pvc"""
    if get_openshift_version() < "3.9":
        self.skipTest(
            "'Reclaim' feature is not supported in OCP older than 3.9")

    self.create_storage_class(reclaim_policy='Retain')
    self.create_and_wait_for_pvc()

    # get the name of the volume
    pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
    custom = [r':.metadata.annotations.'
              r'"gluster\.kubernetes\.io\/heketi\-volume\-id"',
              r':.spec.persistentVolumeReclaimPolicy']

    vol_id, reclaim_policy = oc_get_custom_resource(
        self.node, 'pv', custom, pv_name)

    self.assertEqual(reclaim_policy, 'Retain')

    # Create DC with POD and attached PVC to it.
    try:
        dc_name = oc_create_app_dc_with_io(self.node, self.pvc_name)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)
    finally:
        # Always tear the DC down, even if pod readiness failed.
        scale_dc_pod_amount_and_wait(self.node, dc_name, 0)
        oc_delete(self.node, 'dc', dc_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)

    oc_delete(self.node, 'pvc', self.pvc_name)

    # NOTE(review): this wait targets the PVC — presumably
    # wait_for_resource_absence('pvc', ...) also tracks the bound PV,
    # which survives under Retain and makes the wait time out as
    # expected here. Confirm against the helper.
    with self.assertRaises(ExecutionError):
        wait_for_resource_absence(
            self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

    # With Retain, heketi volume and PV must be removed manually.
    heketi_volume_delete(self.heketi_client_node,
                         self.heketi_server_url, vol_id)

    vol_list = heketi_volume_list(self.heketi_client_node,
                                  self.heketi_server_url)

    self.assertNotIn(vol_id, vol_list)

    oc_delete(self.node, 'pv', pv_name)
    wait_for_resource_absence(self.node, 'pv', pv_name)
def validate_multipath_info(self, hacount): """validates multipath command on the pod node Args: hacount (int): hacount for which multipath to be checked """ # create pod using pvc created dc_name = oc_create_app_dc_with_io( self.ocp_master_node[0], self.pvc_name ) pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name) self.addCleanup(oc_delete, self.ocp_master_node[0], "dc", dc_name) self.addCleanup( scale_dc_pod_amount_and_wait, self.ocp_master_node[0], dc_name, 0 ) wait_for_pod_be_ready( self.ocp_master_node[0], pod_name, timeout=120, wait_step=3 ) # Get pod info pod_info = oc_get_pods( self.ocp_master_node[0], selector='deploymentconfig=%s' % dc_name) node = pod_info[pod_name]['node'] # Find iqn from volume info pv_name = get_pv_name_from_pvc(self.ocp_master_node[0], self.pvc_name) custom = [r':.metadata.annotations."gluster\.org\/volume\-id"'] vol_id = oc_get_custom_resource( self.ocp_master_node[0], 'pv', custom, pv_name)[0] vol_info = heketi_blockvolume_info( self.heketi_client_node, self.heketi_server_url, vol_id, json=True) iqn = vol_info['blockvolume']['iqn'] # Get the paths info from the node devices = get_iscsi_block_devices_by_path(node, iqn).keys() self.assertEqual(hacount, len(devices)) # Validate mpath mpaths = set() for device in devices: mpaths.add(get_mpath_name_from_device_name(node, device)) self.assertEqual(1, len(mpaths)) validate_multipath_pod( self.ocp_master_node[0], pod_name, hacount, list(mpaths)[0])
def test_volname_prefix_glusterblock(self):
    """Validate custom volname prefix blockvol"""
    # Provision a block volume through a storage class that carries a
    # custom volume-name prefix.
    self.dynamic_provisioning_glusterblock(
        set_hacount=False, create_vol_name_prefix=True)

    # Read the gluster-block share name recorded on the bound PV.
    backing_pv = get_pv_name_from_pvc(self.node, self.pvc_name)
    share_name = oc_get_custom_resource(
        self.node, 'pv',
        ':.metadata.annotations.glusterBlockShare', backing_pv)[0]

    # The share must be known to heketi and carry the requested prefix.
    known_block_vols = heketi_blockvolume_list(
        self.heketi_client_node, self.heketi_server_url)
    self.assertIn(share_name, known_block_vols)

    expected_prefix = self.sc.get('volumenameprefix', 'autotest')
    self.assertTrue(share_name.startswith(expected_prefix))
def test_glusterblock_logs_presence_verification(self):
    """Validate presence of glusterblock provisioner POD and it's status"""

    # Get glusterblock provisioner dc name
    cmd = ("oc get dc | awk '{ print $1 }' | "
           "grep -e glusterblock -e provisioner")
    dc_name = cmd_run(cmd, self.ocp_master_node[0], True)

    # Get glusterblock provisioner pod name and it's status
    gb_prov_name, gb_prov_status = oc_get_custom_resource(
        self.node, 'pod',
        custom=':.metadata.name,:.status.phase',
        selector='deploymentconfig=%s' % dc_name)[0]
    self.assertEqual(gb_prov_status, 'Running')

    # Create Secret, SC and PVC
    self.create_storage_class()
    self.create_and_wait_for_pvc()

    # Get list of Gluster nodes
    g_hosts = list(g.config.get("gluster_servers", {}).keys())
    self.assertGreater(
        len(g_hosts), 0,
        "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

    # Perform checks on Gluster nodes/PODs
    logs = ("gluster-block-configshell", "gluster-blockd")

    # Log location differs for containerized vs standalone gluster.
    gluster_pods = oc_get_pods(
        self.ocp_client[0], selector="glusterfs-node=pod")
    if gluster_pods:
        cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
    else:
        cmd = "tail -n 5 /var/log/gluster-block/%s.log"
    for g_host in g_hosts:
        for log in logs:
            # BUGFIX: interpolate the log name once so that both the
            # executed command and the failure message refer to the same
            # concrete command (the original message printed the '%s'
            # template instead of the command that actually ran).
            log_cmd = cmd % log
            out = cmd_run_on_gluster_pod_or_node(
                self.ocp_client[0], log_cmd, gluster_node=g_host)
            self.assertTrue(
                out, "Command '%s' output is empty." % log_cmd)
def create_and_wait_for_pvcs(self, pvc_size=1, pvc_name_prefix="autotests-pvc", pvc_amount=1, sc_name=None, timeout=120, wait_step=3): node = self.ocp_client[0] # Create storage class if not specified if not sc_name: if getattr(self, "sc_name", ""): sc_name = self.sc_name else: sc_name = self.create_storage_class() # Create PVCs pvc_names = [] for i in range(pvc_amount): pvc_name = oc_create_pvc(node, sc_name, pvc_name_prefix=pvc_name_prefix, pvc_size=pvc_size) pvc_names.append(pvc_name) self.addCleanup(wait_for_resource_absence, node, 'pvc', pvc_name) # Wait for PVCs to be in bound state try: for pvc_name in pvc_names: verify_pvc_status_is_bound(node, pvc_name, timeout, wait_step) finally: if get_openshift_version() < "3.9": reclaim_policy = "Delete" else: reclaim_policy = oc_get_custom_resource( node, 'sc', ':.reclaimPolicy', sc_name)[0] for pvc_name in pvc_names: if reclaim_policy == 'Retain': pv_name = get_pv_name_from_pvc(node, pvc_name) self.addCleanup(oc_delete, node, 'pv', pv_name, raise_on_absence=False) custom = (r':.metadata.annotations."gluster\.kubernetes' r'\.io\/heketi\-volume\-id"') vol_id = oc_get_custom_resource(node, 'pv', custom, pv_name)[0] if self.sc.get('provisioner') == "kubernetes.io/glusterfs": self.addCleanup(heketi_volume_delete, self.heketi_client_node, self.heketi_server_url, vol_id, raise_on_error=False) else: self.addCleanup(heketi_blockvolume_delete, self.heketi_client_node, self.heketi_server_url, vol_id, raise_on_error=False) self.addCleanup(oc_delete, node, 'pvc', pvc_name, raise_on_absence=False) return pvc_names
def test_brick_evict_with_node_down(self):
    """Test brick evict basic functionality and verify brick evict
    after node down"""

    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Disable node if more than 3
    node_list = heketi_ops.heketi_node_list(h_node, h_server)
    if len(node_list) > 3:
        for node_id in node_list[3:]:
            heketi_ops.heketi_node_disable(h_node, h_server, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, h_node, h_server, node_id)

    # Create heketi volume
    vol_info = heketi_ops.heketi_volume_create(
        h_node, h_server, 1, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete,
        h_node, h_server, vol_info.get('id'))

    # Get node on which heketi pod is scheduled
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        self.ocp_client, self.heketi_dc_name)
    # BUGFIX: custom-columns spec was '.:spec.nodeName'; the oc
    # custom-columns format is ':.<jsonpath>' as used everywhere else
    # in this module, so the original lookup returned no node name.
    heketi_node = openshift_ops.oc_get_custom_resource(
        self.ocp_client, 'pod', ':.spec.nodeName', heketi_pod)[0]

    # Get list of hostname from node id
    host_list = []
    for node_id in node_list[3:]:
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node_id, json=True)
        host_list.append(node_info.get('hostnames').get('manage')[0])

    # Get brick id and glusterfs node which is not heketi node
    for node in vol_info.get('bricks', {}):
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node.get('node'), json=True)
        hostname = node_info.get('hostnames').get('manage')[0]
        # Skip the heketi node and the disabled nodes: powering either
        # off would break heketi itself or be pointless.
        if (hostname != heketi_node) and (hostname not in host_list):
            brick_id = node.get('id')
            break

    # Bring down the glusterfs node
    vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
    self.addCleanup(
        self._wait_for_gluster_pod_after_node_reboot, hostname)
    self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
    node_ops.power_off_vm_by_name(vm_name)

    # Wait glusterfs node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 20):
        status = openshift_ops.oc_get_custom_resource(
            self.ocp_client, 'node', custom, hostname)
        if status[0] in ['False', 'Unknown']:
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(hostname))

    # Perform brick evict operation
    try:
        heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
    except AssertionError as e:
        # With only 3 nodes and one of them down, heketi legitimately
        # has no replacement target; any other failure is re-raised.
        if ('No Replacement was found' not in six.text_type(e)):
            raise
def create_and_wait_for_pvcs(
        self, pvc_size=1, pvc_name_prefix="autotests-pvc", pvc_amount=1,
        sc_name=None, timeout=600, wait_step=10, skip_waiting=False,
        skip_cleanup=False):
    """Create multiple PVC's not waiting for it

    Args:
        pvc_size (int): size of PVC, default value is 1
        pvc_name_prefix (str): volume prefix for each PVC, default value
            is 'autotests-pvc'
        pvc_amount (int): number of PVC's, default value is 1
        sc_name (str): storage class to create PVC, default value is
            None, which will cause automatic creation of sc.
        timeout (int): timeout time for waiting for PVC's to get bound
        wait_step (int): waiting time between each try of PVC status check
        skip_waiting (bool): boolean value which defines whether
            we need to wait for PVC creation or not.
        skip_cleanup (bool): when True, no cleanups are registered and
            the created resources are left behind.
    Returns:
        List: list of PVC names
    """
    node = self.ocp_client[0]

    # Create storage class if not specified
    if not sc_name:
        if getattr(self, "sc_name", ""):
            sc_name = self.sc_name
        else:
            sc_name = self.create_storage_class(skip_cleanup=skip_cleanup)

    # Create PVCs
    pvc_names = []
    for i in range(pvc_amount):
        pvc_name = oc_create_pvc(node, sc_name,
                                 pvc_name_prefix=pvc_name_prefix,
                                 pvc_size=pvc_size)
        pvc_names.append(pvc_name)

    if not skip_cleanup:
        self.addCleanup(wait_for_resources_absence, node, 'pvc', pvc_names)

    # Wait for PVCs to be in bound state
    try:
        if not skip_waiting:
            wait_for_pvcs_be_bound(node, pvc_names, timeout, wait_step)
    finally:
        # BUGFIX: the original body executed ``return pvc_names`` inside
        # this ``finally`` when skip_cleanup was set, which silently
        # swallowed any exception raised by the bound-state wait above.
        # Guarding the cleanup registration instead keeps failures
        # visible while preserving the skip-cleanup behavior.
        if not skip_cleanup:
            if get_openshift_version() < "3.9":
                # Pre-3.9 OCP has no reclaimPolicy support.
                reclaim_policy = "Delete"
            else:
                reclaim_policy = oc_get_custom_resource(
                    node, 'sc', ':.reclaimPolicy', sc_name)[0]

            for pvc_name in pvc_names:
                if reclaim_policy == 'Retain':
                    # With Retain the PV and heketi volume survive PVC
                    # deletion and must be removed explicitly.
                    pv_name = get_pv_name_from_pvc(node, pvc_name)
                    # PVC may not be bound yet when waiting was skipped.
                    if not pv_name and skip_waiting:
                        continue
                    self.addCleanup(oc_delete, node, 'pv', pv_name,
                                    raise_on_absence=False)
                    custom = (
                        r':.metadata.annotations."gluster\.kubernetes'
                        r'\.io\/heketi\-volume\-id"')
                    vol_id = oc_get_custom_resource(node, 'pv', custom,
                                                    pv_name)[0]
                    if self.sc.get(
                            'provisioner') == "kubernetes.io/glusterfs":
                        self.addCleanup(heketi_volume_delete,
                                        self.heketi_client_node,
                                        self.heketi_server_url, vol_id,
                                        raise_on_error=False)
                    else:
                        # Non-file provisioner is assumed gluster-block.
                        self.addCleanup(heketi_blockvolume_delete,
                                        self.heketi_client_node,
                                        self.heketi_server_url, vol_id,
                                        raise_on_error=False)
                self.addCleanup(oc_delete, node, 'pvc', pvc_name,
                                raise_on_absence=False)
    return pvc_names
def test_pv_resize_when_heketi_down(self):
    """Create a PVC and try to expand it when heketi is down, It should
    fail. After heketi is up, expand PVC should work.
    """
    self.create_storage_class(allow_volume_expansion=True)
    pvc_name = self.create_and_wait_for_pvc()
    dc_name, pod_name = self.create_dc_with_pvc(pvc_name)

    pv_name = get_pv_name_from_pvc(self.node, pvc_name)
    custom = (r':metadata.annotations.'
              r'"gluster\.kubernetes\.io\/heketi-volume-id"')
    vol_id = oc_get_custom_resource(self.node, 'pv', custom, pv_name)[0]

    # Snapshot of the heketi volume before the failed expansion attempt,
    # used later to prove nothing changed while heketi was down.
    h_vol_info = heketi_ops.heketi_volume_info(
        self.heketi_client_node, self.heketi_server_url, vol_id, json=True)

    # Bring the heketi POD down
    scale_dc_pod_amount_and_wait(
        self.node, self.heketi_dc_name, pod_amount=0)
    self.addCleanup(scale_dc_pod_amount_and_wait, self.node,
                    self.heketi_dc_name, pod_amount=1)

    cmd = 'dd if=/dev/urandom of=/mnt/%s bs=614400k count=1'
    ret, out, err = oc_rsh(self.node, pod_name, cmd % 'file1')
    self.assertFalse(ret, 'Not able to write file with err: %s' % err)
    wait_for_pod_be_ready(self.node, pod_name, 10, 5)

    # Expansion must fail while heketi is unavailable.
    resize_pvc(self.node, pvc_name, 2)
    wait_for_events(self.node, pvc_name, obj_type='PersistentVolumeClaim',
                    event_type='Warning',
                    event_reason='VolumeResizeFailed')

    # Verify volume was not expanded
    vol_info = get_gluster_vol_info_by_pvc_name(self.node, pvc_name)
    self.assertEqual(vol_info['gluster_vol_id'], h_vol_info['name'])
    self.assertEqual(
        len(vol_info['bricks']['brick']), len(h_vol_info['bricks']))

    # Bring the heketi POD up
    scale_dc_pod_amount_and_wait(
        self.node, self.heketi_dc_name, pod_amount=1)

    # Verify volume expansion
    verify_pvc_size(self.node, pvc_name, 2)
    vol_info = get_gluster_vol_info_by_pvc_name(self.node, pvc_name)
    # Replica-3 volume: brick count must stay a multiple of 3 and grow.
    self.assertFalse(len(vol_info['bricks']['brick']) % 3)
    self.assertLess(
        len(h_vol_info['bricks']), len(vol_info['bricks']['brick']))

    # Wait for remount after expansion
    for w in waiter.Waiter(timeout=30, interval=5):
        ret, out, err = oc_rsh(
            self.node, pod_name,
            "df -Ph /mnt | awk '{print $2}' | tail -1")
        self.assertFalse(
            ret, 'Failed with err: %s and Output: %s' % (err, out))
        if out.strip() == '2.0G':
            break

    # Write data making sure we have more space than it was
    ret, out, err = oc_rsh(self.node, pod_name, cmd % 'file2')
    self.assertFalse(ret, 'Not able to write file with err: %s' % err)

    # Verify pod is running
    wait_for_pod_be_ready(self.node, pod_name, 10, 5)
def test_targetcli_failure_during_block_pvc_creation(self):
    """Continuously kill targetcli while creating block PVCs and verify
    that provisioning failures dominate, then that PVCs recover once
    targetcli is left alone.
    """
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Disable redundant nodes and leave just 3 nodes online
    h_node_id_list = heketi_node_list(h_node, h_server)
    self.assertGreater(len(h_node_id_list), 2)
    for node_id in h_node_id_list[3:]:
        heketi_node_disable(h_node, h_server, node_id)
        self.addCleanup(heketi_node_enable, h_node, h_server, node_id)

    # Gather info about the Gluster node we are going to use for killing
    # targetcli processes.
    chosen_g_node_id = h_node_id_list[0]
    chosen_g_node_info = heketi_node_info(
        h_node, h_server, chosen_g_node_id, json=True)
    chosen_g_node_ip = chosen_g_node_info['hostnames']['storage'][0]
    chosen_g_node_hostname = chosen_g_node_info['hostnames']['manage'][0]
    chosen_g_node_ip_and_hostname = set((
        chosen_g_node_ip, chosen_g_node_hostname))

    # Containerized gluster: run the kill loop via 'oc exec' against the
    # matching gluster pod. Standalone gluster: run directly on the node.
    g_pods = oc_get_custom_resource(
        self.node, 'pod', [':.metadata.name', ':.status.hostIP',
                           ':.status.podIP', ':.spec.nodeName'],
        selector='glusterfs-node=pod')
    if g_pods and g_pods[0]:
        for g_pod in g_pods:
            if chosen_g_node_ip_and_hostname.intersection(set(g_pod[1:])):
                host_to_run_cmds = self.node
                g_pod_prefix, g_pod = 'oc exec %s -- ' % g_pod[0], g_pod[0]
                break
        else:
            err_msg = (
                'Failed to find Gluster pod filtering it by following IPs '
                'and hostnames: %s\nFound following Gluster pods: %s') % (
                    chosen_g_node_ip_and_hostname, g_pods)
            g.log.error(err_msg)
            raise AssertionError(err_msg)
    else:
        host_to_run_cmds, g_pod_prefix, g_pod = chosen_g_node_ip, '', ''

    # Schedule deletion of targetcli process
    file_for_bkp, pvc_number = "~/.targetcli/prefs.bin", 10
    # Back up the targetcli prefs file first: the kill loop may corrupt
    # it (see the BZ-1769426 handling in the finally block below).
    self.cmd_run(
        "%scp %s %s_backup" % (g_pod_prefix, file_for_bkp, file_for_bkp),
        hostname=host_to_run_cmds)
    self.addCleanup(
        self.cmd_run,
        "%srm -f %s_backup" % (g_pod_prefix, file_for_bkp),
        hostname=host_to_run_cmds)
    kill_targetcli_services_cmd = (
        "while true; do "
        " %spkill targetcli || echo 'failed to kill targetcli process'; "
        "done" % g_pod_prefix)
    loop_for_killing_targetcli_process = g.run_async(
        host_to_run_cmds, kill_targetcli_services_cmd, "root")
    try:
        # Create bunch of PVCs
        sc_name, pvc_names = self.create_storage_class(), []
        for i in range(pvc_number):
            pvc_names.append(oc_create_pvc(self.node, sc_name, pvc_size=1))
        self.addCleanup(
            wait_for_resources_absence, self.node, 'pvc', pvc_names)
        self.addCleanup(
            oc_delete, self.node, 'pvc', ' '.join(pvc_names))

        # Check that we get expected number of provisioning errors
        timeout, wait_step, succeeded_pvcs, failed_pvcs = 120, 1, [], []
        _waiter, err_msg = Waiter(timeout=timeout, interval=wait_step), ""
        for pvc_name in pvc_names:
            # Reset the shared waiter so each PVC gets the full timeout.
            _waiter._attempt = 0
            for w in _waiter:
                events = get_events(
                    self.node, pvc_name, obj_type="PersistentVolumeClaim")
                for event in events:
                    if event['reason'] == 'ProvisioningSucceeded':
                        succeeded_pvcs.append(pvc_name)
                        break
                    elif event['reason'] == 'ProvisioningFailed':
                        failed_pvcs.append(pvc_name)
                        break
                else:
                    # No terminal event yet - keep polling.
                    continue
                break
            if w.expired:
                err_msg = (
                    "Failed to get neither 'ProvisioningSucceeded' nor "
                    "'ProvisioningFailed' statuses for all the PVCs in "
                    "time. Timeout was %ss, interval was %ss." % (
                        timeout, wait_step))
                g.log.error(err_msg)
                raise AssertionError(err_msg)
        self.assertGreater(len(failed_pvcs), len(succeeded_pvcs))
    finally:
        # Restore targetcli workability
        loop_for_killing_targetcli_process._proc.terminate()

        # Revert breakage back which can be caused by BZ-1769426
        # NOTE(review): 'err_msg' below is only bound inside the try
        # block; an exception raised before that assignment would make
        # this branch fail with NameError - confirm intended.
        check_bkp_file_size_cmd = ("%sls -lah %s | awk '{print $5}'" % (
            g_pod_prefix, file_for_bkp))
        bkp_file_size = self.cmd_run(
            check_bkp_file_size_cmd, hostname=host_to_run_cmds).strip()
        if bkp_file_size == "0":
            self.cmd_run(
                "%smv %s_backup %s" % (
                    g_pod_prefix, file_for_bkp, file_for_bkp),
                hostname=host_to_run_cmds)
            breakage_err_msg = (
                "File located at '%s' was corrupted (zero size) on the "
                "%s. Looks like BZ-1769426 took effect. \n"
                "Don't worry, it has been restored after test failure." % (
                    file_for_bkp,
                    "'%s' Gluster pod" % g_pod
                    if g_pod else "'%s' Gluster node" % chosen_g_node_ip))
            g.log.error(breakage_err_msg)
            if err_msg:
                breakage_err_msg = "%s\n%s" % (err_msg, breakage_err_msg)
            raise AssertionError(breakage_err_msg)

    # Wait for all the PVCs to be in bound state
    wait_for_pvcs_be_bound(self.node, pvc_names, timeout=300, wait_step=5)
def test_expansion_of_block_hosting_volume_using_heketi(self):
    """Verify that after expanding block hosting volume we are able to
    consume the expanded space"""
    h_node = self.heketi_client_node
    h_url = self.heketi_server_url
    bvols_in_bhv = set([])
    bvols_pv = set([])

    BHVS = get_block_hosting_volume_list(h_node, h_url)

    free_BHVS_count = 0
    for vol in BHVS.keys():
        info = heketi_volume_info(h_node, h_url, vol, json=True)
        if info['blockinfo']['freesize'] > 0:
            free_BHVS_count += 1
        # The test needs a single BHV with free space so that all new
        # block volumes land in the one we track and later expand.
        if free_BHVS_count > 1:
            self.skipTest("Skip test case because there is more than one"
                          " Block Hosting Volume with free space")

    # create block volume of 1gb
    bvol_info = heketi_blockvolume_create(h_node, h_url, 1, json=True)

    expand_size = 20
    try:
        self.verify_free_space(expand_size)
        bhv = bvol_info['blockhostingvolume']
        vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
        bvols_in_bhv.update(vol_info['blockinfo']['blockvolume'])
    finally:
        # cleanup BHV if there is only one block volume inside it
        if len(bvols_in_bhv) == 1:
            self.addCleanup(
                heketi_volume_delete, h_node, h_url, bhv, json=True)
        self.addCleanup(
            heketi_blockvolume_delete, h_node, h_url, bvol_info['id'])

    size = vol_info['size']
    free_size = vol_info['blockinfo']['freesize']
    bvol_count = int(free_size / expand_size)
    bricks = vol_info['bricks']

    # create pvs to fill the BHV
    # When the free space is smaller than one expand_size chunk, create
    # a single PVC consuming all remaining free space instead.
    pvcs = self.create_and_wait_for_pvcs(
        pvc_size=(expand_size if bvol_count else free_size),
        pvc_amount=(bvol_count or 1), timeout=300)

    vol_expand = True

    # Two passes: verify before expansion, then expand once and verify
    # again that the newly created PVCs landed in the same BHV.
    for i in range(2):
        # get the vol ids from pvcs
        for pvc in pvcs:
            pv = get_pv_name_from_pvc(self.node, pvc)
            custom = r':.metadata.annotations."gluster\.org\/volume-id"'
            bvol_id = oc_get_custom_resource(self.node, 'pv', custom, pv)
            bvols_pv.add(bvol_id[0])

        vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
        bvols = vol_info['blockinfo']['blockvolume']
        bvols_in_bhv.update(bvols)
        # Every PV-backed block volume must be a member of the tracked BHV.
        self.assertEqual(bvols_pv, (bvols_in_bhv & bvols_pv))

        # Expand BHV and verify bricks and size of BHV
        if vol_expand:
            vol_expand = False
            heketi_volume_expand(
                h_node, h_url, bhv, expand_size, json=True)
            vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)

            self.assertEqual(size + expand_size, vol_info['size'])
            # Replica-3 volume: brick count stays a multiple of 3.
            self.assertFalse(len(vol_info['bricks']) % 3)
            self.assertLess(len(bricks), len(vol_info['bricks']))

            # create more PVCs in expanded BHV
            pvcs = self.create_and_wait_for_pvcs(
                pvc_size=(expand_size - 1), pvc_amount=1)
def test_prometheous_kill_bhv_brick_process(self):
    """Validate kill brick process of block hosting
    volume with prometheus workload running"""

    # Add check for CRS version
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    if not self.is_containerized_gluster():
        self.skipTest("Skipping this test case as CRS"
                      " version check can not be implemented")

    # Get one of the prometheus pod name and respective pvc name
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    prometheus_pods = openshift_ops.oc_get_pods(
        self._master, selector=self._prometheus_resources_selector)
    if not prometheus_pods:
        # BUGFIX: unittest's skipTest() takes a single 'reason'
        # argument; the original passed the (empty) pod mapping as an
        # extra positional argument, raising TypeError on this path.
        self.skipTest(
            "Skipping test as prometheus pod is not present")

    # Validate iscsi and multipath
    prometheus_pod = list(prometheus_pods.keys())[0]
    pvc_name = openshift_ops.oc_get_custom_resource(
        self._master, "pod",
        ":.spec.volumes[*].persistentVolumeClaim.claimName",
        prometheus_pod)
    self.assertTrue(pvc_name, "Failed to get PVC name")
    pvc_name = pvc_name[0]
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, prometheus_pod, rtype='pod',
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    # Try to fetch metric from prometheus pod
    self._fetch_metric_from_promtheus_pod(
        metric='heketi_device_brick_count')

    # Kill the brick process of a BHV
    gluster_node = list(self._registry_servers_info.keys())[0]
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    bhv_name = self.get_block_hosting_volume_by_pvc_name(
        pvc_name, heketi_server_url=self._registry_heketi_server_url,
        gluster_node=gluster_node, ocp_client_node=self._master)
    vol_status = gluster_ops.get_gluster_vol_status(bhv_name)
    gluster_node_ip, brick_pid = None, None
    for g_node, g_node_data in vol_status.items():
        for process_name, process_data in g_node_data.items():
            # Brick processes are keyed by their brick path under /var.
            if process_name.startswith("/var"):
                gluster_node_ip = g_node
                brick_pid = process_data["pid"]
                break
        if gluster_node_ip and brick_pid:
            break
    self.assertIsNotNone(brick_pid, "Could not find pid for brick")
    cmd = "kill -9 {}".format(brick_pid)
    openshift_ops.cmd_run_on_gluster_pod_or_node(
        self._master, cmd, gluster_node_ip)
    self.addCleanup(self._guster_volume_cleanup, bhv_name)

    # Check if the brick-process has been killed
    killed_pid_cmd = (
        "ps -p {} -o pid --no-headers".format(brick_pid))
    try:
        openshift_ops.cmd_run_on_gluster_pod_or_node(
            self._master, killed_pid_cmd, gluster_node_ip)
    except exceptions.ExecutionError:
        # 'ps -p <pid>' failing means the process is gone, as intended.
        g.log.info("Brick process {} was killed"
                   "successfully".format(brick_pid))

    # Try to fetch metric from prometheus pod
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    self._fetch_metric_from_promtheus_pod(
        metric='heketi_device_brick_count')

    # Start the bhv using force
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    start_vol, _, _ = volume_ops.volume_start(
        gluster_node_ip, bhv_name, force=True)
    self.assertFalse(
        start_vol, "Failed to start volume {}"
        " using force".format(bhv_name))

    # Validate iscsi and multipath
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, prometheus_pod, rtype='pod',
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    # Try to fetch metric from prometheus pod
    self._fetch_metric_from_promtheus_pod(
        metric='heketi_device_brick_count')
def verify_iscsi_sessions_and_multipath(
        self, pvc_name, rname, rtype='dc', heketi_server_url=None,
        is_registry_gluster=False):
    """Verify iSCSI sessions and multipath topology for a block PVC.

    Args:
        pvc_name (str): PVC backed by a gluster-block volume.
        rname (str): resource name owning the consuming pod.
        rtype (str): resource kind of ``rname``: 'dc', 'pod' or 'rc'.
        heketi_server_url (str): heketi URL; defaults to
            ``self.heketi_server_url``.
        is_registry_gluster (bool): use the registry gluster servers
            instead of the default ones.

    Returns:
        tuple: (iqn, hacount, node) of the verified volume/pod.

    Raises:
        NameError: when ``rtype`` is not one of 'dc', 'pod', 'rc'.
    """
    if not heketi_server_url:
        heketi_server_url = self.heketi_server_url

    # Get storage ips of glusterfs pods
    keys = (list(g.config['gluster_registry_servers'].keys())
            if is_registry_gluster else self.gluster_servers)
    servers_info = (g.config['gluster_registry_servers']
                    if is_registry_gluster else self.gluster_servers_info)
    gluster_ips = []
    for key in keys:
        gluster_ips.append(servers_info[key]['storage'])
    gluster_ips.sort()

    # Find iqn and hacount from volume info
    pv_name = get_pv_name_from_pvc(self.ocp_client[0], pvc_name)
    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
    vol_id = oc_get_custom_resource(
        self.ocp_client[0], 'pv', custom, pv_name)[0]
    vol_info = heketi_blockvolume_info(
        self.heketi_client_node, heketi_server_url, vol_id, json=True)
    iqn = vol_info['blockvolume']['iqn']
    hacount = int(vol_info['hacount'])

    # Find node on which pod is running
    if rtype == 'dc':
        pod_name = get_pod_name_from_dc(self.ocp_client[0], rname)
        pod_info = oc_get_pods(
            self.ocp_client[0], selector='deploymentconfig=%s' % rname)
    elif rtype == 'pod':
        pod_info = oc_get_pods(self.ocp_client[0], name=rname)
        pod_name = rname
    elif rtype == 'rc':
        pod_name = get_pod_name_from_rc(self.ocp_client[0], rname)
        pod_info = oc_get_pods(
            self.ocp_client[0], selector='name=%s' % rname)
    else:
        # BUGFIX: the original message omitted the supported 'rc' value.
        raise NameError(
            "Value of rtype should be one of 'dc', 'pod' or 'rc'")
    node = pod_info[pod_name]['node']

    # Get the iscsi sessions info from the node
    iscsi = get_iscsi_session(node, iqn)
    msg = ('Only %s iscsi sessions are present on node %s, expected %s.'
           % (iscsi, node, hacount))
    self.assertEqual(hacount, len(iscsi), msg)
    iscsi.sort()
    msg = ("Only gluster Nodes %s were expected in iscsi sessions, "
           "but got other Nodes %s on Node %s" % (
               gluster_ips, iscsi, node))
    # Every session endpoint must be one of the known gluster IPs.
    self.assertEqual(set(iscsi), (set(gluster_ips) & set(iscsi)), msg)

    # Get the paths info from the node
    devices = get_iscsi_block_devices_by_path(node, iqn)
    msg = ("Only %s devices are present on Node %s, expected %s" % (
        devices, node, hacount,))
    self.assertEqual(hacount, len(devices), msg)

    # Get mpath names and verify that only one mpath is there
    mpaths = set()
    for device in devices.keys():
        mpaths.add(get_mpath_name_from_device_name(node, device))
    msg = ("Only one mpath was expected on Node %s, but got %s" % (
        node, mpaths))
    self.assertEqual(1, len(mpaths), msg)

    validate_multipath_pod(
        self.ocp_client[0], pod_name, hacount, mpath=list(mpaths)[0])

    return iqn, hacount, node
def test_brick_evict_on_more_than_three_node_with_one_down(self):
    """Test brick evict basic functionality and verify brick evict
    will success after one node down out of more than three nodes"""
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Create heketi volume
    vol_info = heketi_ops.heketi_volume_create(
        h_node, h_server, 1, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete,
        h_node, h_server, vol_info.get('id'))

    # Get node on which heketi pod is scheduled
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        self.ocp_client, self.heketi_dc_name)
    heketi_node = openshift_ops.oc_get_custom_resource(
        self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]

    # Get brick id and glusterfs node which is not heketi node
    brick_id, hostname = None, None
    for node in vol_info.get('bricks', {}):
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node.get('node'), json=True)
        hostname = node_info.get('hostnames').get('manage')[0]
        if hostname != heketi_node:
            brick_id = node.get('id')
            break
    # BUGFIX: the original raised NameError further down when every brick
    # happened to live on the heketi node; fail with a clear message.
    self.assertTrue(
        brick_id,
        "Failed to find a brick on a node other than heketi node %s "
        "for volume %s" % (heketi_node, vol_info.get('id')))

    self._power_off_node_and_wait_node_to_be_not_ready(hostname)

    # Perform brick evict operation
    heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)

    # Get volume info after brick evict operation
    vol_info_new = heketi_ops.heketi_volume_info(
        h_node, h_server, vol_info.get('id'), json=True)

    # Get previous and new bricks from volume
    bricks_old = {brick.get('path') for brick in vol_info.get("bricks")}
    bricks_new = {
        brick.get('path') for brick in vol_info_new.get("bricks")}
    self.assertEqual(
        len(bricks_new - bricks_old), 1,
        "Brick was not replaced with brick evict for vol \n {}".format(
            vol_info_new))

    # Get gluster volume info
    g_vol_info = self._get_gluster_vol_info(vol_info_new.get('name'))

    # Validate bricks on gluster volume and heketi volume
    g_bricks = {
        brick.get('name').split(":")[1]
        for brick in g_vol_info.get("bricks", {}).get("brick")}
    self.assertEqual(
        bricks_new, g_bricks,
        "gluster vol info and heketi vol info "
        "mismatched after brick evict {} \n {}".format(
            g_bricks, g_vol_info))
def test_pv_resize_when_heketi_down(self):
    """Create a PVC and try to expand it when heketi is down, It should
    fail. After heketi is up, expand PVC should work.
    """
    self.create_storage_class(allow_volume_expansion=True)
    pvc_name = self.create_and_wait_for_pvc()
    dc_name, pod_name = self.create_dc_with_pvc(pvc_name)

    # Resolve the heketi volume backing the PV
    pv_name = get_pv_name_from_pvc(self.node, pvc_name)
    custom = (r':metadata.annotations.'
              r'"gluster\.kubernetes\.io\/heketi-volume-id"')
    vol_id = oc_get_custom_resource(self.node, 'pv', custom, pv_name)[0]

    h_vol_info = heketi_ops.heketi_volume_info(
        self.heketi_client_node, self.heketi_server_url, vol_id,
        json=True)

    # Bring the heketi POD down
    scale_dc_pod_amount_and_wait(
        self.node, self.heketi_dc_name, pod_amount=0)
    self.addCleanup(
        scale_dc_pod_amount_and_wait, self.node,
        self.heketi_dc_name, pod_amount=1)

    cmd = 'dd if=/dev/urandom of=/mnt/%s bs=614400k count=1'
    ret, out, err = oc_rsh(self.node, pod_name, cmd % 'file1')
    self.assertFalse(ret, 'Not able to write file with err: %s' % err)
    wait_for_pod_be_ready(self.node, pod_name, 10, 5)

    # Expansion must fail while heketi is down
    resize_pvc(self.node, pvc_name, 2)
    wait_for_events(
        self.node, pvc_name, obj_type='PersistentVolumeClaim',
        event_type='Warning', event_reason='VolumeResizeFailed')

    # Verify volume was not expanded
    vol_info = get_gluster_vol_info_by_pvc_name(self.node, pvc_name)
    self.assertEqual(vol_info['gluster_vol_id'], h_vol_info['name'])
    self.assertEqual(
        len(vol_info['bricks']['brick']), len(h_vol_info['bricks']))

    # Bring the heketi POD up
    scale_dc_pod_amount_and_wait(
        self.node, self.heketi_dc_name, pod_amount=1)

    # Verify volume expansion
    verify_pvc_size(self.node, pvc_name, 2)
    vol_info = get_gluster_vol_info_by_pvc_name(self.node, pvc_name)
    self.assertFalse(len(vol_info['bricks']['brick']) % 3)
    self.assertLess(
        len(h_vol_info['bricks']), len(vol_info['bricks']['brick']))

    # Wait for remount after expansion
    for w in waiter.Waiter(timeout=30, interval=5):
        ret, out, err = oc_rsh(
            self.node, pod_name,
            "df -Ph /mnt | awk '{print $2}' | tail -1")
        self.assertFalse(ret, 'Failed with err: %s and Output: %s' % (
            err, out))
        if out.strip() == '2.0G':
            break
    # BUGFIX: the original loop fell through silently on timeout; fail
    # loudly if the expanded size never showed up in 'df'.
    if w.expired:
        raise AssertionError(
            "Expanded size '2.0G' was not reflected in the pod mount "
            "within 30 seconds")

    # Write data making sure we have more space than it was
    ret, out, err = oc_rsh(self.node, pod_name, cmd % 'file2')
    self.assertFalse(ret, 'Not able to write file with err: %s' % err)

    # Verify pod is running
    wait_for_pod_be_ready(self.node, pod_name, 10, 5)
def test_respin_es_pod(self, motive):
    """Validate respin of elastic search pod.

    :param motive: 'delete' to respin the pod directly, or 'drain' to
        drain the node hosting the pod so it gets rescheduled
    """
    # Get the pod name and PVC name
    es_pod = openshift_ops.get_pod_name_from_dc(
        self._master, self._logging_es_dc)
    pvc_custom = ":.spec.volumes[*].persistentVolumeClaim.claimName"
    pvc_name = openshift_ops.oc_get_custom_resource(
        self._master, "pod", pvc_custom, es_pod)[0]

    # Validate iscsi and multipath
    _, _, node = self.verify_iscsi_sessions_and_multipath(
        pvc_name, self._logging_es_dc,
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    if motive == 'delete':
        # Delete the es pod; cleanup is registered first so a new pod is
        # waited for even if the delete call itself fails midway
        self.addCleanup(self._delete_and_wait_for_new_es_pod_to_come_up)
        openshift_ops.oc_delete(self._master, "pod", es_pod)
    elif motive == 'drain':
        # Get the number of infra nodes
        infra_node_count_cmd = (
            'oc get nodes '
            '--no-headers -l node-role.kubernetes.io/infra=true|wc -l')
        infra_node_count = command.cmd_run(
            infra_node_count_cmd, self._master)

        # Skip test case if number infra nodes are less than #2
        if int(infra_node_count) < 2:
            self.skipTest('Available number of infra nodes "{}", it should'
                          ' be more than 1'.format(infra_node_count))

        # Cleanup to make node schedulable
        cmd_schedule = (
            'oc adm manage-node {} --schedulable=true'.format(node))
        self.addCleanup(
            command.cmd_run, cmd_schedule, hostname=self._master)

        # Drain the node
        drain_cmd = ('oc adm drain {} --force=true --ignore-daemonsets '
                     '--delete-local-data'.format(node))
        command.cmd_run(drain_cmd, hostname=self._master)

    # Wait for pod to get absent
    openshift_ops.wait_for_resource_absence(self._master, "pod", es_pod)

    # Wait for new pod to come up
    try:
        pod_name = openshift_ops.get_pod_name_from_dc(
            self._master, self._logging_es_dc)
        openshift_ops.wait_for_pod_be_ready(self._master, pod_name)
    except exceptions.ExecutionError:
        # Fall back to a forced respin when the new pod never went ready
        self._delete_and_wait_for_new_es_pod_to_come_up()

    # Validate iscsi and multipath again after the respin
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, self._logging_es_dc,
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)
def test_restart_prometheus_glusterfs_pod(self):
    """Validate restarting glusterfs pod"""

    # Add check for CRS version
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    if not self.is_containerized_gluster():
        self.skipTest(
            "Skipping this test case as CRS version check "
            "can not be implemented")

    # Get one of the prometheus pod name and respective pvc name
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    prometheus_pods = openshift_ops.oc_get_pods(
        self._master, selector=self._prometheus_resources_selector)
    if not prometheus_pods:
        # BUGFIX: skipTest() accepts a single 'reason' argument; the
        # original passed 'prometheus_pods' as an extra positional
        # argument which raised TypeError instead of skipping.
        self.skipTest("Skipping test as prometheus pod is not present")
    prometheus_pod = list(prometheus_pods.keys())[0]
    pvc_name = openshift_ops.oc_get_custom_resource(
        self._master, "pod",
        ":.spec.volumes[*].persistentVolumeClaim.claimName",
        prometheus_pod)[0]
    self.assertTrue(
        pvc_name,
        "Failed to get pvc name from {} pod".format(prometheus_pod))
    iqn, _, node = self.verify_iscsi_sessions_and_multipath(
        pvc_name, prometheus_pod, rtype='pod',
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    # Get the ip of active path
    devices = openshift_storage_libs.get_iscsi_block_devices_by_path(
        node, iqn)
    mpath = openshift_storage_libs.get_mpath_name_from_device_name(
        node, list(devices.keys())[0])
    mpath_dev = (
        openshift_storage_libs.get_active_and_enabled_devices_from_mpath(
            node, mpath))
    node_ip = devices[mpath_dev['active'][0]]

    # Get the name of gluster pod from the ip
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    gluster_pods = openshift_ops.get_ocp_gluster_pod_details(self._master)
    active_pod_name = list(
        filter(lambda pod: (pod["pod_host_ip"] == node_ip), gluster_pods)
    )[0]["pod_name"]
    err_msg = "Failed to get the gluster pod name {} with active path"
    self.assertTrue(active_pod_name, err_msg.format(active_pod_name))
    g_pods = [pod['pod_name'] for pod in gluster_pods]
    g_pods.remove(active_pod_name)
    pod_list = [active_pod_name, g_pods[0]]
    for pod_name in pod_list:

        # Delete the glusterfs pods
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        g_pod_list_before = [
            pod["pod_name"]
            for pod in openshift_ops.get_ocp_gluster_pod_details(
                self._master)]

        openshift_ops.oc_delete(self._master, 'pod', pod_name)
        self.addCleanup(self._guster_pod_delete, g_pod_list_before)

        # Wait for gluster pod to be absent
        openshift_ops.wait_for_resource_absence(
            self._master, 'pod', pod_name)

        # Try to fetch metric from prometheus pod
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

        # Wait for new pod to come up
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        self.assertTrue(
            self._get_newly_deployed_gluster_pod(g_pod_list_before),
            "Failed to get new pod")
        self._wait_for_gluster_pod_be_ready(g_pod_list_before)

        # Validate iscsi and multipath
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self.verify_iscsi_sessions_and_multipath(
            pvc_name, prometheus_pod, rtype='pod',
            heketi_server_url=self._registry_heketi_server_url,
            is_registry_gluster=True)

        # Try to fetch metric from prometheus pod
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')
def test_heketi_metrics_validation_with_node_reboot(self):
    """Validate heketi metrics after node reboot using prometheus"""
    initial_metrics, final_metrics = {}, {}

    # Use storage project
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)

    # Get initial metrics result straight from heketi
    h_node, h_server = self.heketi_client_node, self.heketi_server_url
    initial_metrics = tuple(
        heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
        for metric in self.metrics)

    # Use prometheus project
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)

    # Get initial prometheus result for the same metric names
    initial_prometheus = self._get_and_manipulate_metric_data(
        self.metrics)

    # Get hosted node IP of heketi pod
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        self._master, self.heketi_dc_name)
    heketi_node = openshift_ops.oc_get_custom_resource(
        self._master, 'pod', '.:spec.nodeName', heketi_pod)[0]

    # Reboot the node on which heketi pod is scheduled; the cleanup is
    # registered first so pods are re-verified even if the reboot
    # sequence fails mid-way
    self.addCleanup(
        self._check_heketi_and_gluster_pod_after_node_reboot, heketi_node)
    node_ops.node_reboot_by_command(heketi_node)

    # Wait node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 10):
        status = openshift_ops.oc_get_custom_resource(
            self._master, 'node', custom, heketi_node)
        if status[0] == 'False':
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(heketi_node))

    # Wait for node to become ready
    openshift_ops.wait_for_ocp_node_be_ready(self._master, heketi_node)

    # Wait for heketi and glusterfs pod to become ready
    self._check_heketi_and_gluster_pod_after_node_reboot(heketi_node)

    # Use prometheus project
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)

    # Get final metrics result
    final_metrics = tuple(
        heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
        for metric in self.metrics)

    # Get final prometheus result
    final_prometheus = self._get_and_manipulate_metric_data(
        self.metrics)

    # Metrics must be unchanged by the reboot, both on heketi side and
    # as scraped by prometheus
    err_msg = "Initial value {} is not same as final value {}"
    self.assertEqual(
        initial_metrics, final_metrics,
        err_msg.format(initial_metrics, final_metrics))
    self.assertEqual(
        initial_prometheus, final_prometheus,
        err_msg.format(initial_prometheus, final_prometheus))
def test_udev_usage_in_container(self):
    """Validate LVM inside container does not use udev"""

    # Skip the TC if independent mode deployment
    if not self.is_containerized_gluster():
        self.skipTest(
            "Skipping this test case as it needs to run on "
            "converged mode deployment")

    h_client, h_url = self.heketi_client_node, self.heketi_server_url
    server_info = list(g.config.get('gluster_servers').values())[0]
    server_node = server_info.get('manage')
    additional_device = server_info.get('additional_devices')[0]

    # command to run pvscan
    cmd_pvscan = "timeout 300 pvscan"

    # Get the gluster pod running on the chosen host
    # NOTE(review): assumes self.pod_name always contains an entry for
    # server_node; otherwise 'pod_name' is unbound below — TODO confirm
    for pod_info in self.pod_name:
        if pod_info.get('pod_hostname') == server_node:
            pod_name = pod_info.get('pod_name')
            break

    # Create file volume
    vol_info = heketi_ops.heketi_volume_create(
        h_client, h_url, self.volume_size, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete, h_client, h_url,
        vol_info.get("id"))

    # Create block volume
    block_vol_info = heketi_ops.heketi_blockvolume_create(
        h_client, h_url, self.volume_size, json=True)
    self.addCleanup(
        heketi_ops.heketi_blockvolume_delete, h_client, h_url,
        block_vol_info.get("id"))

    # Check dmeventd service in container; the rsh helper is expected to
    # raise AssertionError when the command exits non-zero, i.e. when the
    # service is NOT active — which is the desired state here
    err_msg = "dmeventd.service is running on setup"
    with self.assertRaises(AssertionError, msg=err_msg):
        openshift_ops.oc_rsh(
            self.oc_node, pod_name,
            "systemctl is-active dmeventd.service")

    # Service dmeventd should not be running in background
    with self.assertRaises(AssertionError, msg=err_msg):
        openshift_ops.oc_rsh(
            self.oc_node, pod_name, "ps aux | grep dmeventd.service")

    # Perform a pvscan in container
    openshift_ops.oc_rsh(self.oc_node, pod_name, cmd_pvscan)

    # Get heketi node matching the chosen host to add new device
    heketi_node_list = heketi_ops.heketi_node_list(h_client, h_url)
    for h_node_id in heketi_node_list:
        h_node_info = heketi_ops.heketi_node_info(
            h_client, h_url, h_node_id, json=True)
        h_node_host = h_node_info.get('hostnames', {}).get('manage')[0]
        if h_node_host == server_node:
            break

    # Add new device to the node
    heketi_ops.heketi_device_add(
        h_client, h_url, additional_device, h_node_id)
    h_node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node_id, json=True)
    h_device_id = [
        device.get('id')
        for device in h_node_info.get('devices')
        if device.get('name') == additional_device
    ]
    # Cleanups run LIFO, so the effective order is disable -> remove ->
    # delete, which is the order heketi requires
    self.addCleanup(
        heketi_ops.heketi_device_delete, h_client, h_url, h_device_id[0])
    self.addCleanup(
        heketi_ops.heketi_device_remove, h_client, h_url, h_device_id[0])
    self.addCleanup(
        heketi_ops.heketi_device_disable, h_client, h_url,
        h_device_id[0])

    # Reboot the node on which device is added
    self.addCleanup(
        self._check_heketi_and_gluster_pod_after_node_reboot, server_node)
    node_ops.node_reboot_by_command(server_node)

    # Wait node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 10):
        status = openshift_ops.oc_get_custom_resource(
            self.oc_node, 'node', custom, server_node)
        if status[0] == 'False':
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring node down {}".format(server_node))

    # Wait for node to become ready
    openshift_ops.wait_for_ocp_node_be_ready(self.oc_node, server_node)

    # Wait for heketi and glusterfs pod to become ready
    self._check_heketi_and_gluster_pod_after_node_reboot(server_node)

    # Perform a pvscan in container after the reboot
    openshift_ops.oc_rsh(self.oc_node, pod_name, cmd_pvscan)
def dynamic_provisioning_glusterfile(self, create_vol_name_prefix):
    """Dynamically provision a glusterfs file volume and exercise it.

    Creates a storage class and PVC, attaches the PVC to an app pod,
    verifies the heketi/gluster volume naming, then performs basic file
    I/O on the mount.

    :param create_vol_name_prefix: when truthy, create the storage class
        with a volume name prefix and verify the prefix is applied
    """
    # Create secret and storage class
    self.create_storage_class(
        create_vol_name_prefix=create_vol_name_prefix)

    # Create PVC
    pvc_name = self.create_and_wait_for_pvc()

    # Create DC with POD and attached PVC to it.
    dc_name = oc_create_app_dc_with_io(
        self.node, pvc_name, image=self.io_container_image_cirros)
    self.addCleanup(oc_delete, self.node, 'dc', dc_name)
    self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)

    pod_name = get_pod_name_from_dc(self.node, dc_name)
    wait_for_pod_be_ready(self.node, pod_name)

    # Verify Heketi volume name for prefix presence if provided
    if create_vol_name_prefix:
        ret = verify_volume_name_prefix(
            self.node, self.sc['volumenameprefix'],
            self.sc['secretnamespace'], pvc_name, self.sc['resturl'])
        self.assertTrue(ret, "verify volnameprefix failed")
    else:
        # Get the volume name and volume id from PV
        pv_name = get_pv_name_from_pvc(self.ocp_client[0], pvc_name)
        custom = [
            r':spec.glusterfs.path',
            r':metadata.annotations.'
            r'"gluster\.kubernetes\.io\/heketi-volume-id"']
        pv_vol_name, vol_id = oc_get_custom_resource(
            self.ocp_client[0], 'pv', custom, pv_name)

        # check if the pv_volume_name is present in heketi
        # Check if volume name is "vol_"+volumeid or not
        heketi_vol_name = heketi_volume_info(
            self.ocp_client[0], self.heketi_server_url, vol_id,
            json=True)['name']
        # BUGFIX: the original implicit string concatenations were
        # missing separating spaces ("not foundin heketidb",
        # "have acustom perfix") and misspelled "prefix".
        self.assertEqual(
            pv_vol_name, heketi_vol_name,
            'Volume with vol_id = %s not found in heketidb' % vol_id)
        self.assertEqual(
            heketi_vol_name, 'vol_' + vol_id,
            'Volume with vol_id = %s has a custom prefix' % vol_id)
        out = cmd_run_on_gluster_pod_or_node(
            self.ocp_master_node[0], "gluster volume list")
        self.assertIn(
            pv_vol_name, out,
            "Volume with id %s does not exist" % vol_id)

    # Make sure we are able to work with files on the mounted volume
    filepath = "/mnt/file_for_testing_io.log"
    for cmd in ("dd if=/dev/urandom of=%s bs=1K count=100",
                "ls -lrt %s",
                "rm -rf %s"):
        cmd = cmd % filepath
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0,
            "Failed to execute '%s' command on %s" % (cmd, self.node))
def test_set_heketi_vol_size_and_brick_amount_limits(self):
    """Validate heketi brick size and brick amount limits.

    Patches the heketi config secret with custom
    brick_min_size_gb/brick_max_size_gb/max_bricks_per_volume values,
    verifies volumes outside those limits are rejected, then restores
    the original secret and verifies the old limits apply again.
    """
    # Get Heketi secret name
    cmd_get_heketi_secret_name = (
        "oc get dc -n %s %s -o jsonpath='{.spec.template.spec.volumes"
        "[?(@.name==\"config\")].secret.secretName}'" % (
            self.storage_project_name, self.heketi_dc_name))
    heketi_secret_name = self.cmd_run(cmd_get_heketi_secret_name)

    # Read Heketi secret data
    self.node = self.ocp_master_node[0]
    heketi_secret_data_str_base64 = oc_get_custom_resource(
        self.node, "secret", r":.data.'heketi\.json'",  # noqa
        name=heketi_secret_name)[0]
    heketi_secret_data_str = self.cmd_run(
        "echo %s | base64 -d" % heketi_secret_data_str_base64)
    heketi_secret_data = json.loads(heketi_secret_data_str)

    # Update Heketi secret data with tightened limits
    brick_min_size_gb, brick_max_size_gb = 2, 4
    heketi_secret_data["glusterfs"].update({
        "brick_min_size_gb": brick_min_size_gb,
        "brick_max_size_gb": brick_max_size_gb,
        "max_bricks_per_volume": 3,
    })
    heketi_secret_data_patched = json.dumps(heketi_secret_data)
    heketi_secret_data_str_encoded = self.cmd_run(
        "echo '%s' |base64" % heketi_secret_data_patched).replace(
            '\n', '')
    h_client, h_server = self.heketi_client_node, self.heketi_server_url
    try:
        # Patch Heketi secret; the trailing "%s" keeps one placeholder
        # free so the same template is reused for patch and revert
        cmd_patch_heketi_secret = (
            'oc patch secret -n %s %s -p '
            '"{\\"data\\": {\\"heketi.json\\": \\"%s\\"}}"') % (
                self.storage_project_name, heketi_secret_name, "%s")
        self.cmd_run(
            cmd_patch_heketi_secret % heketi_secret_data_str_encoded)

        # Recreate the Heketi pod to make it reuse updated configuration
        scale_dc_pod_amount_and_wait(self.node, self.heketi_dc_name, 0)
        scale_dc_pod_amount_and_wait(self.node, self.heketi_dc_name, 1)

        # Try to create too small and too big volumes
        # It must fail because allowed range is not satisfied
        for gb in (brick_min_size_gb - 1, brick_max_size_gb + 1):
            try:
                vol_1 = heketi_volume_create(
                    h_client, h_server, size=gb, json=True)
            except AssertionError:
                # Expected: heketi rejected the out-of-range size
                pass
            else:
                self.addCleanup(
                    heketi_volume_delete, h_client, h_server,
                    vol_1['id'])
                self.assertFalse(
                    vol_1,
                    "Volume '%s' got unexpectedly created. Heketi server "
                    "configuration haven't made required effect." % (
                        vol_1.get('id', 'failed_to_get_heketi_vol_id')))

        # Create the smallest allowed volume
        vol_2 = heketi_volume_create(
            h_client, h_server, size=brick_min_size_gb, json=True)
        self.addCleanup(
            heketi_volume_delete, h_client, h_server, vol_2['id'])

        # Try to expand volume, it must fail due to the brick amount limit
        self.assertRaises(
            AssertionError, heketi_volume_expand, h_client,
            h_server, vol_2['id'], 2)

        # Create the largest allowed volume
        vol_3 = heketi_volume_create(
            h_client, h_server, size=brick_max_size_gb, json=True)
        heketi_volume_delete(h_client, h_server, vol_3['id'])
    finally:
        # Revert the Heketi configuration back
        self.cmd_run(
            cmd_patch_heketi_secret % heketi_secret_data_str_base64)
        scale_dc_pod_amount_and_wait(self.node, self.heketi_dc_name, 0)
        scale_dc_pod_amount_and_wait(self.node, self.heketi_dc_name, 1)

        # Create volume less than the old minimum limit
        vol_4 = heketi_volume_create(
            h_client, h_server, size=(brick_min_size_gb - 1), json=True)
        self.addCleanup(
            heketi_volume_delete, h_client, h_server, vol_4['id'])

        # Create volume bigger than the old maximum limit and expand it
        vol_5 = heketi_volume_create(
            h_client, h_server, size=(brick_max_size_gb + 1), json=True)
        self.addCleanup(
            heketi_volume_delete, h_client, h_server, vol_5['id'])
        heketi_volume_expand(h_client, h_server, vol_5['id'], 2)
def test_expansion_of_block_hosting_volume_using_heketi(self):
    """Verify that after expanding block hosting volume we are able to
    consume the expanded space"""
    h_node = self.heketi_client_node
    h_url = self.heketi_server_url
    bvols_in_bhv = set([])
    bvols_pv = set([])

    BHVS = get_block_hosting_volume_list(h_node, h_url)

    # Skip as soon as a second BHV with free space is found — the test
    # needs block volumes to land on a single predictable BHV
    free_BHVS_count = 0
    for vol in BHVS.keys():
        info = heketi_volume_info(h_node, h_url, vol, json=True)
        if info['blockinfo']['freesize'] > 0:
            free_BHVS_count += 1
        if free_BHVS_count > 1:
            self.skipTest("Skip test case because there is more than one"
                          " Block Hosting Volume with free space")

    # create block volume of 1gb
    bvol_info = heketi_blockvolume_create(h_node, h_url, 1, json=True)

    expand_size = 20
    try:
        self.verify_free_space(expand_size)
        bhv = bvol_info['blockhostingvolume']
        vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
        bvols_in_bhv.update(vol_info['blockinfo']['blockvolume'])
    finally:
        # cleanup BHV if there is only one block volume inside it
        if len(bvols_in_bhv) == 1:
            self.addCleanup(
                heketi_volume_delete, h_node, h_url, bhv, json=True)
        self.addCleanup(
            heketi_blockvolume_delete, h_node, h_url, bvol_info['id'])

    size = vol_info['size']
    free_size = vol_info['blockinfo']['freesize']
    # number of expand_size-sized block volumes that fit in the free
    # space; 0 means only one smaller volume fits
    bvol_count = int(free_size / expand_size)
    bricks = vol_info['bricks']

    # create pvs to fill the BHV
    pvcs = self.create_and_wait_for_pvcs(
        pvc_size=(expand_size if bvol_count else free_size),
        pvc_amount=(bvol_count or 1), timeout=300)

    vol_expand = True

    # Two passes: verify block volumes before and after the expansion
    for i in range(2):
        # get the vol ids from pvcs
        for pvc in pvcs:
            pv = get_pv_name_from_pvc(self.node, pvc)
            custom = r':.metadata.annotations."gluster\.org\/volume-id"'
            bvol_id = oc_get_custom_resource(self.node, 'pv', custom, pv)
            bvols_pv.add(bvol_id[0])

        vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
        bvols = vol_info['blockinfo']['blockvolume']
        bvols_in_bhv.update(bvols)
        # every PV-referenced block volume must live in this BHV
        self.assertEqual(bvols_pv, (bvols_in_bhv & bvols_pv))

        # Expand BHV and verify bricks and size of BHV
        if vol_expand:
            vol_expand = False
            heketi_volume_expand(
                h_node, h_url, bhv, expand_size, json=True)
            vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)

            self.assertEqual(size + expand_size, vol_info['size'])
            self.assertFalse(len(vol_info['bricks']) % 3)
            self.assertLess(len(bricks), len(vol_info['bricks']))

            # create more PVCs in expanded BHV
            pvcs = self.create_and_wait_for_pvcs(
                pvc_size=(expand_size - 1), pvc_amount=1)
def test_heketi_node_add_with_valid_cluster(self):
    """Test heketi node add operation with valid cluster id"""
    if (openshift_storage_version.get_openshift_storage_version()
            < "3.11.4"):
        self.skipTest(
            "This test case is not supported for < OCS 3.11.4 builds due "
            "to bug BZ-1732831")

    h_client, h_server = self.heketi_client_node, self.heketi_server_url
    ocp_node = self.ocp_master_node[0]

    # Get heketi endpoints before adding node
    h_volume_ids = heketi_ops.heketi_volume_list(
        h_client, h_server, json=True)
    h_endpoints_before_new_node = heketi_ops.heketi_volume_endpoint_patch(
        h_client, h_server, h_volume_ids["volumes"][0])

    cluster_info = heketi_ops.heketi_cluster_list(
        h_client, h_server, json=True)
    storage_hostname, storage_ip = self.add_heketi_node_to_cluster(
        cluster_info["clusters"][0])

    # Get heketi nodes and validate for newly added node; the trailing
    # 'node_hostname = None' acts as a sentinel so the assert below
    # fails when no node matched (loop ends without break)
    h_node_ids = heketi_ops.heketi_node_list(
        h_client, h_server, json=True)
    for h_node_id in h_node_ids:
        node_hostname = heketi_ops.heketi_node_info(
            h_client, h_server, h_node_id, json=True)
        if node_hostname["hostnames"]["manage"][0] == storage_hostname:
            break
        node_hostname = None

    err_msg = ("Newly added heketi node %s not found in heketi node "
               "list %s" % (storage_hostname, h_node_ids))
    self.assertTrue(node_hostname, err_msg)

    # Check gluster peer status for newly added node
    if self.is_containerized_gluster():
        gluster_pods = openshift_ops.get_ocp_gluster_pod_details(
            ocp_node)
        gluster_pod = [
            gluster_pod["pod_name"]
            for gluster_pod in gluster_pods
            if gluster_pod["pod_hostname"] == storage_hostname][0]

        gluster_peer_status = peer_ops.get_peer_status(
            podcmd.Pod(ocp_node, gluster_pod))
    else:
        gluster_peer_status = peer_ops.get_peer_status(
            storage_hostname)
    self.assertEqual(
        len(gluster_peer_status), len(self.gluster_servers))

    # Each peer must report 'connected' == 1
    err_msg = "Expected peer status is 1 and actual is %s"
    for peer in gluster_peer_status:
        peer_status = int(peer["connected"])
        self.assertEqual(peer_status, 1, err_msg % peer_status)

    # Get heketi endpoints after adding node
    h_endpoints_after_new_node = heketi_ops.heketi_volume_endpoint_patch(
        h_client, h_server, h_volume_ids["volumes"][0])

    # Get openshift openshift endpoints and patch with heketi endpoints
    heketi_db_endpoint = openshift_ops.oc_get_custom_resource(
        ocp_node, "dc", name=self.heketi_dc_name,
        custom=".:spec.template.spec.volumes[*].glusterfs.endpoints")[0]
    openshift_ops.oc_patch(
        ocp_node, "ep", heketi_db_endpoint, h_endpoints_after_new_node)
    self.addCleanup(
        openshift_ops.oc_patch, ocp_node, "ep", heketi_db_endpoint,
        h_endpoints_before_new_node)
    ep_addresses = openshift_ops.oc_get_custom_resource(
        ocp_node, "ep", name=heketi_db_endpoint,
        custom=".:subsets[*].addresses[*].ip")[0].split(",")
    err_msg = "Hostname %s not present in endpoints %s" % (
        storage_ip, ep_addresses)
    self.assertIn(storage_ip, ep_addresses, err_msg)
def initiator_side_failures(self):
    """Verify iscsi sessions and multipath survive an app pod failover.

    Creates an app pod on a block PVC, validates the iscsi login
    devices and mpaths on the hosting node, respins the pod onto a
    different node and then verifies both the new node's sessions and
    that the old node logged out of the iscsi session.
    """
    # Collect the (sorted) storage ips of the glusterfs pods
    storage_ips = sorted(
        self.gluster_servers_info[server]['storage']
        for server in self.gluster_servers)

    self.create_storage_class()
    self.create_and_wait_for_pvc()

    # Derive iqn and hacount for the block volume backing the PVC
    pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
    vol_id = oc_get_custom_resource(self.node, 'pv', custom, pv_name)[0]
    vol_info = heketi_blockvolume_info(
        self.heketi_client_node, self.heketi_server_url, vol_id,
        json=True)
    iqn = vol_info['blockvolume']['iqn']
    hacount = int(self.sc['hacount'])

    # Create the app pod consuming the PVC
    dc_name, pod_name = self.create_dc_with_pvc(self.pvc_name)

    # Pass 0 validates the initial placement; pass 1 validates the state
    # after the pod has been respun on another node
    for attempt in range(2):
        # Resolve the node currently hosting the pod
        pods = oc_get_pods(
            self.node, selector='deploymentconfig=%s' % dc_name)
        hosting_node = pods[pod_name]['node']

        # Validate the iscsi sessions on that node
        sessions = get_iscsi_session(hosting_node, iqn)
        self.assertEqual(hacount, len(sessions))
        sessions.sort()
        self.assertEqual(
            set(sessions), (set(storage_ips) & set(sessions)))

        # Validate the block device paths on that node
        block_devices = get_iscsi_block_devices_by_path(
            hosting_node, iqn).keys()
        self.assertEqual(hacount, len(block_devices))

        # Exactly one mpath device must back all of the paths
        mpath_names = {
            get_mpath_name_from_device_name(hosting_node, dev)
            for dev in block_devices}
        self.assertEqual(1, len(mpath_names))

        validate_multipath_pod(
            self.node, pod_name, hacount, mpath=list(mpath_names)[0])

        # Session-logout verification only happens after the failover
        if attempt == 1:
            break

        # Cordon the hosting node so the respun pod lands elsewhere
        oc_adm_manage_node(
            self.node, '--schedulable=false', nodes=[hosting_node])
        self.addCleanup(
            oc_adm_manage_node, self.node, '--schedulable=true',
            nodes=[hosting_node])

        # Respin the pod and wait for its replacement to be ready
        oc_delete(self.node, 'pod', pod_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        # The previous node must have logged out of the iscsi session
        sessions = get_iscsi_session(
            hosting_node, iqn, raise_on_error=False)
        self.assertFalse(sessions)
def test_metrics_workload_on_prometheus(self):
    """Validate metrics workload on prometheus"""
    # Skip test if the prometheus pods are not present
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    prometheus_pods = openshift_ops.oc_get_pods(
        self._master, selector=self._prometheus_resources_selector)
    if not prometheus_pods:
        # BUGFIX: skipTest() takes a single 'reason' string; the extra
        # positional argument in the original raised TypeError instead
        # of skipping the test.
        self.skipTest("Skipping test as prometheus pod is not present")
    if not self.registry_sc:
        self.skipTest(
            "Skipping test as registry storage details are not provided")
    self._registry_project = self.registry_sc.get('restsecretnamespace')
    self.prefix = "autotest-{}".format(utils.get_random_str())

    # Get one of the prometheus pod name and respective pvc name
    prometheus_pod = list(prometheus_pods.keys())[0]
    pvc_custom = ":.spec.volumes[*].persistentVolumeClaim.claimName"
    pvc_name = openshift_ops.oc_get_custom_resource(
        self._master, "pod", pvc_custom, prometheus_pod)[0]
    self.assertTrue(
        pvc_name,
        "Failed to get PVC name for prometheus"
        " pod {}".format(prometheus_pod))
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, prometheus_pod, rtype='pod',
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    # Try to fetch metric from the prometheus pod
    self._fetch_metric_from_promtheus_pod(
        metric='kube_persistentvolumeclaim_info')

    # Create storage class
    openshift_ops.switch_oc_project(
        self._master, self._registry_project)
    self.sc_name = self.create_storage_class(
        vol_name_prefix=self.prefix, glusterfs_registry=True)
    self.addCleanup(
        openshift_ops.switch_oc_project, self._master,
        self._registry_project)

    # Create PVCs and app pods
    pvc_size, pvc_count, batch_count = 1, 5, 5
    for _ in range(batch_count):
        test_pvc_names = self.create_and_wait_for_pvcs(
            pvc_size, pvc_name_prefix=self.prefix,
            pvc_amount=pvc_count, sc_name=self.sc_name,
            timeout=600, wait_step=10)
        self.create_dcs_with_pvc(
            test_pvc_names, timeout=600, wait_step=5,
            dc_name_prefix="autotests-dc-with-app-io",
            space_to_use=1048576)

    # Check from the prometheus pod for the PVC space usage
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    mount_path = "/prometheus"
    cmd = "oc exec {0} -- df -PT {1} | grep {1}".format(
        prometheus_pod, mount_path)
    out = self.cmd_run(cmd)
    self.assertTrue(
        out, "Failed to get info about mounted volume. "
        "Output is empty.")

    # Try to fetch metric from prometheus pod
    self._fetch_metric_from_promtheus_pod(
        metric='kube_persistentvolumeclaim_info')
    self._fetch_metric_from_promtheus_pod(
        metric='kube_pod_spec_volumes_persistentvolumeclaims_info')
    self.addCleanup(
        openshift_ops.switch_oc_project, self._master,
        self._registry_project)