def test_shd_should_not_crash_executed_heal_info(self):
    """Verify shd does not crash while heal info is executed repeatedly.

    - set "entry-self-heal", "metadata-self-heal", "data-self-heal" to off
    - write a few files
    - bring down brick0
    - add IO
    - do a heal info and check for files pending heal on last 2 bricks
    - set "performance.enable-least-priority" to "enable"
    - bring down brick1
    - set the "quorum-type" to "fixed"
    - add IO
    - do a heal info and check for files pending heal on the last brick
    """
    # pylint: disable=too-many-statements
    bricks_list = get_all_bricks(self.mnode, self.volname)

    # Disable all client-side self-heal types so entries pending heal
    # accumulate deterministically and can be counted via "heal info".
    g.log.info('Setting options...')
    options = {"metadata-self-heal": "off",
               "entry-self-heal": "off",
               "data-self-heal": "off"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options %s' % options)
    g.log.info("Successfully set %s for volume %s", options, self.volname)

    # Creating files on client side (baseline data while all bricks are up)
    for mount_obj in self.mounts:
        g.log.info("Generating data for %s:%s",
                   mount_obj.client_system, mount_obj.mountpoint)
        # Create files
        g.log.info('Creating files...')
        command = ("/usr/bin/env python %s create_files -f 10 "
                   "--fixed-file-size 1M %s" % (self.script_upload_path,
                                                mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, command,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
    self.io_validation_complete = False

    # Validate IO
    self.assertTrue(
        validate_io_procs(self.all_mounts_procs, self.mounts),
        "IO failed on some of the clients")
    self.io_validation_complete = True

    # Bring brick0 offline
    g.log.info('Bringing bricks %s offline', bricks_list[0])
    ret = bring_bricks_offline(self.volname, bricks_list[0])
    self.assertTrue(ret, 'Failed to bring bricks %s offline'
                    % bricks_list[0])

    ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]])
    self.assertTrue(ret, 'Bricks %s are not offline' % bricks_list[0])
    g.log.info('Bringing bricks %s offline is successful', bricks_list[0])

    # Creating files on client side while brick0 is down; these writes
    # become entries pending heal on the remaining bricks.
    number_of_files_one_brick_off = '1000'
    self.all_mounts_procs = []
    for mount_obj in self.mounts:
        g.log.info("Generating data for %s:%s",
                   mount_obj.client_system, mount_obj.mountpoint)
        # Create files
        g.log.info('Creating files...')
        command = ("/usr/bin/env python %s create_files "
                   "-f %s "
                   "--fixed-file-size 1k "
                   "--base-file-name new_file "
                   "%s" % (self.script_upload_path,
                           number_of_files_one_brick_off,
                           mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, command,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
    self.io_validation_complete = False

    # Validate IO
    self.assertTrue(
        validate_io_procs(self.all_mounts_procs, self.mounts),
        "IO failed on some of the clients")
    self.io_validation_complete = True

    # Get heal info
    g.log.info("Getting heal info...")
    heal_info_data = get_heal_info_summary(self.mnode, self.volname)
    self.assertIsNotNone(heal_info_data, 'Failed to get heal info.')
    g.log.info('Success in getting heal info')

    # Check quantity of file pending heal: each surviving brick should
    # report the new files plus one extra entry (+ 1, presumably the
    # parent directory — TODO confirm against heal-info output format).
    for brick in bricks_list[1:]:
        self.assertEqual(heal_info_data[brick]['numberOfEntries'],
                         str(int(number_of_files_one_brick_off) + 1),
                         'Number of files pending heal is not correct')

    # Setting options
    g.log.info('Setting options...')
    options = {"performance.enable-least-priority": "enable"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options %s' % options)
    g.log.info("Successfully set %s for volume %s", options, self.volname)

    # Bring brick1 offline
    g.log.info('Bringing bricks %s offline', bricks_list[1])
    ret = bring_bricks_offline(self.volname, bricks_list[1])
    self.assertTrue(ret, 'Failed to bring bricks %s offline'
                    % bricks_list[1])

    ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]])
    self.assertTrue(ret, 'Bricks %s are not offline' % bricks_list[1])
    g.log.info('Bringing bricks %s offline is successful', bricks_list[1])

    # quorum-type "fixed" keeps the volume writable even with two of the
    # replica bricks down.
    g.log.info('Setting options...')
    options = {"quorum-type": "fixed"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options %s' % options)
    g.log.info("Successfully set %s for volume %s", options, self.volname)

    # Creating files on client side with two bricks down
    number_of_files_two_brick_off = '100'
    self.all_mounts_procs = []
    for mount_obj in self.mounts:
        g.log.info("Generating data for %s:%s",
                   mount_obj.client_system, mount_obj.mountpoint)
        # Create files
        g.log.info('Creating files...')
        command = ("/usr/bin/env python %s create_files "
                   "-f %s "
                   "--fixed-file-size 1k "
                   "--base-file-name new_new_file "
                   "%s" % (self.script_upload_path,
                           number_of_files_two_brick_off,
                           mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, command,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
    self.io_validation_complete = False

    # Validate IO
    self.assertTrue(
        validate_io_procs(self.all_mounts_procs, self.mounts),
        "IO failed on some of the clients")
    self.io_validation_complete = True

    # Get heal info
    g.log.info("Getting heal info...")
    heal_info_data = get_heal_info_summary(self.mnode, self.volname)
    self.assertIsNotNone(heal_info_data, 'Failed to get heal info.')
    g.log.info('Success in getting heal info')

    # Check quantity of file pending heal on the only surviving brick:
    # both batches of files plus one extra entry.
    number_of_files_to_check = str(
        int(number_of_files_one_brick_off) +
        int(number_of_files_two_brick_off) + 1)
    self.assertEqual(heal_info_data[bricks_list[-1]]['numberOfEntries'],
                     number_of_files_to_check,
                     'Number of files pending heal is not correct')
def test_heal_info_should_have_fixed_fields(self):
    """Verify heal info reports the expected fields after a full heal.

    - Create IO
    - While IO is creating - bring down a couple of bricks
    - Wait for IO to complete
    - Bring up the down bricks
    - Wait for heal to complete
    - Check for fields 'Brick', 'Status', 'Number of entries' in heal info
    """
    # Creating files on client side
    for mount_obj in self.mounts:
        g.log.info("Generating data for %s:%s",
                   mount_obj.client_system, mount_obj.mountpoint)
        # Create files
        g.log.info('Creating files...')
        command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "-d 2 -l 2 -f 50 %s" % (self.script_upload_path,
                                           mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, command,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
    self.io_validation_complete = False

    # Select bricks to bring offline
    bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
        self.mnode, self.volname))
    bricks_to_bring_offline = list(filter(None, (
        bricks_to_bring_offline_dict['hot_tier_bricks'] +
        bricks_to_bring_offline_dict['cold_tier_bricks'] +
        bricks_to_bring_offline_dict['volume_bricks'])))

    # Bring brick offline while IO is still running
    g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks %s offline'
                    % bricks_to_bring_offline)

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks %s are not offline'
                    % bricks_to_bring_offline)
    g.log.info('Bringing bricks %s offline is successful',
               bricks_to_bring_offline)

    # Validate IO
    self.assertTrue(
        validate_io_procs(self.all_mounts_procs, self.mounts),
        "IO failed on some of the clients")
    self.io_validation_complete = True

    # Bring brick online
    g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks %s online'
                    % bricks_to_bring_offline)
    g.log.info('Bringing bricks %s online is successful',
               bricks_to_bring_offline)

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')

    # Check if heal is completed
    ret = is_heal_complete(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal is not complete')
    g.log.info('Heal is completed successfully')

    # Check for split-brain
    ret = is_volume_in_split_brain(self.mnode, self.volname)
    self.assertFalse(ret, 'Volume is in split-brain state')
    g.log.info('Volume is not in split-brain state')

    # Get heal info
    g.log.info('Getting heal info...')
    heal_info_dicts = get_heal_info_summary(self.mnode, self.volname)
    # Fix: previously this asserted on the stale 'ret' left over from
    # is_volume_in_split_brain instead of validating the heal info itself.
    self.assertIsNotNone(heal_info_dicts, 'Failed to get heal info')
    g.log.info(heal_info_dicts)

    bricks_list = get_all_bricks(self.mnode, self.volname)
    self.assertIsNotNone(bricks_list, 'Brick list is None')

    # Check all fields in heal info dict: after a complete heal every
    # brick must be Connected with zero pending entries.
    g.log.info('Checking for all the fields in heal info...')
    for brick in bricks_list:
        g.log.info('Checking fields for %s', brick)
        self.assertEqual(heal_info_dicts[brick]['status'], 'Connected',
                         'Status is not Connected for brick %s' % brick)
        self.assertEqual(heal_info_dicts[brick]['numberOfEntries'], '0',
                         'numberOfEntries is not 0 for brick %s' % brick)
    g.log.info('Successfully checked for all the fields in heal info')
def test_server_side_healing_happens_only_when_glustershd_running(self):
    """Verify server-side healing happens only when glustershd runs on
    the node where the source brick resides.

    * Create and start the Replicate volume
    * Check the glustershd processes - Only 1 glustershd should be listed
    * Bring down the bricks without affecting the cluster
    * Create files on volume
    * kill the glustershd on node where bricks is running
    * bring the bricks up which was killed in previous steps
    * check the heal info - heal info must show pending heal info, heal
      shouldn't happen since glustershd is down on source node
    * issue heal
    * trigger client side heal
    * heal should complete successfully
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-lines

    # Setting Volume options: client-side heals stay enabled so the
    # final read workload can heal the pending entries.
    options = {"metadata-self-heal": "on",
               "entry-self-heal": "on",
               "data-self-heal": "on"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options %s' % options)
    g.log.info("Successfully set %s for volume %s", options, self.volname)

    # Check the self-heal daemon process
    ret, pids = get_self_heal_daemon_pid(self.servers)
    self.assertTrue(ret, ("Either No self heal daemon process found or "
                          "more than One self heal daemon process "
                          "found : %s" % pids))
    g.log.info("Successful in verifying self heal daemon process"
               " on all nodes %s", self.servers)

    # Select the bricks to bring offline
    bricks_to_bring_offline = (select_volume_bricks_to_bring_offline(
        self.mnode, self.volname))
    g.log.info("Brick List to bring offline : %s", bricks_to_bring_offline)

    # Bring down the selected bricks
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, "Failed to bring down the bricks")
    g.log.info("Brought down the brick process "
               "for %s", bricks_to_bring_offline)

    # Write files on all mounts while the bricks are down, creating
    # entries that need heal
    all_mounts_procs, num_files_to_write = [], 100
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_files "
               "-f %s --base-file-name file %s" % (self.script_upload_path,
                                                   num_files_to_write,
                                                   mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)

    # Validate IO
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    g.log.info("IO is successful on all mounts")

    # Get online bricks list
    online_bricks = get_online_bricks_list(self.mnode, self.volname)
    g.log.info("Online Bricks for volume %s : %s",
               self.volname, online_bricks)

    # Get the nodes where bricks are running (these hold the heal
    # sources, so shd must be killed there)
    bring_offline_glustershd_nodes = []
    for brick in online_bricks:
        bring_offline_glustershd_nodes.append(brick.split(":")[0])
    g.log.info("self heal deamon on nodes %s to be killed",
               bring_offline_glustershd_nodes)

    # Kill the self heal daemon process on nodes
    ret = bring_self_heal_daemon_process_offline(
        bring_offline_glustershd_nodes)
    self.assertTrue(ret, ("Unable to bring self heal daemon process"
                          " offline for nodes %s"
                          % bring_offline_glustershd_nodes))
    g.log.info("Sucessfully brought down self heal process for "
               "nodes %s", bring_offline_glustershd_nodes)

    # Check the heal info
    heal_info = get_heal_info_summary(self.mnode, self.volname)
    # Fix: validate the result instead of logging success unconditionally
    self.assertIsNotNone(heal_info, "Failed to get heal info for the "
                                    "volume %s" % self.volname)
    g.log.info("Successfully got heal info %s for the volume %s",
               heal_info, self.volname)

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline, 'glusterd_restart')
    self.assertTrue(ret, ("Failed to bring bricks: %s online"
                          % bricks_to_bring_offline))

    # Issue heal: expected to fail since glustershd is down on the
    # source nodes
    ret = trigger_heal_full(self.mnode, self.volname)
    self.assertFalse(ret, ("Able to trigger heal on volume %s where "
                           "self heal daemon is not running"
                           % self.volname))
    g.log.info("Expected : Unable to trigger heal on volume %s where "
               "self heal daemon is not running", self.volname)

    # Wait for 130 sec to heal; the heal must stay pending
    ret = monitor_heal_completion(self.mnode, self.volname, 130)
    self.assertFalse(ret, ("Heal Completed on volume %s" % self.volname))
    g.log.info("Expected : Heal pending on volume %s", self.volname)

    # Check the heal info
    heal_info_after_triggering_heal = get_heal_info_summary(
        self.mnode, self.volname)
    # Fix: validate the result instead of logging success unconditionally
    self.assertIsNotNone(heal_info_after_triggering_heal,
                         "Failed to get heal info for the volume %s"
                         % self.volname)
    g.log.info("Successfully got heal info for the volume %s",
               self.volname)

    # Compare with heal pending with the files wrote: every entry must
    # still be pending because no server-side heal could run
    for node in online_bricks:
        self.assertGreaterEqual(
            int(heal_info_after_triggering_heal[node]['numberOfEntries']),
            num_files_to_write,
            ("Some of the files are healed from source bricks %s where "
             "self heal daemon is not running" % node))
    g.log.info("EXPECTED: No files are healed from source bricks where "
               "self heal daemon is not running")

    # Unmount and Mount volume again as volume options were set
    # after mounting the volume
    for mount_obj in self.mounts:
        ret, _, _ = umount_volume(mount_obj.client_system,
                                  mount_obj.mountpoint)
        self.assertEqual(ret, 0, "Failed to unmount %s"
                         % mount_obj.client_system)
        ret, _, _ = mount_volume(self.volname, mtype='glusterfs',
                                 mpoint=mount_obj.mountpoint,
                                 mserver=self.mnode,
                                 mclient=mount_obj.client_system)
        self.assertEqual(ret, 0, "Failed to mount %s"
                         % mount_obj.client_system)

    # Trigger client-side heal by reading all the files
    all_mounts_procs = []
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s read %s"
               % (self.script_upload_path, mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)

    # Validate IO
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, "Reads failed on some of the clients")
    g.log.info("Reads successful on all mounts")

    # Wait for heal to complete
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, "Unable to heal the pending entries")
    g.log.info("Successfully healed the pending entries for volume %s",
               self.volname)
def test_existing_glustershd_should_take_care_of_self_healing(self):
    """Verify the existing glustershd takes care of self healing.

    * Create and start the Replicate volume
    * Check the glustershd processes - Note the pids
    * Bring down the One brick ( lets say brick1) without affecting
      the cluster
    * Create 1000 files on volume
    * bring the brick1 up which was killed in previous steps
    * check the heal info - proactive self healing should start
    * Bring down brick1 again
    * wait for 60 sec and brought up the brick1
    * Check the glustershd processes - pids should be different
    * Monitor the heal till its complete
    """
    # pylint: disable=too-many-locals,too-many-lines,too-many-statements
    nodes = self.servers

    # check the self-heal daemon process (exactly one shd per node)
    g.log.info("Starting to get self-heal daemon process on "
               "nodes %s", nodes)
    ret, pids = get_self_heal_daemon_pid(nodes)
    self.assertTrue(ret, ("Either No self heal daemon process found or "
                          "more than One self heal daemon process "
                          "found : %s" % pids))
    g.log.info(
        "Successful in getting Single self heal daemon process"
        " on all nodes %s", nodes)
    # Remember the pids to compare after the bricks are restarted
    glustershd_pids = pids

    # select the bricks to bring offline
    g.log.info("Selecting bricks to brought offline for volume %s",
               self.volname)
    bricks_to_bring_offline = \
        select_volume_bricks_to_bring_offline(self.mnode, self.volname)
    g.log.info("Brick List to bring offline : %s",
               bricks_to_bring_offline)

    # Bring down the selected bricks
    g.log.info("Going to bring down the brick process "
               "for %s", bricks_to_bring_offline)
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                          "check the log file for more details."))
    g.log.info("Brought down the brick process "
               "for %s successfully", bricks_to_bring_offline)

    # get the bricks which are running
    g.log.info("getting the brick list which are online")
    online_bricks = get_online_bricks_list(self.mnode, self.volname)
    g.log.info("Online Bricks for volume %s : %s",
               self.volname, online_bricks)

    # write 1MB files to the mounts while the brick is down, creating
    # entries pending heal
    g.log.info("Starting IO on all mounts...")
    g.log.info("mounts: %s", self.mounts)
    all_mounts_procs = []
    cmd = ("for i in `seq 1 1000`; "
           "do dd if=/dev/urandom of=%s/file_$i "
           "bs=1M count=1; "
           "done" % self.mounts[0].mountpoint)
    g.log.info(cmd)
    proc = g.run_async(self.mounts[0].client_system, cmd,
                       user=self.mounts[0].user)
    all_mounts_procs.append(proc)

    # Validate IO
    self.assertTrue(
        validate_io_procs(all_mounts_procs, self.mounts),
        "IO failed on some of the clients")

    # check the heal info
    g.log.info("Get the pending heal info for the volume %s",
               self.volname)
    heal_info = get_heal_info_summary(self.mnode, self.volname)
    # NOTE(review): heal_info is subscripted below without a None check;
    # if "heal info" fails this raises TypeError instead of a clear
    # assertion — consider adding assertIsNotNone here.
    g.log.info("Successfully got heal info for the volume %s",
               self.volname)
    g.log.info("Heal Info for volume %s : %s", self.volname, heal_info)

    # Bring bricks online
    g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline,
                              'glusterd_restart')
    self.assertTrue(ret, ("Failed to bring bricks: %s online"
                          % bricks_to_bring_offline))
    g.log.info("Successfully brought all bricks: %s online",
               bricks_to_bring_offline)

    # Wait for 90 sec to start self healing
    g.log.info('Waiting for 90 sec to start self healing')
    time.sleep(90)

    # check the heal info
    g.log.info("Get the pending heal info for the volume %s",
               self.volname)
    heal_info_after_brick_online = get_heal_info_summary(
        self.mnode, self.volname)
    g.log.info("Successfully got heal info for the volume %s",
               self.volname)
    g.log.info("Heal Info for volume %s : %s",
               self.volname, heal_info_after_brick_online)

    # check heal pending is decreased on at least one brick, which
    # proves proactive (server-side) healing kicked in
    flag = False
    for brick in online_bricks:
        if int(heal_info_after_brick_online[brick]['numberOfEntries'])\
                < int(heal_info[brick]['numberOfEntries']):
            flag = True
            break
    self.assertTrue(flag, "Pro-active self heal is not started")
    g.log.info("Pro-active self heal is started")

    # bring down bricks again
    g.log.info("Going to bring down the brick process "
               "for %s", bricks_to_bring_offline)
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                          "check the log file for more details."))
    g.log.info("Brought down the brick process "
               "for %s successfully", bricks_to_bring_offline)

    # wait for 60 sec and brought up the brick again
    g.log.info('waiting for 60 sec and brought up the brick again')
    time.sleep(60)
    g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline,
                              'glusterd_restart')
    self.assertTrue(ret, ("Failed to bring bricks: %s online"
                          % bricks_to_bring_offline))
    g.log.info("Successfully brought all bricks: %s online",
               bricks_to_bring_offline)

    # Verfiy glustershd process releases its parent process
    ret = is_shd_daemonized(nodes)
    self.assertTrue(ret, ("Either No self heal daemon process found or "
                          "more than One self heal daemon process found"))

    # check the self-heal daemon process again after the restart
    g.log.info("Starting to get self-heal daemon process on "
               "nodes %s", nodes)
    ret, pids = get_self_heal_daemon_pid(nodes)
    self.assertTrue(ret, ("Either No self heal daemon process found or "
                          "more than One self heal daemon process "
                          "found : %s" % pids))
    g.log.info(
        "Successful in getting Single self heal daemon process"
        " on all nodes %s", nodes)
    shd_pids_after_bricks_online = pids

    # compare the glustershd pids: glusterd restart must have spawned a
    # fresh shd, so the pids must differ
    self.assertNotEqual(glustershd_pids, shd_pids_after_bricks_online,
                        ("self heal daemon process are same before and "
                         "after bringing up bricks online"))
    g.log.info("EXPECTED : self heal daemon process are different before "
               "and after bringing up bricks online")

    # wait for heal to complete
    g.log.info("Monitoring the heal.....")
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, ("Heal is not completed on volume %s"
                          % self.volname))
    g.log.info("Heal Completed on volume %s", self.volname)

    # Check if heal is completed
    ret = is_heal_complete(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal is not complete')
    g.log.info('Heal is completed successfully')
def test_metadata_self_heal_on_open_fd(self):
    """Pro-active metadata self heal on open fd.

    Steps:
    1) Create a volume.
    2) Mount the volume using FUSE.
    3) Create test executable on volume mount.
    4) While test execution is in progress, bring down brick1.
    5) From mount point, change ownership, permission, group id of
       the test file.
    6) While test execution is in progress, bring back brick1 online.
    7) Do stat on the test file to check ownership, permission,
       group id on mount point and on bricks
    8) Stop test execution.
    9) Do stat on the test file to check ownership, permission,
       group id on mount point and on bricks.
    10) There should be no pending heals in the heal info command.
    11) There should be no split-brain.
    12) Calculate arequal of the bricks and mount point and it
        should be same.
    """
    # pylint: disable=too-many-statements,too-many-locals
    # pylint: disable=too-many-branches
    bricks_list = get_all_bricks(self.mnode, self.volname)
    self.assertIsNotNone(bricks_list, 'Brick list is None')
    client = self.clients[0]

    # Create test executable file on mount point
    m_point = self.mounts[0].mountpoint
    test_file = "testfile.sh"
    cmd = ("echo 'while true; do echo 'Press CTRL+C to stop execution';"
           " done' >> {}/{}".format(m_point, test_file))
    ret, _, _ = g.run(client, cmd)
    self.assertEqual(ret, 0, "Failed to create test file")

    # Execute the test file; this keeps an fd open on the file while
    # the brick goes down and comes back
    cmd = "cd {}; sh {}".format(m_point, test_file)
    g.run_async(client, cmd)

    # Get pid of the test file so it can be killed later
    _cmd = "ps -aux | grep -v grep | grep testfile.sh | awk '{print $2}'"
    ret, out, _ = g.run(client, _cmd)
    self.assertEqual(ret, 0, "Failed to get pid of test file execution")

    # Bring brick1 offline
    ret = bring_bricks_offline(self.volname, [bricks_list[1]])
    self.assertTrue(ret, 'Failed to bring bricks {} '
                    'offline'.format(bricks_list[1]))

    ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]])
    self.assertTrue(ret, 'Bricks {} are not '
                    'offline'.format(bricks_list[1]))

    # change uid, gid and permission from client
    cmd = "chown {} {}/{}".format(self.user, m_point, test_file)
    ret, _, _ = g.run(client, cmd)
    self.assertEqual(ret, 0, "chown failed")

    cmd = "chgrp {} {}/{}".format(self.user, m_point, test_file)
    ret, _, _ = g.run(client, cmd)
    self.assertEqual(ret, 0, "chgrp failed")

    cmd = "chmod 777 {}/{}".format(m_point, test_file)
    ret, _, _ = g.run(client, cmd)
    # Fix: failure message previously said "chown failed" (copy-paste)
    self.assertEqual(ret, 0, "chmod failed")

    # Bring brick1 online
    ret = bring_bricks_online(self.mnode, self.volname,
                              [bricks_list[1]])
    self.assertTrue(ret, 'Failed to bring bricks {} online'
                    .format(bricks_list[1]))

    ret = get_pathinfo(client, "{}/{}".format(m_point, test_file))
    self.assertIsNotNone(ret, "Unable to get "
                         "trusted.glusterfs.pathinfo of file")
    # Build {node: brick_dir} to stat on, and rebuild bricks_list with
    # resolved addresses so it matches the heal-info keys below.
    nodes_to_check = {}
    bricks_list = []
    for brick in ret['brickdir_paths']:
        node, brick_path = brick.split(':')
        # Heuristic: entries starting with digits are taken as IPs and
        # used as-is; hostnames are resolved to IPs first.
        if node[0:2].isdigit():
            nodes_to_check[node] = os.path.dirname(brick_path)
            path = node + ":" + os.path.dirname(brick_path)
        else:
            nodes_to_check[gethostbyname(node)] = (
                os.path.dirname(brick_path))
            path = gethostbyname(node) + ":" + os.path.dirname(
                brick_path)
        bricks_list.append(path)
    nodes_to_check[client] = m_point

    # Verify that the changes are successful on bricks and client
    self._verify_stat_info(nodes_to_check, test_file)

    # Kill the test executable file
    for pid in out.split('\n')[:-1]:
        cmd = "kill -s 9 {}".format(pid)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "Failed to kill test file execution")

    # Verify that the changes are successful on bricks and client
    self._verify_stat_info(nodes_to_check, test_file)

    # Verify there are no pending heals
    heal_info = get_heal_info_summary(self.mnode, self.volname)
    self.assertIsNotNone(heal_info, 'Unable to get heal info')
    for brick in bricks_list:
        self.assertEqual(int(heal_info[brick]['numberOfEntries']),
                         0, ("Pending heal on brick {} ".format(brick)))

    # Check for split-brain
    ret = is_volume_in_split_brain(self.mnode, self.volname)
    self.assertFalse(ret, 'Volume is in split-brain state')
    g.log.info('Volume is not in split-brain state')

    # Get arequal for mount
    ret, arequals = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]

    # Collecting data bricks; arbiter bricks carry no data so they are
    # excluded from the arequal comparison
    vol_info = get_volume_info(self.mnode, self.volname)
    self.assertIsNotNone(vol_info, 'Unable to get volume info')
    data_brick_list = []
    for brick in bricks_list:
        for brick_info in vol_info[self.volname]["bricks"]["brick"]:
            if brick_info["name"] == brick:
                if brick_info["isArbiter"] == "0":
                    data_brick_list.append(brick)
    bricks_list = data_brick_list

    # Get arequal on bricks and compare with mount_point_total
    # It should be the same
    arbiter = self.volume_type.find('arbiter') >= 0
    subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
    stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])
    for subvol in subvols:
        subvol = [i for i in subvol if i in bricks_list]
        if subvol:
            ret, arequal = collect_bricks_arequal(subvol[0:stop])
            self.assertTrue(ret, 'Unable to get arequal checksum '
                            'on {}'.format(subvol[0:stop]))
            self.assertEqual(len(set(arequal)), 1, 'Mismatch of arequal '
                             'checksum among {} is '
                             'identified'.format(subvol[0:stop]))
            brick_total = arequal[-1].splitlines()[-1].split(':')[-1]
            self.assertEqual(brick_total, mount_point_total,
                             "Arequals for mountpoint and {} "
                             "are not equal".format(subvol[0:stop]))
def test_heal_info_shouldnot_list_files_being_accessed(self):
    """Heal info must not list healthy files that are merely being
    accessed/modified from a mount.

    - bring brick 1 offline
    - create files and validate IO
    - get entries before accessing file
    - get first filename from active subvol without offline bricks
    - access and modify the file
    - while accessing - get entries
    - Compare entries before accessing and while accessing
    - validate IO
    """
    # Bring 1-st brick offline
    brick_to_bring_offline = [self.bricks_list[0]]
    g.log.info('Bringing bricks %s offline...', brick_to_bring_offline)
    ret = bring_bricks_offline(self.volname, brick_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks %s offline'
                    % brick_to_bring_offline)

    ret = are_bricks_offline(self.mnode, self.volname,
                             brick_to_bring_offline)
    self.assertTrue(ret, 'Bricks %s are not offline'
                    % brick_to_bring_offline)
    g.log.info('Bringing bricks %s offline is successful',
               brick_to_bring_offline)

    # Creating files on client side
    for mount_obj in self.mounts:
        g.log.info("Generating data for %s:%s",
                   mount_obj.client_system, mount_obj.mountpoint)
        # Creating files (use /usr/bin/env python for consistency with
        # the other tests in this module)
        cmd = ("/usr/bin/env python %s create_files -f 100 %s"
               % (self.script_upload_path, mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)

    # Validate IO
    g.log.info("Wait for IO to complete and validate IO ...")
    ret = validate_io_procs(self.all_mounts_procs, self.mounts)
    self.io_validation_complete = True
    self.assertTrue(ret, "IO failed on some of the clients")
    g.log.info("IO is successful on all mounts")

    # Get entries before accessing file
    g.log.info("Getting entries_before_accessing file...")
    entries_before_accessing = get_heal_info_summary(
        self.mnode, self.volname)
    self.assertIsNotNone(entries_before_accessing,
                         'Can`t get heal info summary')
    g.log.info(
        "Getting entries_before_accessing file finished successfully")

    # Get filename to access from active subvol without offline bricks
    # Get last subvol
    subvols = get_subvols(self.mnode, self.volname)
    subvol_without_offline_brick = subvols['volume_subvols'][-1]

    # Get first brick server and brick path
    # and get first file from filelist
    subvol_mnode, mnode_brick = subvol_without_offline_brick[0].split(':')
    ret, file_list, err = g.run(subvol_mnode, 'ls %s' % mnode_brick)
    # Fix: check the listing succeeded before indexing into it
    self.assertEqual(ret, 0, 'Failed to list files on %s' % subvol_mnode)
    file_to_edit = file_list.splitlines()[0]

    # Access and modify the file
    g.log.info("Start modifying IO on all mounts...")
    self.all_mounts_procs = []
    for mount_obj in self.mounts:
        g.log.info("Modifying IO on %s:%s", mount_obj.client_system,
                   mount_obj.mountpoint)
        cmd = ("cd %s/ ; "
               "dd if=/dev/zero of=%s bs=1G count=1"
               % (mount_obj.mountpoint, file_to_edit))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
        g.log.info("IO on %s:%s is modified successfully",
                   mount_obj.client_system, mount_obj.mountpoint)
    self.io_validation_complete = False

    # Get entries while accessing file
    g.log.info("Getting entries while accessing file...")
    entries_while_accessing = get_heal_info_summary(
        self.mnode, self.volname)
    # Fix: previously this re-checked entries_before_accessing
    self.assertIsNotNone(entries_while_accessing,
                         'Can`t get heal info summary')
    g.log.info("Getting entries while accessing file "
               "finished successfully")

    # Compare dicts before accessing and while accessing: the file
    # being written must not appear as an extra pending-heal entry.
    # Fix: replaced Python-2-only cmp() with a direct dict comparison.
    g.log.info('Comparing entries before modifying and while modifying...')
    self.assertEqual(entries_before_accessing, entries_while_accessing,
                     'Entries before modifying and while modifying '
                     'are not equal')
    g.log.info('Comparison entries before modifying and while modifying'
               'finished successfully.')

    # Validate IO
    g.log.info("Wait for IO to complete and validate IO ...")
    ret = validate_io_procs(self.all_mounts_procs, self.mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    self.io_validation_complete = True
    g.log.info("IO is successful on all mounts")