def test_self_heal_daemon(self):
    """
    Test Data-Self-Heal (heal command)

    Description:
    - Create directory test_hardlink_self_heal
    - Create directory test_data_self_heal
    - Create files for hardlinks and data files
    - Get arequal before getting bricks offline
    - Select bricks to bring offline
    - Bring brick offline
    - Create hardlinks and append data to data files
    - Bring brick online
    - Wait for volume processes to be online
    - Verify volume's all process are online
    - Monitor heal completion
    - Check for split-brain
    - Get arequal after getting bricks online
    - Select bricks to bring offline
    - Bring brick offline
    - Truncate data files and verify hardlinks
    - Bring brick online
    - Wait for volume processes to be online
    - Verify volume's all process are online
    - Monitor heal completion
    - Check for split-brain
    - Get arequal again
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    # Creating directory test_hardlink_self_heal
    ret = mkdir(self.mounts[0].client_system,
                "{}/test_hardlink_self_heal".format(
                    self.mounts[0].mountpoint))
    self.assertTrue(ret, "Failed to create directory")
    g.log.info("Directory 'test_hardlink_self_heal' on %s created "
               "successfully", self.mounts[0])

    # Creating directory test_data_self_heal
    ret = mkdir(self.mounts[0].client_system,
                "{}/test_data_self_heal".format(
                    self.mounts[0].mountpoint))
    self.assertTrue(ret, "Failed to create directory")
    g.log.info("Directory 'test_data_self_heal' on %s created "
               "successfully", self.mounts[0])

    # Creating files for hardlinks and data files
    cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5`;'
           'do mkdir dir.$i ; for j in `seq 1 10` ; do dd if='
           '/dev/urandom of=dir.$i/file.$j bs=1k count=$j;done; done;'
           'cd ..' % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to create file on mountpoint")
    g.log.info("Successfully created files on mountpoint")

    cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;'
           'do dd if=/dev/urandom of=file.$i bs=128K count=$i;done;'
           'cd ..' % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to create file on mountpoint")
    g.log.info("Successfully created files on mountpoint")

    # Get arequal before getting bricks offline
    ret, result_before_offline = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Arequal before getting bricks offline is %s',
               result_before_offline)

    # Select bricks to bring offline
    bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
        self.mnode, self.volname)
    self.assertIsNotNone(bricks_to_bring_offline, "List is empty")

    # Bring brick offline
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} offline'.format(
        bricks_to_bring_offline))

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks {} are not offline'.format(
        bricks_to_bring_offline))
    g.log.info('Bringing bricks %s offline is successful',
               bricks_to_bring_offline)

    # Append data to data files and create hardlinks
    cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;'
           'do dd if=/dev/urandom of=file.$i bs=512K count=$i ; done ;'
           'cd .. ' % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to modify data files.")
    g.log.info("Successfully modified data files")

    cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5` ;do '
           'for j in `seq 1 10`;do ln dir.$i/file.$j dir.$i/link_file.$j;'
           'done ; done ; cd .. ' % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Hardlinks creation failed")
    g.log.info("Successfully created hardlinks of files")

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} online'.format(
        bricks_to_bring_offline))
    g.log.info('Bringing bricks %s online is successful',
               bricks_to_bring_offline)

    # Wait for volume processes to be online
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Failed to wait for volume {} processes to "
                    "be online".format(self.volname))
    g.log.info("Successful in waiting for volume %s processes to be "
               "online", self.volname)

    # Verify volume's all process are online
    ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
    self.assertTrue(ret, "Volume {} : All process are not online".format(
        self.volname))
    g.log.info("Volume %s : All process are online", self.volname)

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')

    # Check for split-brain
    ret = is_volume_in_split_brain(self.mnode, self.volname)
    self.assertFalse(ret, 'Volume is in split-brain state')
    g.log.info('Volume is not in split-brain state')

    # Get arequal after getting bricks online
    ret, result_after_online = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Arequal after getting bricks online is %s',
               result_after_online)

    # Select bricks to bring offline
    bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
        self.mnode, self.volname)
    self.assertIsNotNone(bricks_to_bring_offline, "List is empty")

    # Bring brick offline
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} offline'.format(
        bricks_to_bring_offline))

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks {} are not offline'.format(
        bricks_to_bring_offline))
    g.log.info('Bringing bricks %s offline is successful',
               bricks_to_bring_offline)

    # Truncate data files and verify hardlinks
    cmd = ('cd %s/test_data_self_heal ; for i in `seq 1 100` ;'
           'do truncate -s $(( $i * 128)) file.$i ; done ; cd ..'
           % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to truncate files")
    g.log.info("Successfully truncated files on mountpoint")

    file_path = ('%s/test_hardlink_self_heal/dir{1..5}/file{1..10}'
                 % self.mounts[0].mountpoint)
    link_path = ('%s/test_hardlink_self_heal/dir{1..5}/link_file{1..10}'
                 % self.mounts[0].mountpoint)
    file_stat = get_file_stat(self.mounts[0], file_path)
    link_stat = get_file_stat(self.mounts[0], link_path)
    self.assertEqual(file_stat, link_stat,
                     "Verification of hardlinks failed")
    g.log.info("Successfully verified hardlinks")

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} online'.format(
        bricks_to_bring_offline))
    g.log.info('Bringing bricks %s online is successful',
               bricks_to_bring_offline)

    # Wait for volume processes to be online
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Failed to wait for volume {} processes to "
                    "be online".format(self.volname))
    g.log.info("Successful in waiting for volume %s processes to be "
               "online", self.volname)

    # Verify volume's all process are online
    ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
    self.assertTrue(ret, "Volume {} : All process are not online".format(
        self.volname))
    g.log.info("Volume %s : All process are online", self.volname)

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')
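
# NOTE: Illustrative sketch only, not part of the original test suite.
# The hardlink verification above compares stat output of the original
# files and their links via brace-expanded paths; the sketch below makes
# the underlying idea explicit for a single pair of paths: a file and its
# hardlink must resolve to the same inode. The method name is assumed
# purely for illustration.
def _hardlink_shares_inode_sketch(self, node, file_path, link_path):
    """Return True if file_path and link_path report the same inode."""
    inodes = []
    for path in (file_path, link_path):
        # '%i' prints the inode number of the given path
        ret, out, _ = g.run(node, "stat -c '%i' {}".format(path))
        if ret != 0:
            return False
        inodes.append(out.strip())
    return inodes[0] == inodes[1]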

def test_server_side_healing_happens_only_when_glustershd_running(self):
    """
    Test script which verifies that server side healing happens only if
    the heal daemon is running on the node where the source brick resides.

    * Create and start the Replicate volume
    * Check the glustershd processes - only one glustershd should be listed
    * Bring down the bricks without affecting the cluster
    * Create files on the volume
    * Kill glustershd on the nodes where the source bricks are running
    * Bring up the bricks which were brought down in the previous step
    * Check the heal info - it must show pending heals; heal shouldn't
      happen since glustershd is down on the source node
    * Issue heal
    * Trigger client side heal
    * Heal should complete successfully
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-lines
    # Setting Volume options
    options = {"metadata-self-heal": "on",
               "entry-self-heal": "on",
               "data-self-heal": "on"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options %s' % options)
    g.log.info("Successfully set %s for volume %s", options, self.volname)

    # Check the self-heal daemon process
    ret, pids = get_self_heal_daemon_pid(self.servers)
    self.assertTrue(ret, ("Either no self heal daemon process found or "
                          "more than one self heal daemon process "
                          "found : %s" % pids))
    g.log.info("Successful in verifying self heal daemon process"
               " on all nodes %s", self.servers)

    # Select the bricks to bring offline
    bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
        self.mnode, self.volname)
    g.log.info("Brick List to bring offline : %s",
               bricks_to_bring_offline)

    # Bring down the selected bricks
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, "Failed to bring down the bricks")
    g.log.info("Brought down the brick process for %s",
               bricks_to_bring_offline)

    # Write files on all mounts
    all_mounts_procs, num_files_to_write = [], 100
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_files "
               "-f %s --base-file-name file %s"
               % (self.script_upload_path, num_files_to_write,
                  mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)

    # Validate IO
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    g.log.info("IO is successful on all mounts")

    # Get online bricks list
    online_bricks = get_online_bricks_list(self.mnode, self.volname)
    g.log.info("Online Bricks for volume %s : %s", self.volname,
               online_bricks)

    # Get the nodes where bricks are running
    bring_offline_glustershd_nodes = []
    for brick in online_bricks:
        bring_offline_glustershd_nodes.append(brick.split(":")[0])
    g.log.info("Self heal daemon on nodes %s to be killed",
               bring_offline_glustershd_nodes)

    # Kill the self heal daemon process on nodes
    ret = bring_self_heal_daemon_process_offline(
        bring_offline_glustershd_nodes)
    self.assertTrue(ret, ("Unable to bring self heal daemon process"
                          " offline for nodes %s"
                          % bring_offline_glustershd_nodes))
    g.log.info("Successfully brought down self heal process for "
               "nodes %s", bring_offline_glustershd_nodes)

    # Check the heal info
    heal_info = get_heal_info_summary(self.mnode, self.volname)
    g.log.info("Successfully got heal info %s for the volume %s",
               heal_info, self.volname)

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline, 'glusterd_restart')
    self.assertTrue(ret, ("Failed to bring bricks: %s online"
                          % bricks_to_bring_offline))

    # Issue heal
    ret = trigger_heal_full(self.mnode, self.volname)
    self.assertFalse(ret, ("Able to trigger heal on volume %s where "
                           "self heal daemon is not running"
                           % self.volname))
    g.log.info("Expected : Unable to trigger heal on volume %s where "
               "self heal daemon is not running", self.volname)

    # Wait for 130 sec to heal
    ret = monitor_heal_completion(self.mnode, self.volname, 130)
    self.assertFalse(ret, "Heal Completed on volume %s" % self.volname)
    g.log.info("Expected : Heal pending on volume %s", self.volname)

    # Check the heal info
    heal_info_after_triggering_heal = get_heal_info_summary(
        self.mnode, self.volname)
    g.log.info("Successfully got heal info for the volume %s",
               self.volname)

    # Compare the pending heal entries with the number of files written
    for node in online_bricks:
        self.assertGreaterEqual(
            int(heal_info_after_triggering_heal[node]['numberOfEntries']),
            num_files_to_write,
            ("Some of the files are healed from source bricks %s where "
             "self heal daemon is not running" % node))
    g.log.info("EXPECTED: No files are healed from source bricks where "
               "self heal daemon is not running")

    # Unmount and mount the volume again, as the volume options were set
    # after mounting the volume
    for mount_obj in self.mounts:
        ret, _, _ = umount_volume(mount_obj.client_system,
                                  mount_obj.mountpoint)
        self.assertEqual(ret, 0, "Failed to unmount %s"
                         % mount_obj.client_system)
        ret, _, _ = mount_volume(self.volname, mtype='glusterfs',
                                 mpoint=mount_obj.mountpoint,
                                 mserver=self.mnode,
                                 mclient=mount_obj.client_system)
        self.assertEqual(ret, 0, "Failed to mount %s"
                         % mount_obj.client_system)

    # Trigger client side heal by reading files from all mounts
    all_mounts_procs = []
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s read %s"
               % (self.script_upload_path, mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)

    # Validate IO
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, "Reads failed on some of the clients")
    g.log.info("Reads successful on all mounts")

    # Wait for heal to complete
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, "Unable to heal the pending entries")
    g.log.info("Successfully healed the pending entries for volume %s",
               self.volname)

def test_gluster_clone_heal(self):
    """
    Test gluster compilation on mount point (heal command)
    - Creating directory test_compilation
    - Compile gluster on mountpoint
    - Select bricks to bring offline
    - Bring brick offline
    - Validate IO
    - Bring bricks online
    - Wait for volume processes to be online
    - Verify volume's all process are online
    - Monitor heal completion
    - Check for split-brain
    - Get arequal after getting bricks online
    - Compile gluster on mountpoint again
    - Select bricks to bring offline
    - Bring brick offline
    - Validate IO
    - Bring bricks online
    - Wait for volume processes to be online
    - Verify volume's all process are online
    - Monitor heal completion
    - Check for split-brain
    - Get arequal after getting bricks online
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    # Creating directory test_compilation
    ret = mkdir(self.mounts[0].client_system,
                "{}/test_compilation".format(self.mounts[0].mountpoint))
    self.assertTrue(ret, "Failed to create directory")
    g.log.info("Directory 'test_compilation' on %s created "
               "successfully", self.mounts[0])

    # Compile gluster on mountpoint
    # (https is used as GitHub no longer serves the git:// protocol)
    cmd = ("cd %s/test_compilation ; rm -rf glusterfs; git clone"
           " https://github.com/gluster/glusterfs.git ; cd glusterfs ;"
           " ./autogen.sh ;./configure CFLAGS='-g3 -O0 -DDEBUG'; make ;"
           " cd ../..;" % self.mounts[0].mountpoint)
    proc = g.run_async(self.mounts[0].client_system, cmd)

    # Select bricks to bring offline
    bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
        self.mnode, self.volname)
    self.assertIsNotNone(bricks_to_bring_offline, "List is empty")

    # Bring brick offline
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} offline'.format(
        bricks_to_bring_offline))

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks {} are not offline'.format(
        bricks_to_bring_offline))
    g.log.info('Bringing bricks %s offline is successful',
               bricks_to_bring_offline)

    # Validate IO
    self.assertTrue(validate_io_procs([proc], self.mounts[0]),
                    "IO failed on some of the clients")

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} online'.format(
        bricks_to_bring_offline))

    # Wait for volume processes to be online
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Failed to wait for volume {} processes to "
                    "be online".format(self.volname))

    # Verify volume's all process are online
    ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
    self.assertTrue(ret, "Volume {} : All process are not online".format(
        self.volname))

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')

    # Check for split-brain
    ret = is_volume_in_split_brain(self.mnode, self.volname)
    self.assertFalse(ret, 'Volume is in split-brain state')
    g.log.info('Volume is not in split-brain state')

    # Get arequal after getting bricks online
    ret, result_after_online = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info("Arequal of mountpoint %s", result_after_online)

    # Compile gluster on mountpoint again
    proc1 = g.run_async(self.mounts[0].client_system, cmd)

    # Select bricks to bring offline
    bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
        self.mnode, self.volname)
    self.assertIsNotNone(bricks_to_bring_offline, "List is empty")

    # Bring brick offline
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} offline'.format(
        bricks_to_bring_offline))

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks {} are not offline'.format(
        bricks_to_bring_offline))

    # Validate IO
    self.assertTrue(validate_io_procs([proc1], self.mounts[0]),
                    "IO failed on some of the clients")

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} online'.format(
        bricks_to_bring_offline))

    # Wait for volume processes to be online
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Failed to wait for volume {} processes to "
                    "be online".format(self.volname))

    # Verify volume's all process are online
    ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
    self.assertTrue(ret, "Volume {} : All process are not online".format(
        self.volname))

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')

    # Check for split-brain
    ret = is_volume_in_split_brain(self.mnode, self.volname)
    self.assertFalse(ret, 'Volume is in split-brain state')

    # Get arequal after getting bricks online
    ret, result_after_online = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info("Arequal of mountpoint %s", result_after_online)

def test_existing_glustershd_should_take_care_of_self_healing(self):
    """
    Test script which verifies that the existing glustershd should take
    care of self healing

    * Create and start the Replicate volume
    * Check the glustershd processes - note the pids
    * Bring down one brick (say brick1) without affecting the cluster
    * Create 1000 files on the volume
    * Bring brick1 back up
    * Check the heal info - proactive self healing should start
    * Bring down brick1 again
    * Wait for 60 sec and bring brick1 up again
    * Check the glustershd processes - pids should be different
    * Monitor the heal until it completes
    """
    # pylint: disable=too-many-locals,too-many-lines,too-many-statements
    nodes = self.servers

    # check the self-heal daemon process
    g.log.info("Starting to get self-heal daemon process on "
               "nodes %s", nodes)
    ret, pids = get_self_heal_daemon_pid(nodes)
    self.assertTrue(ret, ("Either no self heal daemon process found or "
                          "more than one self heal daemon process "
                          "found : %s" % pids))
    g.log.info("Successful in getting a single self heal daemon process"
               " on all nodes %s", nodes)
    glustershd_pids = pids

    # select the bricks to bring offline
    g.log.info("Selecting bricks to bring offline for volume %s",
               self.volname)
    bricks_to_bring_offline = \
        select_volume_bricks_to_bring_offline(self.mnode, self.volname)
    g.log.info("Brick List to bring offline : %s",
               bricks_to_bring_offline)

    # Bring down the selected bricks
    g.log.info("Going to bring down the brick process "
               "for %s", bricks_to_bring_offline)
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                          "check the log file for more details."))
    g.log.info("Brought down the brick process "
               "for %s successfully", bricks_to_bring_offline)

    # get the bricks which are running
    g.log.info("Getting the list of bricks which are online")
    online_bricks = get_online_bricks_list(self.mnode, self.volname)
    g.log.info("Online Bricks for volume %s : %s", self.volname,
               online_bricks)

    # write 1MB files to the mounts
    g.log.info("Starting IO on all mounts...")
    g.log.info("mounts: %s", self.mounts)
    all_mounts_procs = []
    cmd = ("for i in `seq 1 1000`; "
           "do dd if=/dev/urandom of=%s/file_$i "
           "bs=1M count=1; "
           "done" % self.mounts[0].mountpoint)
    g.log.info(cmd)
    proc = g.run_async(self.mounts[0].client_system, cmd,
                       user=self.mounts[0].user)
    all_mounts_procs.append(proc)

    # Validate IO
    self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                    "IO failed on some of the clients")

    # check the heal info
    g.log.info("Get the pending heal info for the volume %s",
               self.volname)
    heal_info = get_heal_info_summary(self.mnode, self.volname)
    g.log.info("Successfully got heal info for the volume %s",
               self.volname)
    g.log.info("Heal Info for volume %s : %s", self.volname, heal_info)

    # Bring bricks online
    g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline, 'glusterd_restart')
    self.assertTrue(ret, ("Failed to bring bricks: %s online"
                          % bricks_to_bring_offline))
    g.log.info("Successfully brought all bricks: %s online",
               bricks_to_bring_offline)

    # Wait for 90 sec for self healing to start
    g.log.info('Waiting for 90 sec for self healing to start')
    time.sleep(90)

    # check the heal info
    g.log.info("Get the pending heal info for the volume %s",
               self.volname)
    heal_info_after_brick_online = get_heal_info_summary(
        self.mnode, self.volname)
    g.log.info("Successfully got heal info for the volume %s",
               self.volname)
    g.log.info("Heal Info for volume %s : %s", self.volname,
               heal_info_after_brick_online)

    # check that the pending heal count has decreased
    flag = False
    for brick in online_bricks:
        if int(heal_info_after_brick_online[brick]['numberOfEntries']) \
                < int(heal_info[brick]['numberOfEntries']):
            flag = True
            break
    self.assertTrue(flag, "Pro-active self heal is not started")
    g.log.info("Pro-active self heal is started")

    # bring down bricks again
    g.log.info("Going to bring down the brick process "
               "for %s", bricks_to_bring_offline)
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                          "check the log file for more details."))
    g.log.info("Brought down the brick process "
               "for %s successfully", bricks_to_bring_offline)

    # wait for 60 sec and bring up the bricks again
    g.log.info('Waiting for 60 sec before bringing up the bricks again')
    time.sleep(60)
    g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline, 'glusterd_restart')
    self.assertTrue(ret, ("Failed to bring bricks: %s online"
                          % bricks_to_bring_offline))
    g.log.info("Successfully brought all bricks: %s online",
               bricks_to_bring_offline)

    # Verify that the glustershd process releases its parent process
    ret = is_shd_daemonized(nodes)
    self.assertTrue(ret, ("Either no self heal daemon process found or "
                          "more than one self heal daemon process found"))

    # check the self-heal daemon process
    g.log.info("Starting to get self-heal daemon process on "
               "nodes %s", nodes)
    ret, pids = get_self_heal_daemon_pid(nodes)
    self.assertTrue(ret, ("Either no self heal daemon process found or "
                          "more than one self heal daemon process "
                          "found : %s" % pids))
    g.log.info("Successful in getting a single self heal daemon process"
               " on all nodes %s", nodes)
    shd_pids_after_bricks_online = pids

    # compare the glustershd pids
    self.assertNotEqual(glustershd_pids, shd_pids_after_bricks_online,
                        ("Self heal daemon processes are the same before "
                         "and after bringing the bricks online"))
    g.log.info("EXPECTED : Self heal daemon processes are different "
               "before and after bringing the bricks online")

    # wait for heal to complete
    g.log.info("Monitoring the heal.....")
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, ("Heal is not completed on volume %s"
                          % self.volname))
    g.log.info("Heal Completed on volume %s", self.volname)

    # Check if heal is completed
    ret = is_heal_complete(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal is not complete')
    g.log.info('Heal is completed successfully')
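
# NOTE: Illustrative sketch only, not part of the original test suite.
# The test above relies on get_self_heal_daemon_pid() from glustolibs to
# collect the glustershd pid on every server; the sketch below shows the
# basic idea for a single node using plain 'ps'. The method name is an
# assumption for illustration.
def _glustershd_pid_sketch(self, node):
    """Return the glustershd pid on 'node', or None if it is not running."""
    ret, out, _ = g.run(node, "ps -ef")
    if ret != 0:
        return None
    for line in out.splitlines():
        # glustershd is a glusterfs process whose command line carries
        # the volfile id 'gluster/glustershd'
        if "glustershd" in line:
            return line.split()[1]  # the second field of 'ps -ef' is the PID
    return None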

def perform_test(self, ctime):
    """
    Testcase steps:
    1. Enable/disable features.ctime based on the function argument.
    2. Create a directory on the mount point.
    3. Kill a brick and create a file inside the directory.
    4. Bring the brick online.
    5. Trigger heal and wait for its completion.
    6. Verify that the atime, mtime and ctime of the directory are the
       same on all bricks of the replica.
    """
    if ctime:
        option = {'features.ctime': 'on'}
    else:
        option = {'features.ctime': 'off'}
    ret = set_volume_options(self.mnode, self.volname, option)
    self.assertTrue(ret, 'failed to set option %s on %s'
                    % (option, self.volname))

    client, m_point = (self.mounts[0].client_system,
                       self.mounts[0].mountpoint)

    dirpath = '{}/dir1'.format(m_point)
    ret = mkdir(client, dirpath)
    self.assertTrue(ret, 'Unable to create a directory from mount point')

    bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
        self.mnode, self.volname)
    self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} offline'.format(
        bricks_to_bring_offline))
    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks {} are not offline'.format(
        bricks_to_bring_offline))

    cmd = 'touch {}/file1'.format(dirpath)
    ret, _, _ = g.run(client, cmd)
    self.assertEqual(ret, 0, 'Unable to create file from mount point')

    ret = bring_bricks_online(
        self.mnode, self.volname, bricks_to_bring_offline,
        bring_bricks_online_methods=['volume_start_force'])
    self.assertTrue(ret, 'Failed to bring bricks {} online'.format(
        bricks_to_bring_offline))
    ret = trigger_heal(self.mnode, self.volname)
    self.assertTrue(ret, 'Starting heal failed')
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')

    if ctime:
        ret = self.are_mdata_xattrs_equal()
        self.assertTrue(ret, "glusterfs.mdata mismatch for {}".format(
            dirpath))
    else:
        ret = self.are_stat_timestamps_equal()
        self.assertTrue(ret, "stat mismatch for {}".format(dirpath))

    ret = rmdir(client, dirpath, force=True)
    self.assertTrue(ret, 'Unable to delete directory from mount point')
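
# NOTE: Illustrative sketch only, not part of the original test suite.
# The real checks above (are_mdata_xattrs_equal and
# are_stat_timestamps_equal) are defined elsewhere in this class; the
# sketch below only illustrates the kind of cross-brick timestamp
# comparison they are expected to perform. It assumes get_all_bricks is
# imported from glustolibs.gluster.brick_libs, and the method name and
# 'relpath' parameter are assumptions for illustration.
def _timestamps_match_on_all_bricks_sketch(self, relpath='dir1'):
    """Return True if atime/mtime/ctime of 'relpath' match on all bricks."""
    timestamps = set()
    for brick in get_all_bricks(self.mnode, self.volname):
        node, brick_path = brick.split(':')
        # %X, %Y and %Z print access, modification and change time (epoch)
        ret, out, _ = g.run(node, "stat -c '%X %Y %Z' {}/{}".format(
            brick_path, relpath))
        if ret != 0:
            return False
        timestamps.add(out.strip())
    # After heal, every brick should report identical timestamps
    return len(timestamps) == 1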

def test_gfid_self_heal(self):
    """
    Test GFID self heal

    Description:
    - Creating directory test_gfid_self_heal
    - Write deep directories and files
    - Get arequal before getting bricks offline
    - Select bricks to bring offline
    - Bring brick offline
    - Delete directory on mountpoint where data is written
    - Create the same directory and write the same data
    - Bring bricks online
    - Wait for volume processes to be online
    - Verify volume's all process are online
    - Monitor heal completion
    - Check for split-brain
    - Get arequal after getting bricks online
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    # Creating directory test_gfid_self_heal
    ret = mkdir(self.mounts[0].client_system,
                "{}/test_gfid_self_heal".format(
                    self.mounts[0].mountpoint))
    self.assertTrue(ret, "Failed to create directory")
    g.log.info("Directory 'test_gfid_self_heal' on %s created "
               "successfully", self.mounts[0])

    # Write deep directories and files
    count = 1
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dirname-start-num %d --dir-depth 2 "
               "--dir-length 10 --max-num-of-dirs 5 "
               "--num-of-files 5 %s/dir1"
               % (self.script_upload_path, count, mount_obj.mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create files on mountpoint")
        g.log.info("Successfully created files on mountpoint")
        count += 10

    # Get arequal before getting bricks offline
    ret, result_before_offline = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Arequal before getting bricks offline '
               'is %s', result_before_offline)

    # Select bricks to bring offline
    bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
        self.mnode, self.volname)
    self.assertIsNotNone(bricks_to_bring_offline, "List is empty")

    # Bring brick offline
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks {} offline'.format(
        bricks_to_bring_offline))

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks {} are not offline'.format(
        bricks_to_bring_offline))
    g.log.info('Bringing bricks %s offline is successful',
               bricks_to_bring_offline)

    # Delete directory on mountpoint where data is written
    cmd = ('rm -rf -v %s/test_gfid_self_heal' % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to delete directory")
    g.log.info("Directory deleted successfully for %s", self.mounts[0])

    # Create the same directory and write the same data
    ret = mkdir(self.mounts[0].client_system,
                "{}/test_gfid_self_heal".format(
                    self.mounts[0].mountpoint))
    self.assertTrue(ret, "Failed to create directory")
    g.log.info("Directory 'test_gfid_self_heal' on %s created "
               "successfully", self.mounts[0])

    # Write the same files again
    count = 1
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dirname-start-num %d --dir-depth 2 "
               "--dir-length 10 --max-num-of-dirs 5 "
               "--num-of-files 5 %s/dir1"
               % (self.script_upload_path, count, mount_obj.mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create files on mountpoint")
        g.log.info("Successfully created files on mountpoint")
        count += 10

    # Bring bricks online
    ret = bring_bricks_online(
        self.mnode, self.volname, bricks_to_bring_offline,
        bring_bricks_online_methods=['volume_start_force'])
    self.assertTrue(ret, 'Failed to bring bricks {} online'.format(
        bricks_to_bring_offline))
    g.log.info('Bringing bricks %s online is successful',
               bricks_to_bring_offline)

    # Wait for volume processes to be online
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Failed to wait for volume {} processes to "
                    "be online".format(self.volname))
    g.log.info("Successful in waiting for volume %s processes to be "
               "online", self.volname)

    # Verify volume's all process are online
    ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
    self.assertTrue(ret, "Volume {} : All process are not online".format(
        self.volname))
    g.log.info("Volume %s : All process are online", self.volname)

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')

    # Check for split-brain
    ret = is_volume_in_split_brain(self.mnode, self.volname)
    self.assertFalse(ret, 'Volume is in split-brain state')
    g.log.info('Volume is not in split-brain state')

    # Get arequal after getting bricks online
    ret, result_after_online = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Arequal after getting bricks online '
               'is %s', result_after_online)