Example #1
    def _add_brick_rebalance(self):
        """Create files,Perform Add brick and wait for rebalance to complete"""
        # Create files on mount point using dd command
        cmd = ('cd %s;for i in {1..100000};'
               'do dd if=/dev/urandom bs=1024 count=1 of=file$i;done;'
               % (self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to createfiles on mountpoint")
        g.log.info("Successfully created files on mountpoint")

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=1200)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
Example #2
    def test_add_bricks_stopped_volume(self):
        """Add bricks to stopped volume
        """
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Can't stop volume")

        g.log.info('Volume %s stopped successfully', self.volname)

        vol_bricks = {
            'before': get_all_bricks(self.mnode, self.volname),
            'after': []
        }

        g.log.debug("Adding bricks to volume %s", self.volname)
        ret = expand_volume(mnode=self.mnode,
                            volname=self.volname,
                            servers=self.servers,
                            all_servers_info=self.all_servers_info)
        self.assertTrue(ret, 'Unable to expand volume %s' % self.volname)

        g.log.info('Added bricks to stopped volume %s', self.volname)

        vol_bricks['after'] = get_all_bricks(self.mnode, self.volname)
        self.assertGreater(len(vol_bricks['after']), len(vol_bricks['before']),
                           "Expected new volume size to be greater than old")
        g.log.info('Successfully added bricks to stopped volume')
Example #3
    def test_rebalance_start_when_glusterd_down(self):

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand success", self.volname)

        # Form a new list of servers without mnode in it so that glusterd
        # is not stopped on mnode
        nodes = self.servers[:]
        nodes.remove(self.mnode)

        # Stop glusterd on a server
        self.random_server = random.choice(nodes)
        g.log.info("Stop glusterd on server %s", self.random_server)
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, ("Server %s: Failed to stop glusterd",
                              self.random_server))
        g.log.info("Server %s: Stopped glusterd", self.random_server)

        # Start Rebalance
        g.log.info("Starting rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
                                  self.volname))
        g.log.info("Volume %s: Rebalance start success", self.volname)
        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertFalse(ret, ("Volume %s: Rebalance is completed",
                               self.volname))
        g.log.info("Rebalance failed on one or more nodes. Check rebalance "
                   "status for more details")
Example #4
    def test_add_brick_rebalance_files_with_holes(self):
        """
        Test case:
        1. Create a volume, start it and mount it using fuse.
        2. On the volume root, create files with holes.
        3. After the file creation is complete, add bricks to the volume.
        4. Trigger rebalance on the volume.
        5. Wait for rebalance to complete.
        """
        # On the volume root, create files with holes
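        # (dd's seek skips far ahead before writing, so each file is mostly
        # a hole with a single 1 MiB block of data at its end)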
        cmd = ("cd %s;for i in {1..5000}; do dd if=/dev/urandom"
               " of=file_with_holes$i bs=1M count=1 seek=100M; done" %
               self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create files with holes")

        # After the file creation is complete, add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname)

        # Trigger rebalance on the volume
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0,
            "Failed to start rebalance on the volume %s" % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=9000)
        self.assertTrue(
            ret, "Rebalance is not yet complete on the volume "
            "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
Example #5
    def test_replicated_to_arbiter_volume(self):
        """
        Description:
        Reduce the replica count from replica 3 to arbiter
        """
        # pylint: disable=too-many-statements
        # Remove brick to reduce the replica count from replica 3
        g.log.info("Removing bricks to form replica 2 volume")
        ret = shrink_volume(self.mnode, self.volname, replica_num=0)
        self.assertTrue(ret,
                        "Failed to remove brick on volume %s" % self.volname)
        g.log.info("Successfully removed brick on volume %s", self.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(
            ret, "Volume %s process not online despite waiting "
            "for 300 seconds" % self.volname)
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verifying all bricks online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, "Volume %s : All process are not online" % self.volname)
        g.log.info("Volume %s : All process are online", self.volname)

        # Adding the bricks to make arbiter brick
        g.log.info("Adding bricks to convert to Arbiter Volume")
        replica_arbiter = {'replica_count': 1, 'arbiter_count': 1}
        ret = expand_volume(self.mnode,
                            self.volname,
                            self.servers,
                            self.all_servers_info,
                            add_to_hot_tier=False,
                            **replica_arbiter)
        self.assertTrue(ret, "Failed to expand the volume  %s" % self.volname)
        g.log.info("Changing volume to arbiter volume is successful %s",
                   self.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(
            ret, "Failed to wait for volume %s processes "
            "to be online" % self.volname)
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, "Volume %s : All process are not online" % self.volname)
        g.log.info("Volume %s : All process are online", self.volname)
Example #6
    def test_add_bricks_io_mount_point(self):
        # Mount volume
        ret = self.mount_volume(self.mounts)
        self.assertTrue(ret, 'Mount volume: FAIL')
        g.log.info('Mounted Volume %s: Success', self.volname)

        # Upload io scripts for running IO on mounts
        g.log.info(
            "Upload io scripts to clients %s for running IO on "
            "mounts", self.clients)
        script_location = "/usr/share/glustolibs/io/scripts/file_dir_ops.py"
        ret = upload_scripts(self.clients, script_location)
        if not ret:
            clients = ", ".join(self.clients)
            g.log.error("Failed to upload IO scripts to clients %s", clients)
            raise ExecutionError("Failed to upload IO scripts to clients %s" %
                                 clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   self.clients)
        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        for index, mount_obj in enumerate(self.mounts, start=1):
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 2 "
                   "--max-num-of-dirs 2 "
                   "--num-of-files 10 %s" %
                   (script_location, index + 10, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            # Expand volume
            g.log.debug("Expanding volume %s", self.volname)
            ret = expand_volume(mnode=self.mnode,
                                volname=self.volname,
                                servers=self.servers,
                                all_servers_info=self.all_servers_info)
            self.assertTrue(ret, "Expand volume %s: Fail" % self.volname)
            g.log.info("Volume %s expanded: Success", self.volname)

            # Validate IO on current mount point
            g.log.debug('Validating IO on mount point %s:%s',
                        mount_obj.client_system, mount_obj.mountpoint)
            self.assertTrue(
                validate_io_procs([proc],
                                  [mount_obj]), 'IO Failed on client %s:%s' %
                (mount_obj.client_system, mount_obj.mountpoint))

        g.log.debug("Unmounting mount points")
        self.assertTrue(self.unmount_volume(self.mounts),
                        'Unmount end points: Fail')
        g.log.info("Unmount mount points: Success")

        g.log.info('Successfully added bricks during IO operations')
Example #7
    def test_verify_df_output_when_brick_replaced(self):
        """
        - Take the output of df -h.
        - Replace any one brick for the volumes.
        - Wait till the heal is completed
        - Repeat steps 1, 2 and 3 for all bricks for all volumes.
        - Check if there are any inconsistencies in the output of df -h
        - Remove bricks from volume and check output of df -h
        - Add bricks to volume and check output of df -h
        """

        # Perform some IO on the mount point
        self._perform_io_and_validate()

        # Get the mount size from df -h output
        initial_mount_size = self._get_mount_size_from_df_h_output()

        # Replace all the bricks and wait till the heal completes
        self._replace_bricks_and_wait_for_heal_completion()

        # Get df -h output after brick replace
        mount_size_after_replace = self._get_mount_size_from_df_h_output()

        # Verify the mount point size remains the same after brick replace
        self.assertEqual(
            initial_mount_size, mount_size_after_replace,
            "The mount sizes before and after replace bricks "
            "are not same")

        # Add bricks
        ret = expand_volume(self.mnode,
                            self.volname,
                            self.servers,
                            self.all_servers_info,
                            force=True)
        self.assertTrue(ret, "Failed to add-brick to volume")

        # Get df -h output after volume expand
        mount_size_after_expand = self._get_mount_size_from_df_h_output()

        # Verify df -h output returns greater value
        self.assertGreater(mount_size_after_expand, initial_mount_size,
                           "The mount size has not increased after expanding")

        # Remove bricks
        ret = shrink_volume(self.mnode, self.volname, force=True)
        self.assertTrue(ret, ("Remove brick operation failed on "
                              "%s", self.volname))
        g.log.info("Remove brick operation is successful on "
                   "volume %s", self.volname)

        # Get df -h output after volume shrink
        mount_size_after_shrink = self._get_mount_size_from_df_h_output()

        # Verify the df -h output returns smaller value
        self.assertGreater(mount_size_after_expand, mount_size_after_shrink,
                           "The mount size has not reduced after shrinking")
Example #8
    def test_rebalance_start_when_glusterd_down(self):

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand success", self.volname)

        # Get all servers IP addresses which are part of volume
        ret = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(ret, ("Failed to get the brick list for "
                                   "volume %s", self.volname))
        list_of_servers_used = []
        for brick in ret:
            list_of_servers_used.append(brick.split(":")[0])
        g.log.info("Successfully got server IP list for volume %s",
                   self.volname)

        # Form a new list of servers without mnode in it so that glusterd
        # is not stopped on mnode
        list_of_servers_used = [server for server in list_of_servers_used
                                if server != self.mnode]

        # Stop glusterd on a server
        self.random_server = choice(list_of_servers_used)
        g.log.info("Stop glusterd on server %s", self.random_server)
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, ("Server %s: Failed to stop glusterd",
                              self.random_server))
        g.log.info("Server %s: Stopped glusterd", self.random_server)

        # Start Rebalance
        g.log.info("Starting rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
                                  self.volname))
        g.log.info("Volume %s: Rebalance start success", self.volname)
        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertFalse(ret, ("Volume %s: Rebalance is completed",
                               self.volname))
        g.log.info("Expected: Rebalance failed on one or more nodes."
                   " Check rebalance status for more details")
        error_msg1 = "\"fix layout on / failed\""
        error_msg2 = "\"Transport endpoint is not connected\""
        ret, _, _ = g.run(self.mnode, "grep -w %s /var/log/glusterfs/"
                          "%s-rebalance.log| grep -w %s"
                          % (error_msg1, self.volname, error_msg2))
        self.assertEqual(ret, 0, ("Unexpected : Rebalance failed on volume %s"
                                  "not because of glusterd down on a node",
                                  self.volname))
        g.log.info("\n\nRebalance failed on volume %s due to glusterd down on"
                   "one of the nodes\n\n", self.volname)
Example #9
    def test_rebalance_with_add_brick_and_lookup(self):
        """
        Rebalance with add brick and then lookup on mount
        - Create a Distributed-Replicated volume.
        - Create deep dirs(200) and 100 files on the deepest directory.
        - Expand volume.
        - Initiate rebalance
        - Once rebalance is completed, do a lookup on mount and time it.
        """
        # Create Deep dirs.
        cmd = (
            "cd %s/; for i in {1..200};do mkdir dir${i}; cd dir${i};"
            " if [ ${i} -eq 100 ]; then for j in {1..100}; do touch file${j};"
            " done; fi; done;" % (self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to create the deep dirs and files")
        g.log.info("Deep dirs and files created.")

        # Expand the volume.
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Start Rebalance.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=500)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Do a lookup on the mountpoint and note the time taken to run.
        # The time used for comparison was taken as a benchmark on
        # RHGS 3.5.2, where the lookup took 4 seconds. The condition for
        # the subtest to pass is that the lookup should take no more than
        # 10% over this value, i.e. 4.4 seconds.
        cmd = ("ls -R %s/" % (self.mounts[0].mountpoint))
        start_time = time()
        ret, _, _ = g.run(self.clients[0], cmd)
        end_time = time()
        self.assertEqual(ret, 0, "Failed to do a lookup")
        time_taken = end_time - start_time
        # ToDo: Implement a better approach to get benchmark value
        # self.assertTrue(time_taken <= 4.4, "Lookup takes more time "
        #                 "than the previously benchmarked value.")
        g.log.info("Lookup took : %d seconds", time_taken)
Example #10
    def test_add_brick_rebalance_with_acl_set_to_files(self):
        """
        Test case:
        1. Create a volume, start it and mount it to a client.
        2. Create 10 files on the mount point and set acls on the files.
        3. Check the acl value and collect arequal-checksum.
        4. Add bricks to the volume and start rebalance.
        5. Check the value of acl(it should be same as step 3),
           collect and compare arequal-checksum with the one collected
           in step 3
        """
        # Create 10 files on the mount point.
        cmd = ("cd {}; for i in `seq 1 10`;do touch file$i;done".format(
            self.mount_point))
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create files on mount point")

        for number in range(1, 11):
            ret = set_acl(self.first_client, 'u:joker:rwx',
                          '{}/file{}'.format(self.mount_point, str(number)))
            self.assertTrue(ret, "Failed to set acl on files")

        # Collect arequal on mount point and check acl value
        arequal_checksum_before = collect_mounts_arequal(self.mounts[0])
        self._check_acl_set_to_files()
        g.log.info("Files created and acl set to files properly")

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
        self.assertEqual(
            ret, 0,
            "Failed to start rebalance on the volume %s" % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1200)
        self.assertTrue(
            ret, "Rebalance is not yet complete on the volume "
            "%s" % self.volname)
        g.log.info("Rebalance successfully completed")

        # Check acl value if it's same as before rebalance
        self._check_acl_set_to_files()

        # Check for data loss by comparing arequal before and after ops
        arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum and acl value are SAME")
Example #11
    def test_rebalance_multiple_expansions(self):
        """
        Test case:
        1. Create a volume, start it and mount it
        2. Create some files on mountpoint
        3. Collect arequal checksum on mount point pre-rebalance
        4. Do the following 3 times:
        5. Expand the volume
        6. Start rebalance and wait for it to finish
        7. Collect arequal checksum on mount point post-rebalance
           and compare with value from step 3
        """

        # Create some files on mountpoint
        cmd = ("cd %s; for i in {1..500} ; do "
               "dd if=/dev/urandom of=file$i bs=10M count=1; done" %
               self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertEqual(ret, 0, "IO failed on volume %s" % self.volname)

        # Collect arequal checksum before rebalance
        arequal_checksum_before = collect_mounts_arequal(self.mounts[0])

        for _ in range(3):
            # Add brick to volume
            ret = expand_volume(self.mnode, self.volname, self.servers,
                                self.all_servers_info)
            self.assertTrue(ret,
                            "Failed to add brick on volume %s" % self.volname)

            # Trigger rebalance and wait for it to complete
            ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
            self.assertEqual(
                ret, 0, "Failed to start rebalance on "
                "volume %s" % self.volname)

            # Wait for rebalance to complete
            ret = wait_for_rebalance_to_complete(self.mnode,
                                                 self.volname,
                                                 timeout=1200)
            self.assertTrue(
                ret, "Rebalance is not yet complete on the volume "
                "%s" % self.volname)
            g.log.info("Rebalance successfully completed")

            # Collect arequal checksum after rebalance
            arequal_checksum_after = collect_mounts_arequal(self.mounts[0])

            # Check for data loss by comparing arequal before and after
            # rebalance
            self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                             "arequal checksum is NOT MATCHNG")
            g.log.info("arequal checksum is SAME")
Example #12
    def test_brick_full_add_brick_remove_brick(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Fill few bricks till min-free-limit is reached.
        3. Add brick to the volume.(This should pass.)
        4. Set cluster.min-free-disk to 30%.
        5. Remove bricks from the volume.(This should pass.)
        6. Check for data loss by comparing arequal before and after.
        """
        # Fill a few bricks till min-free-limit is reached
        bricks = get_all_bricks(self.mnode, self.volname)

        # Calculate the usable size and fill till it reaches
        # min free limit
        usable_size = get_usable_size_per_disk(bricks[0])
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        filename = "abc"
        for _ in range(0, usable_size):
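            # Regenerate the filename until it hashes to a subvolume other
            # than the first one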
            while (subvols[find_hashed_subvol(subvols, "/",
                                              filename)[1]] == subvols[0]):
                filename = self._get_random_string()
            ret, _, _ = g.run(
                self.mounts[0].client_system,
                "fallocate -l 1G {}/{}".format(self.mounts[0].mountpoint,
                                               filename))
            self.assertFalse(ret, "Failed to fill disk to min free limit")
            filename = self._get_random_string()
        g.log.info("Disk filled up to min free limit")

        # Collect arequal checksum before ops
        arequal_checksum_before = collect_mounts_arequal(self.mounts[0])

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname)

        # Set cluster.min-free-disk to 30%
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.min-free-disk': '30%'})
        self.assertTrue(ret, "Failed to set cluster.min-free-disk to 30%")

        # Remove bricks from the volume
        ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=1800)
        self.assertTrue(ret, "Failed to remove-brick from volume")
        g.log.info("Remove-brick rebalance successful")

        # Check for data loss by comparing arequal before and after ops
        arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")
Example #13
    def test_add_brick_running_volume(self):
        """Add brick to running volume
        """
        vol_bricks = {
            'before': get_all_bricks(self.mnode, self.volname),
            'after': []
        }

        g.log.debug("Expanding volume %s", self.volname)
        ret = expand_volume(mnode=self.mnode,
                            volname=self.volname,
                            servers=self.servers,
                            all_servers_info=self.all_servers_info)

        self.assertTrue(ret, 'Unable to expand volume %s' % self.volname)
        g.log.info("Add brick successfully %s", self.volname)

        vol_bricks['after'] = get_all_bricks(self.mnode, self.volname)

        self.assertGreater(len(vol_bricks['after']), len(vol_bricks['before']),
                           "Expected new volume size to be greater than old")

        g.log.debug("Expanding volume %s", self.volname)
        ret = expand_volume(mnode=self.mnode,
                            volname=self.volname,
                            servers=self.servers,
                            all_servers_info=self.all_servers_info)
        self.assertTrue(ret, 'Unable to expand volume %s' % self.volname)

        g.log.info("Success on expanding volume %s", self.volname)

        vol_bricks['before'] = vol_bricks['after']
        vol_bricks['after'] = get_all_bricks(self.mnode, self.volname)
        self.assertGreater(len(vol_bricks['after']), len(vol_bricks['before']),
                           "Expected new volume size to be greater than old")

        g.log.info('Successfully added bricks to running volume')
Example #14
    def _expand_volume_and_wait_for_rebalance_to_complete(self):
        """Expand volume and wait for rebalance to complete"""
        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=6000)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
Example #15
    def _expand_volume_and_verify_rebalance(self):
        """ Expands the volume, trigger rebalance and verify file is copied"""

        # Expand the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to expand the volume")

        # Trigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0,
            "Failed to start rebalance on the volume %s" % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1200)
        self.assertTrue(
            ret, "Rebalance is not yet complete on the volume "
            "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
Example #16
    def test_stack_overflow(self):
        """
        Description: Tests to check that there is no stack overflow
                     in readdirp with parallel-readdir enabled.
        Steps :
        1) Create a volume.
        2) Mount the volume using FUSE.
        3) Enable performance.parallel-readdir and
           performance.readdir-ahead on the volume.
        4) Create 10000 files on the mount point.
        5) Add-brick to the volume.
        6) Perform fix-layout on the volume (not rebalance).
        7) From client node, rename all the files, this will result in creation
           of linkto files on the newly added brick.
        8) Do ls -l (lookup) on the mount-point.
        """
        # pylint: disable=too-many-statements
        # Enable performance.parallel-readdir and
        # performance.readdir-ahead on the volume
        options = {"performance.parallel-readdir": "enable",
                   "performance.readdir-ahead": "enable"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, "Failed to set volume options")
        g.log.info("Successfully set volume options")

        # Creating 10000 files on volume root
        m_point = self.mounts[0].mountpoint
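        # (file{1..10000}_0 relies on bash brace expansion on the client)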
        command = 'touch ' + m_point + '/file{1..10000}_0'
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "File creation failed on %s"
                         % m_point)
        g.log.info("Files successfully created on the mount point")

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s",
                              self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Perform fix-layout on the volume
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertEqual(ret, 0, 'Failed to start fix-layout')
        g.log.info('Fix-layout started')

        # Wait for fix-layout to complete
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname,
                                              timeout=3000)
        self.assertTrue(ret, ("Fix-layout failed on volume %s",
                              self.volname))
        g.log.info("Fix-layout is successful on "
                   "volume %s", self.volname)

        # Rename all files from client node
        for i in range(1, 10001):  # rename all 10000 files
            ret = move_file(self.clients[0],
                            '{}/file{}_0'.format(m_point, i),
                            '{}/file{}_1'.format(m_point, i))
            self.assertTrue(ret, "Failed to rename files")
        g.log.info("Files renamed successfully")

        # Perform lookup from the mount-point
        cmd = "ls -lR " + m_point
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to lookup")
        g.log.info("Lookup successful")
Example #17
    def test_data_self_heal_algorithm_diff_heal_command(self):
        """
        Test Volume Option - 'cluster.data-self-heal-algorithm' : 'diff'

        Description:
        - set the volume option
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        "data-self-heal-algorithm": "diff"
        "self-heal-daemon": "off"
        - create IO
        - calculate arequal
        - bring down all bricks processes from selected set
        - modify the data
        - get arequal before getting bricks online
        - bring bricks online
        - expand volume by adding bricks to the volume
        - do rebalance
        - set the volume option "self-heal-daemon": "on" and check for daemons
        - start healing
        - check if heal is completed
        - check for split-brain
        - calculate arequal and compare with arequal before bringing bricks
        offline and after bringing bricks online
        """
        # pylint: disable=too-many-branches,too-many-statements
        # Setting options
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off",
            "data-self-heal-algorithm": "diff"
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Options "
                   "'metadata-self-heal', "
                   "'entry-self-heal', "
                   "'data-self-heal', "
                   "'self-heal-daemon' "
                   "are set to 'off',"
                   "'data-self-heal-algorithm' "
                   "is set to 'diff' successfully")

        # Creating files on client side
        all_mounts_procs = []
        g.log.info("Generating data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        # Creating files
        command = "/usr/bin/env python %s create_files -f 100 %s" % (
            self.script_upload_path, self.mounts[0].mountpoint)

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'self-heal-daemon' is set to 'off' successfully")

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(
            filter(None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                          bricks_to_bring_offline_dict['cold_tier_bricks'] +
                          bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Modify the data
        all_mounts_procs = []
        g.log.info("Modifying data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        command = ("/usr/bin/env python %s create_files -f 100 "
                   "--fixed-file-size 1M %s" %
                   (self.script_upload_path, self.mounts[0].mountpoint))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Get arequal before getting bricks online
        g.log.info('Getting arequal before getting bricks online...')
        ret, result_before_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks online '
                   'is successful')

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Expand volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume...")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info("Expanding volume is successful on volume %s", self.volname)

        # Do rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Failed to start rebalance')
        g.log.info('Rebalance is started')

        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Rebalance is not completed')
        g.log.info('Rebalance is completed successfully')

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after getting bricks online
        g.log.info('Getting arequal after getting bricks online...')
        ret, result_after_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks online '
                   'is successful')

        # Checking arequals before bringing bricks offline
        # and after bringing bricks online
        self.assertItemsEqual(result_before_online, result_after_online,
                              'Checksums are not equal')
        g.log.info('Checksums are equal')
Example #18
    def test_expanding_volume_when_io_in_progress(self):
        # pylint: disable=too-many-statements
        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Expanding volume by adding bricks to the volume when IO in progress
        g.log.info("Start adding bricks to volume when IO in progress")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume while IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Expanding volume while IO in progress on "
            "volume %s : Success", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info("Waiting for volume %s process to be online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Started rebalance on the volume %s: Success", self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1800)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance status on volume %s: Complete", self.volname)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for the "
                                  "volume %s", self.volname))
        g.log.info("Rebalance status on volume %s: Complete", self.volname)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO on all mounts: Complete")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("List all files and directories: Success")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Checking if there are any migration failures
        status = get_rebalance_status(self.mnode, self.volname)
        for each_node in status['node']:
            self.assertEqual(
                0, int(each_node['failures']),
                "Rebalance failed to migrate few files on %s" %
                each_node['nodeName'])
            g.log.info("No migration failures on %s", each_node['nodeName'])
Example #19
    def test_rebalance_with_brick_down(self):
        """
        Rebalance with brick down in replica
        - Create a Replica volume.
        - Bring down one of the brick down in the replica pair
        - Do some IO and create files on the mount point
        - Add a pair of bricks to the volume
        - Initiate rebalance
        - Bring back the brick which was down.
        - After self heal happens, all the files should be present.
        """
        # Log the volume info and status before brick is down.
        log_volume_info_and_status(self.mnode, self.volname)

        # Bring one of the bricks offline
        brick_list = get_all_bricks(self.mnode, self.volname)
        ret = bring_bricks_offline(self.volname, choice(brick_list))
        self.assertTrue(ret, "Failed to bring the brick offline")

        # Log the volume info and status after brick is down.
        log_volume_info_and_status(self.mnode, self.volname)

        # Create files at mountpoint.
        cmd = (
            "/usr/bin/env python %s create_files "
            "-f 2000 --fixed-file-size 1k --base-file-name file %s"
            % (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(
            self.mounts[0].client_system, cmd, user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete.
        self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs,
                                                self.mounts[0]),
                        "IO failed on some of the clients")
        g.log.info("IO completed on the clients")

        # Compute the arequal checksum before bringing all bricks online
        arequal_before_all_bricks_online = collect_mounts_arequal(self.mounts)

        # Log the volume info and status before expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Expand the volume.
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Log the volume info after expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Start Rebalance.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Log the volume info and status before bringing all bricks online
        log_volume_info_and_status(self.mnode, self.volname)

        # Bring all bricks online.
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Not able to start volume with force option")
        g.log.info("Volume start with force option successful.")

        # Log the volume info and status after bringing all bricks online
        log_volume_info_and_status(self.mnode, self.volname)

        # Monitor heal completion.
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "heal has not yet completed")
        g.log.info("Self heal completed")

        # Compute the arequal checksum after all bricks online.
        arequal_after_all_bricks_online = collect_mounts_arequal(self.mounts)

        # Comparing arequal checksum before and after the operations.
        self.assertEqual(arequal_before_all_bricks_online,
                         arequal_after_all_bricks_online,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")
Example #20
    def test_snap_rebalance(self):
        # pylint: disable=too-many-statements, too-many-locals
        """

        Snapshot rebalance contains tests which verify snapshot clone,
        creating snapshot and performing I/O on mountpoints

        Steps:

        1. Create snapshot of a volume
        2. Activate snapshot
        3. Clone snapshot and Activate
        4. Mount Cloned volume
        5. Perform I/O on mount point
        6. Calculate arequal for bricks and mountpoints
        7. Add more bricks to cloned volume
        8. Initiate rebalance
        9. Validate arequal of bricks and mountpoints
        """

        # Creating snapshot:
        g.log.info("Starting to Create snapshot")
        ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
        self.assertEqual(
            ret, 0, ("Failed to create snapshot for volume %s" % self.volname))
        g.log.info("Snapshot %s created successfully for volume %s", self.snap,
                   self.volname)

        # Activating snapshot
        g.log.info("Starting to Activate Snapshot")
        ret, _, _ = snap_activate(self.mnode, self.snap)
        self.assertEqual(ret, 0,
                         ("Failed to Activate snapshot %s" % self.snap))
        g.log.info("Snapshot %s activated successfully", self.snap)

        # Creating a Clone of snapshot:
        g.log.info("creating Clone Snapshot")
        ret, _, _ = snap_clone(self.mnode, self.snap, self.clone)
        self.assertEqual(ret, 0, ("Failed to clone volume %s" % self.clone))
        g.log.info("clone volume %s created successfully", self.clone)

        # Starting clone volume
        g.log.info("starting clone volume")
        ret, _, _ = volume_start(self.mnode, self.clone)
        self.assertEqual(ret, 0, "Failed to start %s" % self.clone)
        g.log.info("clone volume %s started successfully", self.clone)

        # Mounting a clone volume
        g.log.info("Mounting created clone volume")
        ret, _, _ = mount_volume(self.clone, self.mount_type, self.mount1,
                                 self.mnode, self.clients[0])
        self.assertEqual(ret, 0,
                         "clone Volume mount failed for %s" % self.clone)
        g.log.info("cloned volume %s mounted Successfully", self.clone)

        # Validate clone volume mounted or not
        g.log.info("Validate clone volume mounted or not")
        ret = is_mounted(self.clone, self.mount1, self.mnode, self.clients[0],
                         self.mount_type)
        self.assertTrue(
            ret, "Cloned Volume not mounted on mount point: %s" % self.mount1)
        g.log.info("Cloned Volume %s mounted on %s", self.clone, self.mount1)

        # write files to mountpoint
        g.log.info("Starting IO on %s mountpoint...", self.mount1)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name file %s" %
               (self.script_upload_path, self.mount1))
        proc = g.run(self.clients[0], cmd)
        all_mounts_procs.append(proc)

        self.check_arequal()

        # Expanding the cloned volume (the rest of the test operates on
        # the clone, so that is the volume to expand)
        g.log.info("Starting to expand volume")
        ret = expand_volume(self.mnode, self.clone, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to expand volume %s" % self.clone)
        g.log.info("Expand volume successful")

        ret, _, _ = rebalance_start(self.mnode, self.clone)
        self.assertEqual(ret, 0, "Failed to start rebalance")
        g.log.info("Successfully started rebalance on the "
                   "volume %s", self.clone)

        # Log Rebalance status
        g.log.info("Log Rebalance status")
        _, _, _ = rebalance_status(self.mnode, self.clone)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.clone)
        self.assertTrue(ret, ("Rebalance is not yet complete "
                              "on the volume %s", self.clone))
        g.log.info("Rebalance is successfully complete on "
                   "the volume %s", self.clone)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.clone)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for "
                                  "the volume %s", self.clone))
        g.log.info("Successfully got rebalance status of the "
                   "volume %s", self.clone)

        self.check_arequal()
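
    # check_arequal is a helper defined elsewhere in the test class. A
    # rough sketch, assuming the arequal-checksum tool is installed on the
    # servers and that bricks of the same replica subvolume should hold
    # identical data:
    def check_arequal(self):
        """Sketch: compare arequal-checksum across bricks of each subvolume"""
        subvols = get_subvols(self.mnode, self.clone)['volume_subvols']
        for subvol in subvols:
            checksums = []
            for brick in subvol:
                node, brick_path = brick.split(':')
                cmd = ('arequal-checksum -p %s -i .glusterfs -i .landfill'
                       % brick_path)
                ret, out, _ = g.run(node, cmd)
                self.assertEqual(ret, 0, 'arequal failed on %s' % brick)
                checksums.append(out.splitlines()[-1].split(':')[-1])
            self.assertEqual(len(set(checksums)), 1,
                             'Bricks of subvolume %s differ' % subvol)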
Example #21
    def test_expanding_volume_when_io_in_progress(self):
        """Test expanding volume (Increase distribution) using existing
        servers bricks when IO is in progress.

        Description:
            - add bricks
            - starts rebalance
            - wait for rebalance to complete
            - validate IO
        """
        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Expanding volume by adding bricks to the volume when IO in progress
        g.log.info("Start adding bricks to volume when IO in progress")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Expanding volume when IO in progress is successful on "
            "volume %s", self.volname)

        # Wait for gluster processes to come online
        time.sleep(30)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Verify all volume processes are online
        g.log.info("Verifying all volume processes are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All processes are not online", self.volname))
        g.log.info("Volume %s : All processes are online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Log Rebalance status
        g.log.info("Log Rebalance status")
        _, _, _ = rebalance_status(self.mnode, self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for the "
                                  "volume %s", self.volname))
        g.log.info("Successfully got rebalance status of the volume %s",
                   self.volname)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

    def test_subdir_with_addbrick(self):

        # pylint: disable=too-many-statements
        """
        Mount the volume
        Create 2 subdir on mount point, subdir1 and subdir2
        Auth allow - Client1(subdir1,subdir2),Client2(subdir1,subdir2)
        Mount the subdir1 on client 1 and subdir2 on client2
        Start IO's on both subdirs
        Perform add-brick and rebalance
        """

        # Create  directories subdir1 and subdir2 on mount point
        ret = mkdir(self.mounts[0].client_system,
                    "%s/subdir1" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'subdir1' on"
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        ret = mkdir(self.mounts[0].client_system,
                    "%s/subdir2" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'subdir2' on"
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        # unmount volume
        ret = self.unmount_volume(self.mounts)
        self.assertTrue(ret, "Volumes Unmount failed")
        g.log.info("Volumes Unmounted successfully")

        # Set authentication on the subdirectory subdir1
        # and subdir2 to access by 2 clients
        g.log.info(
            'Setting authentication on subdir1 and subdir2 '
            'for clients %s and %s', self.clients[0], self.clients[1])
        ret = set_auth_allow(
            self.volname, self.mnode, {
                '/subdir1': [self.clients[0], self.clients[1]],
                '/subdir2': [self.clients[0], self.clients[1]]
            })
        self.assertTrue(
            ret, 'Failed to set Authentication on volume %s' % self.volname)

        # Creating mount list for subdirectories
        self.subdir_mounts = [
            copy.deepcopy(self.mounts[0]),
            copy.deepcopy(self.mounts[1])
        ]
        self.subdir_mounts[0].volname = "%s/subdir1" % self.volname
        self.subdir_mounts[1].volname = "%s/subdir2" % self.volname

        # Mount Subdirectory "subdir1" on client 1 and "subdir2" on client 2
        for mount_obj in self.subdir_mounts:
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Failed to mount  %s on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Successfully mounted %s on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully mounted subdirectories on client1"
                   "and clients 2")

        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.subdir_mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.subdir_mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.subdir_mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Start add-brick (subvolume-increase)
        g.log.info("Start adding bricks to volume when IO in progress")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Expanding volume when IO in progress is successful on "
            "volume %s", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("All process  for volume %s are not"
                              "online", self.volname))
        g.log.info("All volume %s processes are now online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname, 600)
        self.assertTrue(
            ret, "Rebalance did not complete "
            "despite waiting for 10 minutes")
        g.log.info("Rebalance successfully completed on the volume %s",
                   self.volname)

        # Again validate if subdirectories are still mounted post add-brick

        for mount_obj in self.subdir_mounts:
            ret = mount_obj.is_mounted()
            self.assertTrue(
                ret, ("Subdirectory %s is not mounted on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Subdirectory %s is mounted on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully validated that subdirectories are mounted"
                   "on client1 and clients 2 post add-brick operation")

    def test_glustershd_on_all_volume_types(self):
        """
        Test Script to verify the glustershd server vol file
        has only entries for replicate volumes

        * Create multiple volumes and start all volumes
        * Check the glustershd processes - Only One glustershd should be listed
        * Check the glustershd server vol file - should contain entries only
                                                 for involved replicate volumes
        * Add bricks to the replicate volume - it should convert to
                                               distributed-replicate
        * Check the glustershd server vol file - newly added bricks
                                                 should be present
        * Check the glustershd processes - Only 1 glustershd should be listed

        """
        # pylint: disable=too-many-statements
        nodes = self.servers

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, glustershd_pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process "
                              "found : %s" % glustershd_pids))
        g.log.info(
            "Successful in getting Single self heal daemon process"
            " on all nodes %s", nodes)

        # For all the volumes, check whether bricks present in
        # glustershd server vol file
        volume_list = get_volume_list(self.mnode)
        for volume in volume_list:
            g.log.info("Volume Name: %s", volume)
            volume_type_info = get_volume_type_info(self.mnode, volume)
            volume_type = (volume_type_info['volume_type_info']['typeStr'])

            # get the bricks for the volume
            g.log.info("Fetching bricks for the volume : %s", volume)
            bricks_list = get_all_bricks(self.mnode, volume)
            g.log.info("Brick List : %s", bricks_list)

            # validate the bricks present in volume info with
            # glustershd server volume file
            g.log.info("Start parsing file %s on "
                       "node %s", self.GLUSTERSHD, self.mnode)
            ret = do_bricks_exist_in_shd_volfile(self.mnode, volume,
                                                 bricks_list)
            if volume_type == 'Distribute':
                self.assertFalse(ret,
                                 ("Bricks exist in glustershd server "
                                  "volume file for %s Volume" % volume_type))
                g.log.info(
                    "EXPECTED : Bricks do not exist in the glustershd "
                    "server volume file for %s Volume", volume_type)
            else:
                self.assertTrue(ret, ("Brick List from volume info is "
                                      "different from glustershd server "
                                      "volume file. Please check log "
                                      "file for details"))
                g.log.info(
                    "Bricks exist in glustershd server volume file "
                    "for %s Volume", volume_type)

        # expanding volume for Replicate
        for volume in volume_list:
            volume_type_info = get_volume_type_info(self.mnode, volume)
            volume_type = (volume_type_info['volume_type_info']['typeStr'])
            if volume_type == 'Replicate':
                g.log.info("Start adding bricks to volume %s", volume)
                ret = expand_volume(self.mnode, volume, self.servers,
                                    self.all_servers_info)
                self.assertTrue(ret, ("Failed to add bricks to "
                                      "volume %s " % volume))
                g.log.info("Add brick successful")

                # Log Volume Info and Status after expanding the volume
                g.log.info("Logging volume info and Status after "
                           "expanding volume")
                ret = log_volume_info_and_status(self.mnode, volume)
                self.assertTrue(ret, ("Logging volume info and status failed "
                                      "on volume %s", volume))
                g.log.info(
                    "Successful in logging volume info and status "
                    "of volume %s", volume)

                # Verify all volume processes are online, waiting up to 60 sec
                g.log.info("Verifying all volume processes are online")
                ret = wait_for_volume_process_to_be_online(
                    self.mnode, volume, 60)
                self.assertTrue(ret, ("Volume %s : All processes are not "
                                      "online", volume))
                g.log.info(
                    "Successfully verified volume %s processes "
                    "are online", volume)

                # check the type for the replicate volume
                volume_type_info_for_replicate_after_adding_bricks = \
                    get_volume_type_info(self.mnode, volume)
                volume_type_for_replicate_after_adding_bricks = \
                    (volume_type_info_for_replicate_after_adding_bricks
                     ['volume_type_info']['typeStr'])

                self.assertEqual(volume_type_for_replicate_after_adding_bricks,
                                 'Distributed-Replicate',
                                 ("Replicate volume type is not converted to "
                                  "Distributed-Replicate after adding bricks"))
                g.log.info("Replicate Volume is successfully converted to"
                           " Distributed-Replicate after adding bricks")

                # get the bricks for the volume after expanding
                bricks_list_after_expanding = get_all_bricks(
                    self.mnode, volume)
                g.log.info("Brick List after expanding "
                           "volume: %s", bricks_list_after_expanding)

                # validate the bricks present in volume info
                # with glustershd server volume file after adding bricks
                g.log.info("Starting parsing file %s", self.GLUSTERSHD)
                ret = do_bricks_exist_in_shd_volfile(
                    self.mnode, volume, bricks_list_after_expanding)

                self.assertTrue(ret, ("Brick List from volume info is "
                                      "different from glustershd server "
                                      "volume file after expanding bricks. "
                                      "Please check log file for details"))
                g.log.info("Brick List from volume info is same as from "
                           "glustershd server volume file after "
                           "expanding bricks.")

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, glustershd_pids_after_adding_bricks = \
            get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret,
                        ("Either no self-heal daemon process was found or "
                         "more than one self-heal daemon process was "
                         "found : %s" % glustershd_pids_after_adding_bricks))
        g.log.info(
            "Successful in getting Single self heal daemon process"
            " on all nodes %s", nodes)

        self.assertNotEqual(
            glustershd_pids, glustershd_pids_after_adding_bricks,
            "Self Daemon process is same before and"
            " after adding bricks")
        g.log.info("Self Heal Daemon Process is different before and "
                   "after adding bricks")
Example #24
    def test_snapshot_while_rebalance(self):
        # pylint: disable=too-many-statements, missing-docstring
        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Create one snapshot of volume using no-timestamp option
        cmd_str = ("gluster snapshot create %s %s %s" %
                   ("snapy", self.volname, "no-timestamp"))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0,
                         ("Failed to create snapshot for %s" % self.volname))
        g.log.info("Snapshot snapy created successfully "
                   "for volume %s", self.volname)

        # Check for no of snaps using snap_list it should be 1
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(
            1, len(snap_list), "Expected 1 snapshot "
            "found %s snapshots" % len(snap_list))
        g.log.info("Successfully validated number of snaps.")

        # validate snap name
        self.assertIn("snapy", snap_list, " snap not found")
        g.log.info("Successfully validated names of snap")

        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # expanding volume
        g.log.info("Start adding bricks to volume %s", self.volname)
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to add bricks to "
                              "volume %s " % self.volname))
        g.log.info("Add brick successful")

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed "
                              "on volume %s", self.volname))
        g.log.info(
            "Successful in logging volume info and status "
            "of volume %s", self.volname)

        # Verify all volume processes are online, waiting up to 60 sec
        g.log.info("Verifying all volume processes are online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   60)
        self.assertTrue(ret, ("Volume %s : All processes are not "
                              "online", self.volname))
        g.log.info("Successfully Verified volume %s "
                   "processes are online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, err = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         ("Failed to start rebalance on "
                          "the volume %s with error %s" % (self.volname, err)))
        g.log.info("Successfully started rebalance on the "
                   "volume %s", self.volname)

        # Log Rebalance status
        g.log.info("Log Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to log rebalance status")
        g.log.info("successfully logged rebalance status")

        # Create one snapshot of volume during rebalance
        cmd_str = ("gluster snapshot create %s %s %s" %
                   ("snapy_rebal", self.volname, "no-timestamp"))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertNotEqual(ret, 0, ("successfully created 'snapy_rebal'"
                                     " for %s" % self.volname))
        g.log.info("Snapshot 'snapy_rebal' not created as rebalance is in "
                   "progress check log")
        # Check for no of snaps using snap_list it should be 1
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(
            1, len(snap_list), "Expected 1 snapshot "
            "found %s snapshot" % len(snap_list))
        g.log.info("Successfully validated number of snaps.")

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete "
                              "on the volume %s", self.volname))
        g.log.info("Rebalance is successfully complete on "
                   "the volume %s", self.volname)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for "
                                  "the volume %s", self.volname))
        g.log.info("Successfully got rebalance status of the "
                   "volume %s", self.volname)

        # Create one snapshot of volume post rebalance with same name
        cmd_str = ("gluster snapshot create %s %s %s" %
                   ("snapy_rebal", self.volname, "no-timestamp"))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0,
                         ("Failed to create snapshot for %s" % self.volname))
        g.log.info(
            "Snapshot snapy_rebal created successfully "
            "for volume  %s", self.volname)

        # Check for no of snaps using snap_list it should be 2
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(
            2, len(snap_list), "Expected 2 snapshots "
            "found %s snapshot" % len(snap_list))
        g.log.info("Successfully validated number of snaps.")

        # validate snap name
        self.assertIn("snapy_rebal", snap_list, " snap not found")
        g.log.info("Successfully validated names of snap")
Example #25
    def test_rebalance_with_hidden_files(self):
        # pylint: disable=too-many-statements
        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_files "
                   "--base-file-name . "
                   "-f 99 %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Verify DHT values across mount points
        for mount_obj in self.mounts:
            g.log.debug("Verifying hash layout values %s:%s",
                        mount_obj.client_system, mount_obj.mountpoint)
            ret = validate_files_in_dir(mount_obj.client_system,
                                        mount_obj.mountpoint,
                                        test_type=FILE_ON_HASHED_BRICKS,
                                        file_type=FILETYPE_FILES)
            self.assertTrue(
                ret, "Expected - Files are created on only "
                "sub-volume according to its hashed value")
            g.log.info("Hash layout values are verified %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)

        # Getting areequal checksum before rebalance
        g.log.info("Getting areequal checksum before rebalance")
        arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts)

        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify all volume processes are online
        g.log.info("Verifying all volume processes are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All processes are not online", self.volname))
        g.log.info("Volume %s : All processes are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Checking if there are any migration failures
        status = get_rebalance_status(self.mnode, self.volname)
        for each_node in status['node']:
            failed_files_count = int(each_node['failures'])
            self.assertEqual(
                failed_files_count, 0,
                "Rebalance failed to migrate few files on %s" %
                each_node['nodeName'])
            g.log.info("There are no migration failures")

        # Getting areequal checksum after rebalance
        g.log.info("Getting areequal checksum after rebalance")
        arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts)

        # Comparing arequals checksum before and after rebalance
        g.log.info("Comparing arequals checksum before and after rebalance")
        self.assertEqual(arequal_checksum_before_rebalance,
                         arequal_checksum_after_rebalance,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")
Example #26
    def _testcase(self, number_of_expands=1):
        """
        Test case:
        1. Create a volume start it and mount on the client.
        2. Set full permission on the mount point.
        3. Add new user to the client.
        4. As the new user create dirs/files.
        5. Compute arequal checksum and check permission on / and subdir.
        6. expand cluster according to number_of_expands and start rebalance.
        7. After rebalance is completed:
        7.1 check arequal checksum
        7.2 verify no change in / and sub dir permissions.
        7.3 As the new user create and delete file/dir.
        """
        # Set full permissions on the mount point.
        ret = set_file_permissions(self.clients[0], self.mountpoint, "-R 777")
        self.assertTrue(ret, "Failed to set permissions on the mount point")
        g.log.info("Set full permissions on the mount point")

        # Create dirs/files as self.test_user
        cmd = (r'su -l %s -c "cd %s;'
               r'for i in {0..9}; do mkdir d\$i; done;'
               r'for i in {0..99}; do let x=\$i%%10;'
               r'dd if=/dev/urandom of=d\$x/f.\$i bs=1024 count=1; done"' %
               (self.user, self.mountpoint))
        ret, _, _ = g.run(self.client, cmd)
        self.assertEqual(ret, 0, ("Failed to create files as %s", self.user))
        g.log.info("IO as %s is successful", self.user)

        # check permission on / and subdir
        self._check_user_permission()

        # get arequal checksum before expand
        self.arequal_checksum_before = collect_mounts_arequal(self.mounts[0])

        self._logged_vol_info()

        # expand the volume
        for i in range(number_of_expands):
            ret = expand_volume(self.mnode, self.volname, self.servers,
                                self.all_servers_info)
            self.assertTrue(
                ret, ("Failed to expand volume %s on iteration %d",
                      self.volname, i))

        self._logged_vol_info()
        # Start Rebalance and wait for completion
        self._start_rebalance_and_wait()

        # compare arequals checksum before and after rebalance
        self._get_arequal_and_check_if_equal_to_before()

        # permissions check on / and sub dir
        self._check_user_permission()

        # Create/Delete file as self.test_user
        cmd = ('su -l %s -c '
               '"cd %s; touch file.test;'
               'find . -mindepth 1 -maxdepth 1 -type d | xargs rm -rf"' %
               (self.user, self.mountpoint))
        ret, _, _ = g.run(self.client, cmd)

        self.assertEqual(ret, 0, ("User %s failed to create files", self.user))
        g.log.info("IO as %s is successful", self.user)

    def test_rebalance_with_quota_enabled(self):
        """
        Test rebalance with quota enabled on root.
        1. Create Volume of type distribute
        2. Set Quota limit on the root directory
        3. Do some IO to reach the Hard limit
        4. After IO ends, compute arequal checksum
        5. Add bricks to the volume.
        6. Start rebalance
        7. After rebalance is completed, check arequal checksum
        """
        # Enable Quota
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on volume %s", self.volname)

        # Set the Quota timeouts to 0 for strict accounting
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname))
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname))
        g.log.info("Quota soft and hard timeout has been set to 0 for %s",
                   self.volname)

        # Set the quota limit of 1 GB on root dir of the volume
        ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/", "1GB")
        self.assertEqual(ret, 0, "Failed to set Quota for dir root")
        g.log.info("Successfully set quota limit for dir root")

        # Do some IO until hard limit is reached.
        cmd = ("/usr/bin/env python %s create_files "
               "-f 1024 --fixed-file-size 1M --base-file-name file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete and validate IO
        self.assertTrue(
            wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]),
            "IO failed on some of the clients")
        g.log.info("IO completed on the clients")

        # Validate quota
        ret = quota_validate(self.mnode,
                             self.volname,
                             path='/',
                             hard_limit=1073741824,
                             sl_exceeded=True,
                             hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for '/'")
        g.log.info("Quota Validated for path '/'")

        # Compute arequal checksum.
        arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts)

        # Log Volume info and status before expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Expand the volume.
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Log volume info and status after expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Perform rebalance start operation.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to  start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Rebalance started.")

        # Check rebalance is in progress (the local name avoids shadowing
        # the rebalance_status() helper used elsewhere)
        status_info = get_rebalance_status(self.mnode, self.volname)
        status_str = status_info['aggregate']['statusStr']
        self.assertEqual(status_str, "in progress",
                         ("Rebalance is not in 'in progress' state: either "
                          "rebalance has already completed or fetching the "
                          "rebalance status failed"))
        g.log.info("Rebalance is in 'in progress' state")

        # Wait till rebalance ends.
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Validate quota
        ret = quota_validate(self.mnode,
                             self.volname,
                             path='/',
                             hard_limit=1073741824,
                             sl_exceeded=True,
                             hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for '/'")
        g.log.info("Quota Validated for path '/'")

        # Compute arequal checksum.
        arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts)

        # Comparing arequals checksum before and after rebalance.
        self.assertEqual(arequal_checksum_before_rebalance,
                         arequal_checksum_after_rebalance,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")
Example #28
    def test_remove_brick_no_commit_followed_by_rebalance(self):
        """
        Description: Tests to check that there is no data loss when
                     remove-brick operation is stopped and then new bricks
                     are added to the volume.
        Steps:
        1) Create a volume.
        2) Mount the volume using FUSE.
        3) Create files and dirs on the mount-point.
        4) Calculate the arequal-checksum on the mount-point.
        5) Start remove-brick operation on the volume.
        6) While migration is in progress, stop the remove-brick
           operation.
        7) Add bricks to the volume and trigger rebalance.
        8) Wait for rebalance to complete.
        9) Calculate the arequal-checksum on the mount-point.
        """
        # Start IO on mounts
        m_point = self.mounts[0].mountpoint
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dir-length 10 --dir-depth 2 --max-num-of-dirs 1 "
               "--num-of-files 50 --file-type empty-file %s" %
               (self.script_upload_path, m_point))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        g.log.info("IO on %s:%s is started successfully",
                   self.mounts[0].client_system, m_point)

        # Validate IO
        self.assertTrue(validate_io_procs([proc], self.mounts[0]),
                        "IO failed on some of the clients")

        # Calculate arequal-checksum before starting remove-brick
        ret, arequal_before = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Form bricks list for volume shrink
        remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                             self.volname,
                                                             subvol_name=1)
        self.assertIsNotNone(remove_brick_list, ("Volume %s: Failed to "
                                                 "form bricks list for "
                                                 "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ", self.volname))
        g.log.info("Volume %s shrink started ", self.volname)

        # Log remove-brick status
        ret, out, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                   "status")
        self.assertEqual(ret, 0,
                         ("Remove-brick status failed on %s ", self.volname))

        # Check if migration is in progress
        if 'in progress' in out:
            # Stop remove-brick process
            g.log.info("Stop removing bricks from volume")
            ret, out, _ = remove_brick(self.mnode, self.volname,
                                       remove_brick_list, "stop")
            self.assertEqual(ret, 0, "Failed to stop remove-brick process")
            g.log.info("Stopped remove-brick process successfully")
        else:
            g.log.error("Remove-brick migration already completed before "
                        "it could be stopped")

        # Sleep for 30 secs so that any running remove-brick process stops
        sleep(30)

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Add-brick failed", self.volname))
        g.log.info("Volume %s: Add-brick successful", self.volname)

        # Trigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Volume %s: Failed to start rebalance", self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance has not completed")
        g.log.info("Rebalance has completed successfully")

        # Calculate arequal-checksum on mount-point
        ret, arequal_after = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Check if there is any data loss
        self.assertEqual(set(arequal_before), set(arequal_after),
                         ("There is data loss"))
        g.log.info("The checksum before and after rebalance is same."
                   " There is no data loss.")
Example #29
    def test_add_bricks_io_mount_point(self):
        """
        Test Case Steps:
        1. Create, start and mount volume.
        2. Start I/O on volume.
        3. Add brick and start rebalance on the volume.
        4. Wait for rebalance to complete.
        5. Check for I/O failures if any.
        """

        # Mount volume
        ret = self.mount_volume(self.mounts)
        self.assertTrue(ret, 'Mount volume: FAIL')
        g.log.info('Mounted Volume %s: Success', self.volname)

        # Upload io scripts for running IO on mounts
        g.log.info(
            "Upload io scripts to clients %s for running IO on "
            "mounts", self.clients)
        script_location = "/usr/share/glustolibs/io/scripts/file_dir_ops.py"
        ret = upload_scripts(self.clients, script_location)
        if not ret:
            clients = ", ".join(self.clients)
            raise ExecutionError("Failed to upload IO scripts to clients %s" %
                                 clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   self.clients)

        # Start IO on mounts
        io_process = []
        for index, mount_obj in enumerate(self.mounts, start=1):
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 2 "
                   "--max-num-of-dirs 2 "
                   "--num-of-files 10 %s" %
                   (script_location, index + 10, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            io_process.append(proc)

        # Expand volume
        ret = expand_volume(mnode=self.mnode,
                            volname=self.volname,
                            servers=self.servers,
                            all_servers_info=self.all_servers_info)
        self.assertTrue(ret, "Expand volume %s: Fail" % self.volname)
        g.log.info("Volume %s expanded: Success", self.volname)

        # Validate IO on current mount point
        self.assertTrue(validate_io_procs(io_process, self.mounts),
                        'IO Failed on clients')
        g.log.info('I/O successful on all the clients')

        self.assertTrue(self.unmount_volume(self.mounts),
                        'Unmount end points: Fail')
        g.log.info("Unmount mount points: Success")

        g.log.info('Add-brick during IO operations completed successfully')

    def test_rebalance_with_force(self):

        # Getting arequal checksum before rebalance
        g.log.info("Getting arequal checksum before rebalance")
        arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts)

        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            ret, "Logging volume info and status failed on "
            "volume %s" % self.volname)
        g.log.info(
            "Successful in logging volume info and"
            "status of volume %s", self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand successful", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s: one or more volume process are "
                              "not up", self.volname))
        g.log.info("All volume %s processes are online", self.volname)

        # Verify all volume processes are online
        g.log.info("Verifying all volume processes are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All processes are not online", self.volname))
        g.log.info("Volume %s : All processes are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info(
            "Successful in logging volume info and"
            "status of volume %s", self.volname)

        # Start Rebalance with force
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance with "
                                  "force", self.volname))
        g.log.info("Volume %s: Started rebalance with force option",
                   self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=600)
        self.assertTrue(
            ret, ("Volume %s: Rebalance is still in-progress ", self.volname))
        g.log.info("Volume %s: Rebalance completed", self.volname)

        # Getting arequal checksum after rebalance
        g.log.info("Getting arequal checksum after rebalance with force "
                   "option")
        arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts)

        # Comparing arequals checksum before and after rebalance with force
        # option
        g.log.info("Comparing arequals checksum before and after rebalance"
                   "with force option")
        self.assertEqual(arequal_checksum_before_rebalance,
                         arequal_checksum_after_rebalance,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")

        # Checking if rebalance skipped any files
        status = get_rebalance_status(self.mnode, self.volname)
        for each_node in status['node']:
            self.assertEqual(
                int(each_node['skipped']), 0,
                "Few files are skipped on node %s" % each_node['nodeName'])
            g.log.info("No files are skipped on %s", each_node['nodeName'])