    def test_add_brick_while_remove_brick_is_in_progress(self):
        # DHT Layout and hash validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Form bricks list for volume shrink
        self.remove_brick_list = form_bricks_list_to_remove_brick(
            self.mnode, self.volname, subvol_name=1)
        self.assertIsNotNone(self.remove_brick_list, ("Volume %s: Failed to "
                                                      "form bricks list for "
                                                      "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks
        g.log.info("Start removing bricks from volume")
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ", self.volname))
        g.log.info("Volume %s shrink started ", self.volname)
        # Log remove-brick status
        g.log.info("Logging Remove-brick status")
        ret, out, err = remove_brick(self.mnode, self.volname,
                                     self.remove_brick_list, "status")
        self.assertEqual(ret, 0,
                         ("Remove-brick status failed on %s ", self.volname))
        g.log.info("Remove-brick status %s", self.volname)
        g.log.info(out)

        # Expanding volume while volume shrink is in-progress
        g.log.info("Volume %s: Expand volume while volume shrink in-progress",
                   self.volname)
        _, _, err = add_brick(self.mnode, self.volname, self.add_brick_list)
        self.assertIn(
            "rebalance is in progress", err, "Successfully added "
            "bricks to the volume <NOT EXPECTED>")
        g.log.info(
            "Volume %s: Failed to add-bricks while volume shrink "
            "in-progress <EXPECTED>", self.volname)

        # cleanup add-bricks list
        for brick in self.add_brick_list:
            brick_node, brick_path = brick.split(":")
            ret, _, _ = g.run(brick_node, ("rm -rf %s", brick_path))
            if ret != 0:
                g.log.error("Failed to clean %s:%s", brick_node, brick_path)
        g.log.info("Successfully cleaned backend add-brick bricks list")
    def test_wipe_out_directory_permissions(self):
        """
        Test case:
        1. Create a 1 brick pure distributed volume.
        2. Start the volume and mount it on a client node using FUSE.
        3. Create a directory on the mount point.
        4. Check trusted.glusterfs.dht xattr on the backend brick.
        5. Add brick to the volume using force.
        6. Do lookup from the mount point.
        7. Check the directory permissions from the backend bricks.
        8. Check trusted.glusterfs.dht xattr on the backend bricks.
        9. From mount point cd into the directory.
        10. Check the directory permissions from backend bricks.
        11. Check trusted.glusterfs.dht xattr on the backend bricks.
        """
        # Create a directory on the mount point
        self.dir_path = "{}/dir".format(self.mounts[0].mountpoint)
        ret = mkdir(self.first_client, self.dir_path)
        self.assertTrue(ret, "Failed to create directory dir")

        # Check trusted.glusterfs.dht xattr on the backend brick
        self._check_trusted_glusterfs_dht_on_all_bricks()
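        # _check_trusted_glusterfs_dht_on_all_bricks() is defined outside this
        # snippet; a minimal sketch of what such a helper might do, assuming
        # glustolibs' get_all_bricks and get_fattr:
        #
        #   for brick in get_all_bricks(self.mnode, self.volname):
        #       node, path = brick.split(':')
        #       layout = get_fattr(node, '{}/dir'.format(path),
        #                          'trusted.glusterfs.dht')
        #       self.assertIsNotNone(layout, 'trusted.glusterfs.dht missing '
        #                            'on brick %s' % brick)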

        # Add brick to the volume using force
        brick_list = form_bricks_list(self.mnode, self.volname, 1,
                                      self.servers, self.all_servers_info)
        self.assertIsNotNone(brick_list,
                             "Failed to form brick list to add brick")
        ret, _, _ = add_brick(self.mnode, self.volname, brick_list, force=True)
        self.assertEqual(ret, 0,
                         ("Volume {}: Add-brick failed".format(self.volname)))

        # Do a lookup from the mount point
        cmd = "ls -lR {}".format(self.dir_path)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertEqual(ret, 0, "Failed to lookup")
        g.log.info("Lookup successful")

        # Check the directory permissions from the backend bricks
        self._check_permissions_of_dir()
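        # _check_permissions_of_dir() is likewise defined outside this
        # snippet; a hedged sketch of such a check, assuming the directory is
        # expected to keep mode 755 on every brick:
        #
        #   for brick in get_all_bricks(self.mnode, self.volname):
        #       node, path = brick.split(':')
        #       ret, out, _ = g.run(node, "stat -c '%a' {}/dir".format(path))
        #       self.assertEqual(ret, 0, 'stat failed on brick %s' % brick)
        #       self.assertEqual(out.strip(), '755',
        #                        'Unexpected permissions on brick %s' % brick)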

        # Check trusted.glusterfs.dht xattr on the backend bricks
        self._check_trusted_glusterfs_dht_on_all_bricks()

        # From mount point cd into the directory
        ret, _, _ = g.run(
            self.first_client,
            "cd {};cd ..;cd {}".format(self.dir_path, self.dir_path))
        self.assertEqual(ret, 0, "Unable to cd into dir from mount point")

        # Check the directory permissions from backend bricks
        self._check_permissions_of_dir()

        # Check trusted.glusterfs.dht xattr on the backend bricks
        self._check_trusted_glusterfs_dht_on_all_bricks()
    def test_add_brick_replace_brick_fix_layout(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Create files and dirs on the mount point.
        3. Add bricks to the volume.
        4. Replace 2 old bricks to the volume.
        5. Trigger rebalance fix layout and wait for it to complete.
        6. Check layout on all the bricks through trusted.glusterfs.dht.
        """
        # Create directories with some files on mount point
        cmd = ("cd %s; for i in {1..10}; do mkdir dir$i; for j in {1..5};"
               " do dd if=/dev/urandom of=dir$i/file$j bs=1M count=1; done;"
               " done" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create dirs and files.")

        # Original brick list before add brick
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get the brick list")

        # Add bricks to the volume
        add_brick_list = form_bricks_list_to_add_brick(self.mnode,
                                                       self.volname,
                                                       self.servers,
                                                       self.all_servers_info)
        self.assertIsNotNone(add_brick_list, "Empty add brick list")

        ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list)
        self.assertFalse(ret, "Failed to add bricks to the volume")
        g.log.info("Successfully added bricks to the volume")

        # Replace 2 old bricks to the volume
        for _ in range(0, 2):
            brick = choice(brick_list)
            self._replace_a_old_added_brick(brick)
            brick_list.remove(brick)
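        # _replace_a_old_added_brick() is defined outside this snippet; a
        # rough sketch of what it might do with the brick passed to it,
        # assuming glustolibs' form_bricks_list and replace_brick:
        #
        #   new_brick = form_bricks_list(self.mnode, self.volname, 1,
        #                                self.servers,
        #                                self.all_servers_info)[0]
        #   ret, _, _ = replace_brick(self.mnode, self.volname,
        #                             src_brick=brick, dst_brick=new_brick)
        #   self.assertEqual(ret, 0, 'Failed to replace brick %s' % brick)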

        # Start rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertFalse(ret, "Failed to start rebalance on volume")

        ret = wait_for_fix_layout_to_complete(self.mnode,
                                              self.volname,
                                              timeout=800)
        self.assertTrue(ret, "Rebalance failed on volume")

        # Check layout on all the bricks through trusted.glusterfs.dht
        self._check_trusted_glusterfs_dht_on_all_bricks()
    def test_add_brick_peer_not_in_cluster(self):
        """ Test add bricks to the volume from the host which is not
        in the cluster.
        """
        # Form bricks list for add-brick
        bricks_list = get_subvols(self.mnode,
                                  self.volname)['volume_subvols'][0]
        for (i, item) in enumerate(bricks_list):
            server, _ = item.split(":")
            bricks_list[i] = item.replace(server, "abc.def.ghi.jkl")
        g.log.info("Adding bricks to the volume %s from the host which is not"
                   " in the cluster", self.volname)
        _, _, err = add_brick(self.mnode, self.volname, bricks_list)
        self.assertIn("Pre-validation failed on localhost", err,
                      "add-brick is successful")
        g.log.info("Volume add-brick failed with error %s ", err)
    def test_client_io_threads_on_replicate_volumes(self):
        """
        Test case 1:
        1. Create a distributed volume and start it.
        2. Check the value of performance.client-io-threads; it should be ON.
        3. io-threads should be loaded in trusted-.tcp-fuse.vol.
        4. Add brick to convert to a replicate volume.
        5. Check the value of performance.client-io-threads; it should be OFF.
        6. io-threads shouldn't be loaded in trusted-.tcp-fuse.vol.
        7. Remove brick so that the volume type is back to distributed.
        8. Check the value of performance.client-io-threads; it should be ON.
        9. performance.client-io-threads should be loaded in
           trusted-.tcp-fuse.vol.

        Test case 2:
        1. Create a replicate volume and start it.
        2. Set performance.client-io-threads to ON.
        3. Check the value of performance.client-io-threads; it should be ON.
        4. io-threads should be loaded in trusted-.tcp-fuse.vol.
        5. Add bricks to make the volume 2x3.
        6. Check the value of performance.client-io-threads; it should be ON.
        7. io-threads should be loaded in trusted-.tcp-fuse.vol.
        8. Remove brick to make the volume 1x3 again.
        9. Check the value of performance.client-io-threads; it should be ON.
        10. performance.client-io-threads should be loaded in
            trusted-.tcp-fuse.vol.
        """
        # If volume type is distributed then run test case 1.
        if self.volume_type == "distributed":

            # Check the value of performance.client-io-threads it should be ON
            # and io-threads should be loaded in trusted-.tcp-fuse.vol
            self._check_value_of_performance_client_io_threads()
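            # _check_value_of_performance_client_io_threads() is defined
            # outside this snippet; a simplified sketch of what it likely
            # verifies, assuming glustolibs' get_volume_options and an
            # `enabled` keyword matching the calls in this test:
            #
            #   expected = 'on' if enabled else 'off'
            #   opts = get_volume_options(self.mnode, self.volname,
            #                             'performance.client-io-threads')
            #   self.assertEqual(opts['performance.client-io-threads'],
            #                    expected, 'Unexpected io-threads value')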

            # Add brick to convert to replicate volume
            brick = form_bricks_list_to_add_brick(self.mnode, self.volname,
                                                  self.servers,
                                                  self.all_servers_info)
            self.assertIsNotNone(brick,
                                 "Failed to form brick list to add brick")

            ret, _, _ = add_brick(self.mnode, self.volname, brick,
                                  force=True, replica_count=2)
            self.assertFalse(ret, "Failed to add brick on volume %s"
                             % self.volname)
            g.log.info("Add-brick successful on volume")

            # Check the value of performance.client-io-threads it should be
            # OFF and io-threads shouldn't be loaded in trusted-.tcp-fuse.vol
            self._check_value_of_performance_client_io_threads(enabled=False)

            # Remove brick so that the volume type is back to distributed
            ret = shrink_volume(self.mnode, self.volname, replica_num=1)
            self.assertTrue(ret, "Failed to remove-brick from volume")
            g.log.info("Remove-brick successful on volume")

            # Check the value of performance.client-io-threads it should be ON
            # and io-threads should be loaded in trusted-.tcp-fuse.vol
            self._check_value_of_performance_client_io_threads()

        # If volume type is replicated then run test case 2.
        else:
            # Set performance.client-io-threads to ON
            options = {"performance.client-io-threads": "on"}
            ret = set_volume_options(self.mnode, self.volname, options)
            self.assertTrue(ret, "Unable to set volume option %s for"
                            "volume %s" % (options, self.volname))
            g.log.info("Successfully set %s for volume %s",
                       options, self.volname)

            # Check the value of performance.client-io-threads it should be ON
            # and io-threads should be loaded in trusted-.tcp-fuse.vol
            self._check_value_of_performance_client_io_threads()

            # Add bricks to make the volume 2x3
            ret = expand_volume(self.mnode, self.volname, self.servers,
                                self.all_servers_info)
            self.assertTrue(ret, "Failed to add brick on volume %s"
                            % self.volname)
            g.log.info("Add-brick successful on volume")

            # Check the value of performance.client-io-threads it should be ON
            # and io-threads should be loaded in trusted-.tcp-fuse.vol
            self._check_value_of_performance_client_io_threads()

            # Remove brick to make the volume 1x3 again
            ret = shrink_volume(self.mnode, self.volname)
            self.assertTrue(ret, "Failed to remove-brick from volume")
            g.log.info("Remove-brick successful on volume")

            # Check the value of performance.client-io-threads it should be ON
            # and io-threads should be loaded in trusted-.tcp-fuse.vol
            self._check_value_of_performance_client_io_threads()
    def test_rebalance_stop_with_large_file(self):
        """
        Testcase Steps:
        1. Create and start a volume.
        2. Mount volume on client and create a large file.
        3. Add bricks to the volume and check layout
        4. Rename the file such that it hashes to a different
           subvol.
        5. Start rebalance on volume.
        6. Stop rebalance on volume.
        """
        # Create file BIG1.
        command = ("dd if=/dev/urandom of={}/BIG1 bs=1024K count=10000".format(
            self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, "Unable to create file I/O failed")
        g.log.info('Successfully created file BIG1.')

        # Checking if file created on correct subvol or not.
        ret = validate_files_in_dir(
            self.mounts[0].client_system,
            self.mounts[0].mountpoint,
            file_type=k.FILETYPE_FILES,
            test_type=k.TEST_FILE_EXISTS_ON_HASHED_BRICKS)
        self.assertTrue(ret, "Files not created on correct subvol.")
        g.log.info("File BIG1 is on correct subvol according to "
                   "the hash value")

        # Adding brick to volume
        add_brick_list = form_bricks_list_to_add_brick(self.mnode,
                                                       self.volname,
                                                       self.servers,
                                                       self.all_servers_info)
        ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list)
        self.assertEqual(ret, 0, "Unable to add bricks to volume")
        g.log.info("Successfully added bricks to volume.")

        # Check if brick is added successfully or not.
        current_bricks = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(
            current_bricks, "Unable to get "
            "current active bricks of volume")
        g.log.info("Successfully got active bricks of volume.")
        for brick in add_brick_list:
            self.assertIn(brick, current_bricks,
                          ("Brick %s is not added to volume" % brick))

        # Create directory testdir.
        ret = mkdir(self.mounts[0].client_system,
                    self.mounts[0].mountpoint + '/testdir')
        self.assertTrue(ret, "Failed to create testdir directory")
        g.log.info("Successfuly created testdir directory.")

        # Layout should be set on the new brick and should be
        # continuous and complete
        ret = validate_files_in_dir(self.mounts[0].client_system,
                                    self.mounts[0].mountpoint + '/testdir',
                                    test_type=k.TEST_LAYOUT_IS_COMPLETE)
        self.assertTrue(ret, "Layout not set for the new subvol")
        g.log.info("New subvol has been added successfully")

        # Rename file so that it gets hashed to different subvol
        file_index = 0
        path_info_dict = get_pathinfo(self.mounts[0].client_system,
                                      self.mounts[0].mountpoint + '/BIG1')
        initial_brick_set = path_info_dict['brickdir_paths']

        while True:
            # Calculate old_filename and new_filename and rename.
            file_index += 1
            old_filename = "{}/BIG{}".format(self.mounts[0].mountpoint,
                                             file_index)
            new_filename = "{}/BIG{}".format(self.mounts[0].mountpoint,
                                             (file_index + 1))
            ret, _, _ = g.run(self.mounts[0].client_system,
                              "mv {} {}".format(old_filename, new_filename))
            self.assertEqual(ret, 0, "Rename not successful")

            # Checking if it was moved to new subvol or not.
            path_info_dict = get_pathinfo(
                self.mounts[0].client_system,
                self.mounts[0].mountpoint + '/BIG%d' % (file_index + 1))
            if path_info_dict['brickdir_paths'] != initial_brick_set:
                break
        g.log.info("file renamed successfully")

        # Start rebalance on volume
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=False)
        self.assertEqual(ret, 0, "Rebalance did not start")
        g.log.info("Rebalance started successfully on volume %s", self.volname)

        # Stop rebalance on volume
        ret, _, _ = rebalance_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Rebalance stop command did not execute.")
        g.log.info("Rebalance stopped successfully on volume %s", self.volname)

        # Get rebalance status in xml
        command = ("gluster volume rebalance {} status --xml".format(
            self.volname))
        ret, _, _ = g.run(self.mnode, command)
        self.assertEqual(
            ret, 1, "Unexpected: Rebalance still running "
            "even after stop.")
        g.log.info("Rebalance is not running after stop.")
    def test_create_vol_used_bricks(self):
        '''
        -> Create a distributed-replicate volume
        -> Add 6 bricks to the volume
        -> Mount the volume
        -> Perform some I/O on the mount point
        -> Unmount the volume
        -> Stop and delete the volume
        -> Create another volume using bricks of the deleted volume
        '''

        # Create and start a volume
        self.volume['name'] = "test_create_vol_with_fresh_bricks"
        self.volname = "test_create_vol_with_fresh_bricks"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        # Forming brick list
        brick_list = form_bricks_list(self.mnode, self.volname, 6,
                                      self.servers, self.all_servers_info)
        self.assertIsNotNone(brick_list, "Failed to form brick list")
        # Adding bricks to the volume
        ret, _, _ = add_brick(self.mnode, self.volname, brick_list)
        self.assertEqual(
            ret, 0, "Failed to add bricks to the volume %s" % self.volname)
        g.log.info("Bricks added successfully to the volume %s", self.volname)

        # Mounting the volume.
        for mount_obj in self.mounts:
            ret, _, _ = mount_volume(self.volname,
                                     mtype=self.mount_type,
                                     mpoint=mount_obj.mountpoint,
                                     mserver=self.mnode,
                                     mclient=mount_obj.client_system)
            self.assertEqual(ret, 0,
                             ("Volume %s is not mounted") % (self.volname))
            g.log.info("Volume mounted successfully : %s", self.volname)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num %d --dir-depth 2 "
                "--dir-length 5 --max-num-of-dirs 3 "
                "--num-of-files 10 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Unmounting the volume.
        for mount_obj in self.mounts:
            ret, _, _ = umount_volume(mclient=mount_obj.client_system,
                                      mpoint=mount_obj.mountpoint)
            self.assertEqual(ret, 0,
                             "Volume %s is not unmounted" % (self.volname))
            g.log.info("Volume unmounted successfully : %s", self.volname)

        # Getting brick list
        self.brick_list = get_all_bricks(self.mnode, self.volname)
        if not self.brick_list:
            raise ExecutionError("Failed to get the brick list of %s" %
                                 self.volname)

        # Stop volume
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop the volume %s" % self.volname)
        g.log.info("Volume %s stopped successfully", self.volname)

        # Delete Volume
        ret, _, _ = g.run(
            self.mnode,
            "gluster volume delete %s --mode=script" % self.volname)
        self.assertEqual(ret, 0, "Failed to delete volume %s" % self.volname)
        g.log.info("Volume deleted successfully %s", self.volname)

        # Create another volume by using bricks of deleted volume
        self.volname = "test_create_vol_used_bricks"
        ret, _, err = volume_create(self.mnode,
                                    self.volname,
                                    brick_list[0:6],
                                    replica_count=3)
        self.assertNotEqual(
            ret, 0, "Volume creation should fail with used "
            "bricks but volume creation success")
        g.log.info("Failed to create volume with used bricks")

        # Checking failed message of volume creation
        msg = ' '.join([
            'volume create: test_create_vol_used_bricks: failed:',
            brick_list[0].split(':')[1], 'is already part of a volume'
        ])
        self.assertIn(
            msg, err, "Incorrect error message for volume creation "
            "with used bricks")
        g.log.info("correct error message for volume creation with "
                   "used bricks")
    def test_quota_add_brick(self):
        """
        Verifying quota functionality with respect to the
        add-brick without rebalance

        * Enable Quota
        * Set limit of 1GB on /
        * Mount the volume
        * Create some random amount of data inside each directory until quota
          is reached
        * Perform a quota list operation
        * Perform add-brick
        * Try to add files and see if quota is honored.
        """
        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Path to set quota limit
        path = "/"

        # Set Quota limit on the root of the volume
        g.log.info("Set Quota Limit on the path %s of the volume %s", path,
                   self.volname)
        ret, _, _ = quota_limit_usage(self.mnode,
                                      self.volname,
                                      path=path,
                                      limit="1GB")
        self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of "
                                  "the volume %s", path, self.volname))
        g.log.info("Successfully set the Quota limit on %s of the volume %s",
                   path, self.volname)

        # Set soft timeout to 0 second
        g.log.info("Set quota soft timeout:")
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, '0sec')
        self.assertEqual(ret, 0, ("Failed to set soft timeout"))
        g.log.info("Quota soft timeout set successful")

        # Set hard timeout to 0 second
        g.log.info("Set quota hard timeout:")
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, '0sec')
        self.assertEqual(ret, 0, ("Failed to set hard timeout"))
        g.log.info("Quota hard timeout set successful")

        mount_obj = self.mounts[0]
        mount_dir = mount_obj.mountpoint
        client = mount_obj.client_system

        # Create data inside each directory from mount point
        g.log.info("Creating Files on %s:%s", client, mount_dir)
        cmd = ("cd %s/ ; "
               "for i in `seq 100` ; "
               "do dd if=/dev/zero of=testfile1$i "
               "bs=10M "
               "count=1 ; "
               "done" % (mount_dir))
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, ("Failed to create files"))
        g.log.info("Files created successfully")

        # Quota validate
        ret = quota_validate(self.mnode,
                             self.volname,
                             path=path,
                             hard_limit=1073741824,
                             sl_exceeded=True,
                             hl_exceeded=False)
        self.assertTrue(ret, "Quota validate Failed for /")

        # Add brick by forming the brick list
        # Form bricks list for add-brick command based on the voltype
        if 'replica_count' in self.volume['voltype']:
            new_bricks_count = self.volume['voltype']['replica_count']
        elif 'disperse_count' in self.volume['voltype']:
            new_bricks_count = self.volume['voltype']['disperse_count']
        else:
            new_bricks_count = 3
        bricks_list = form_bricks_list(self.mnode, self.volname,
                                       new_bricks_count, self.servers,
                                       self.all_servers_info)
        g.log.info("new brick list: %s", bricks_list)
        # Run add brick command
        ret, _, _ = add_brick(self.mnode, self.volname, bricks_list, False)
        self.assertEqual(ret, 0, "Failed to add the bricks to the volume")
        g.log.info("Successfully added bricks to volume")

        # Create data inside each directory from mount point
        g.log.info("Creating Files on %s:%s", client, mount_dir)
        cmd = ("cd %s/ ; "
               "for i in `seq 50` ; "
               "do dd if=/dev/zero of=testfile2$i "
               "bs=1M "
               "count=1 ; "
               "done" % (mount_dir))
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 1, ("Failed: Files created successfully"))
        g.log.info("Quota limit honored")

        # Quota validate
        ret = quota_validate(self.mnode,
                             self.volname,
                             path=path,
                             hard_limit=1073741824,
                             sl_exceeded=True,
                             hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for /")
    def test_arb_to_repl_conversion_with_io(self):
        """
        Description: To perform a volume conversion from Arbiter to Replicated
        with background IOs

        Steps:
        - Create, start and mount an arbiter volume in two clients
        - Create two dirs, fill IO in the first dir and take note of arequal
        - Start a continuous IO from second directory
        - Convert arbiter to x2 replicated volume (remove brick)
        - Convert x2 replicated to x3 replicated volume (add brick)
        - Wait for ~5 min for vol file to be updated on all clients
        - Enable client side heal options and issue volume heal
        - Validate heal completes with no errors and arequal of first dir
          matches against initial checksum
        """

        client, m_point = (self.mounts[0].client_system,
                           self.mounts[0].mountpoint)

        # Fill IO in first directory
        cmd = ('/usr/bin/env python {} '
               'create_deep_dirs_with_files --dir-depth 10 '
               '--fixed-file-size 1M --num-of-files 100 '
               '--dirname-start-num 1 {}'.format(self.script_path, m_point))
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, 'Not able to fill directory with IO')

        # Take `arequal` checksum on first directory
        ret, exp_arequal = collect_mounts_arequal(self.mounts[0],
                                                  m_point + '/user1')
        self.assertTrue(ret, 'Failed to get arequal checksum on mount')

        # Start continuous IO from second directory
        client = self.mounts[1].client_system
        cmd = ('/usr/bin/env python {} '
               'create_deep_dirs_with_files --dir-depth 10 '
               '--fixed-file-size 1M --num-of-files 250 '
               '--dirname-start-num 2 {}'.format(self.script_path, m_point))
        proc = g.run_async(client, cmd)
        self.all_mounts_procs.append(proc)

        # Wait for IO to fill before volume conversion
        sleep(30)

        # Remove arbiter bricks ( arbiter to x2 replicated )
        kwargs = {'replica_count': 2}
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 self._get_arbiter_bricks(),
                                 option='force',
                                 **kwargs)
        self.assertEqual(ret, 0, 'Not able to convert arbiter to x2 '
                         'replicated volume')
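        # _get_arbiter_bricks() is defined outside this snippet; for a
        # 'replica 3 arbiter 1' volume the arbiter is the last brick of each
        # replica set, so a plausible sketch (an assumption, using
        # glustolibs' get_subvols) is:
        #
        #   def _get_arbiter_bricks(self):
        #       subvols = get_subvols(self.mnode,
        #                             self.volname)['volume_subvols']
        #       return [subvol[-1] for subvol in subvols]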
        # Wait for IO to fill after volume conversion
        sleep(30)

        # Add bricks (x2 replicated to x3 replicated)
        kwargs['replica_count'] = 3
        vol_info = get_volume_info(self.mnode, volname=self.volname)
        self.assertIsNotNone(vol_info, 'Not able to get volume info')
        dist_count = vol_info[self.volname]['distCount']
        bricks_list = form_bricks_list(
            self.mnode,
            self.volname,
            number_of_bricks=int(dist_count) * 1,
            servers=self.servers,
            servers_info=self.all_servers_info,
        )
        self.assertTrue(bricks_list, 'Not able to get unused list of bricks')
        ret, _, _ = add_brick(self.mnode,
                              self.volname,
                              bricks_list,
                              force=True,
                              **kwargs)
        self.assertEqual(ret, 0, 'Not able to add-brick to '
                         '{}'.format(self.volname))
        # Wait for IO post x3 replicated volume conversion
        sleep(30)

        # Validate volume info
        vol_info = get_volume_info(self.mnode, volname=self.volname)
        self.assertIsNotNone(vol_info, 'Not able to get volume info')
        vol_info = vol_info[self.volname]
        repl_count, brick_count = (vol_info['replicaCount'],
                                   vol_info['brickCount'])

        # Wait for the volfile to sync up on clients
        cmd = ('grep -ir connected {}/.meta/graphs/active/{}-client-*/private '
               '| wc -l')
        wait_time = time() + 300
        in_sync = False
        while time() <= wait_time:
            ret, rout, _ = g.run(client, cmd.format(m_point, self.volname))
            self.assertEqual(ret, 0,
                             'Not able to grep for volfile sync from client')
            if int(rout) == int(brick_count):
                in_sync = True
                break
            sleep(30)
        self.assertTrue(
            in_sync, 'Volfiles from clients are not synced even '
            'after polling for ~5 min')

        self.assertEqual(
            int(repl_count), kwargs['replica_count'], 'Not able '
            'to validate x2 to x3 replicated volume conversion')

        # Enable client side heal options, trigger and monitor heal
        ret = set_volume_options(
            self.mnode, self.volname, {
                'data-self-heal': 'on',
                'entry-self-heal': 'on',
                'metadata-self-heal': 'on'
            })
        self.assertTrue(ret, 'Unable to set client side heal options')
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to trigger heal on volume')
        ret = monitor_heal_completion(self.mnode,
                                      self.volname,
                                      timeout_period=1800)
        self.assertTrue(ret,
                        'Heal is not completed for {}'.format(self.volname))

        # Validate IO
        prev_time = datetime.now().replace(microsecond=0)
        ret = validate_io_procs(self.all_mounts_procs, [self.mounts[1]])
        curr_time = datetime.now().replace(microsecond=0)
        self.assertTrue(ret, 'Not able to validate completion of IO on mount')
        self.all_mounts_procs = []

        # To ascertain IO was happening during brick operations
        self.assertGreater(
            curr_time - prev_time, timedelta(seconds=10), 'Unable '
            'to validate IO was happening during brick operations')

        # Take and validate `arequal` checksum on first directory
        ret, act_arequal = collect_mounts_arequal(self.mounts[1],
                                                  m_point + '/user1')
        self.assertTrue(ret, 'Failed to get arequal checksum from mount')
        self.assertEqual(
            exp_arequal, act_arequal, '`arequal` checksum did '
            'not match post arbiter to x3 replicated volume conversion')

        g.log.info('PASS: Arbiter to x3 replicated volume conversion complete')
    def test_add_brick_functionality(self):

        # create and start volume
        self.assertTrue(
            setup_volume(self.mnode, self.all_servers_info, self.volume),
            "Failed to create and start volume %s" % self.volname)
        g.log.info("Volume created and started successfully")

        # form bricks list to test add brick functionality
        replica_count_of_volume = self.volume['voltype']['replica_count']
        num_of_bricks = 4 * replica_count_of_volume
        self.bricks_list = form_bricks_list(self.mnode, self.volname,
                                            num_of_bricks,
                                            self.servers,
                                            self.all_servers_info)
        self.assertIsNotNone(self.bricks_list, "Bricks list is None")

        # Try to add a single brick to volume, which should fail as it is a
        # replicated volume, we should pass multiple of replica count number
        # of bricks
        self.assertNotEqual(
            add_brick(self.mnode, self.volname, self.bricks_list[0])[0], 0,
            "Expected: It should fail to add a single brick to a replicated "
            "volume. Actual: Successfully added single brick to volume")
        g.log.info("Failed to add a single brick to replicated volume "
                   "(as expected)")

        # add brick replica count number of bricks in which one is a
        # non existing brick (not using the brick used in the earlier test)
        kwargs = {'replica_count': replica_count_of_volume}
        bricks_to_add = self.bricks_list[1:replica_count_of_volume + 1]
        # make one of the bricks a non-existing one (randomly)
        random_index = random.randint(0, replica_count_of_volume - 1)
        bricks_to_add[random_index] += "/non_existing_brick"

        self.assertNotEqual(
            add_brick(self.mnode, self.volname, bricks_to_add, **kwargs)[0], 0,
            "Expected: It should fail to add a non existing brick to volume. "
            "Actual: Successfully added a non existing brick to volume")
        g.log.info("Failed to add a non existing brick to volume "
                   "(as expected)")

        # add a brick from a node which is not a part of the cluster
        # (not using bricks used in earlier tests)
        bricks_to_add = self.bricks_list[replica_count_of_volume + 1:
                                         (2 * replica_count_of_volume) + 1]
        # change one (random) brick's node name to a non existent node
        random_index = random.randint(0, replica_count_of_volume - 1)
        brick_to_change = bricks_to_add[random_index].split(":")
        brick_to_change[0] = "abc.def.ghi.jkl"
        bricks_to_add[random_index] = ":".join(brick_to_change)
        self.assertNotEqual(
            add_brick(self.mnode, self.volname, bricks_to_add, **kwargs)[0], 0,
            "Expected: It should fail to add brick from a node which is not "
            "part of a cluster. Actual: Successfully added bricks from node "
            "which is not a part of cluster to volume")
        g.log.info("Failed to add bricks from node which is not a part of "
                   "cluster to volume (as expected)")

        # add correct number of valid bricks, it should succeed
        # (not using bricks used in earlier tests)
        bricks_to_add = self.bricks_list[(2 * replica_count_of_volume) + 1:
                                         (3 * replica_count_of_volume) + 1]
        self.assertEqual(
            add_brick(self.mnode, self.volname, bricks_to_add, **kwargs)[0], 0,
            "Failed to add the bricks to the volume")
        g.log.info("Successfully added bricks to volume")

        # Perform rebalance start operation
        self.assertEqual(rebalance_start(self.mnode, self.volname)[0], 0,
                         "Rebalance start failed")
    def test_delete_file_in_migration(self):
        """
        Verify that if a file is picked for migration and then deleted, the
        file should be removed successfully.
        * First create a big data file of 10GB.
        * Rename that file, such that after rename a linkto file is created
          (we are doing this to make sure that file is picked for migration.)
        * Add bricks to the volume and trigger rebalance using force option.
        * When the file has been picked for migration, delete that file from
          the mount point.
        * Check whether the file has been deleted or not on the mount-point
          as well as the back-end bricks.
        """

        # pylint: disable=too-many-statements
        # pylint: disable=too-many-locals
        # pylint: disable=protected-access

        mountpoint = self.mounts[0].mountpoint

        # Location of source file
        src_file = mountpoint + '/file1'

        # Finding a file name such that renaming source file to it will form a
        # linkto file
        subvols = (get_subvols(self.mnode, self.volname))['volume_subvols']
        newhash = find_new_hashed(subvols, "/", "file1")
        new_name = str(newhash.newname)
        new_host = str(newhash.hashedbrickobject._host)
        new_name_path = str(newhash.hashedbrickobject._fqpath)[:-2]

        # Location of destination file to which source file will be renamed
        dst_file = '{}/{}'.format(mountpoint, new_name)
        # Create a 10GB file source file
        cmd = (
            "dd if=/dev/urandom of={} bs=1024K count=10000".format(src_file))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, ("File {} creation failed".format(src_file)))

        # Move file such that it hashes to some other subvol and forms linkto
        # file
        ret = move_file(self.clients[0], src_file, dst_file)
        self.assertTrue(ret, "Rename failed")
        g.log.info('Renamed file %s to %s', src_file, dst_file)

        # Check if "file_two" is linkto file
        ret = is_linkto_file(new_host, '{}/{}'.format(new_name_path, new_name))
        self.assertTrue(ret, "File is not a linkto file")
        g.log.info("File is linkto file")

        # Expanding volume by adding bricks to the volume
        ret, _, _ = add_brick(self.mnode,
                              self.volname,
                              self.add_brick_list,
                              force=True)
        self.assertEqual(ret, 0,
                         ("Volume {}: Add-brick failed".format(self.volname)))
        g.log.info("Volume %s: add-brick successful", self.volname)

        # Log Volume Info and Status after expanding the volume
        log_volume_info_and_status(self.mnode, self.volname)

        # Start Rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
        self.assertEqual(
            ret, 0,
            ("Volume {}: Failed to start rebalance".format(self.volname)))
        g.log.info("Volume %s : Rebalance started ", self.volname)

        # Check if rebalance is running and delete the file
        status_info = get_rebalance_status(self.mnode, self.volname)
        status = status_info['aggregate']['statusStr']
        self.assertEqual(status, 'in progress', "Rebalance is not running")
        ret, _, _ = g.run(self.clients[0], (" rm -rf {}".format(dst_file)))
        self.assertEqual(ret, 0, ("Cannot delete file {}".format(dst_file)))
        g.log.info("File is deleted")

        # Check if the file is present on the mount point
        ret, _, _ = g.run(self.clients[0], ("ls -l {}".format(dst_file)))
        self.assertEqual(ret, 2, ("Failed to delete file {}".format(dst_file)))

        # Check if the file is present on the backend bricks
        bricks = get_all_bricks(self.mnode, self.volname)
        for brick in bricks:
            node, brick_path = brick.split(':')
            ret, _, _ = g.run(node, "ls -l {}/{}".format(brick_path, new_name))
            self.assertEqual(
                ret, 2, "File is still present on"
                " back-end brick: {}".format(brick_path))
            g.log.info("File is deleted from back-end brick: %s", brick_path)

        # Check if rebalance process is still running
        for server in self.servers:
            ret, _, _ = g.run(server, "pgrep rebalance")
            self.assertEqual(ret, 1, ("Rebalance process is still"
                                      " running on server {}".format(server)))
            g.log.info("Rebalance process is not running")
    def test_readdirp_with_rebalance(self):
        """
        Description: Tests to check that all directories are read
                     and listed while rebalance is still in progress.

        Steps :
        1) Create a volume.
        2) Mount the volume using FUSE.
        3) Create a dir "master" on mount-point.
        4) Create 8000 empty dirs (dir1 to dir8000) inside dir "master".
        5) Now inside a few dirs (e.g. dir1 to dir10), create deep dirs
           and inside every dir, create 50 files.
        6) Collect the number of dirs present on /mnt/<volname>/master
        7) Change the rebalance throttle to lazy.
        8) Add-brick to the volume (at least 3 replica sets.)
        9) Start rebalance using "force" option on the volume.
        10) List the directories on dir "master".
        """
        # pylint: disable=too-many-statements
        # Start IO on mounts
        m_point = self.mounts[0].mountpoint
        ret = mkdir(self.mounts[0].client_system, "{}/master".format(m_point))
        self.assertTrue(ret, "mkdir of dir master failed")

        # Create 8000 empty dirs
        cmd = ("ulimit -n 64000; /usr/bin/env python {} create_deep_dir"
               " --dir-length 8000 --dir-depth 0"
               " {}/master/".format(self.script_upload_path, m_point))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)
        g.log.info("IO on %s:%s is started successfully",
                   self.mounts[0].client_system, m_point)

        # Validate 8000 empty dirs are created successfully
        ret = validate_io_procs(self.all_mounts_procs, self.mounts[0])
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Create deep dirs and files
        self.all_mounts_procs = []
        cmd = ("/usr/bin/env python {} create_deep_dirs_with_files"
               " --dir-length 10 --dir-depth 1 --max-num-of-dirs 50 "
               " --num-of-files 50 --file-type empty-file"
               " {}/master/".format(self.script_upload_path, m_point))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)
        g.log.info("IO on %s:%s is started successfully",
                   self.mounts[0].client_system, m_point)

        # Validate deep dirs and files are created successfully
        ret = validate_io_procs(self.all_mounts_procs, self.mounts[0])
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Check the dir count before rebalance
        cmd = ('cd {}/master; ls -l | wc -l'.format(m_point))
        ret, dir_count_before, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to " "get directory count")
        g.log.info("Dir count before %s", dir_count_before)

        # Change the rebalance throttle to lazy
        ret, _, _ = set_rebalance_throttle(self.mnode,
                                           self.volname,
                                           throttle_type='lazy')
        self.assertEqual(ret, 0, "Failed to set rebal-throttle to lazy")
        g.log.info("Rebal-throttle set to 'lazy' successfully")

        # Add-bricks to the volume
        ret, _, _ = add_brick(self.mnode, self.volname, self.add_brick_list)
        self.assertEqual(ret, 0, "Failed to add-brick to the volume")
        g.log.info("Added bricks to the volume successfully")

        # Start rebalance using force
        ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance")
        g.log.info("Rebalance started successfully")

        # Check if rebalance is in progress
        rebalance_status = get_rebalance_status(self.mnode, self.volname)
        status = rebalance_status['aggregate']['statusStr']
        self.assertEqual(status, "in progress",
                         ("Rebalance is not in 'in progress' state,"
                          " either rebalance is in compeleted state"
                          " or failed to get rebalance status"))

        # Check the dir count after rebalance
        cmd = ('cd {}/master; ls -l | wc -l'.format(m_point))
        ret, dir_count_after, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to do lookup and"
                         " get directory count")
        g.log.info("Dir count after %s", dir_count_after)

        # Check if there is any data loss
        self.assertEqual(dir_count_before.strip(), dir_count_after.strip(),
                         "There is data loss")
        g.log.info("The directory count before and after rebalance is the "
                   "same. There is no data loss.")
    def test_rebalance_start_status_stop(self):
        # pylint: disable=too-many-statements
        # Form brick list for expanding volume
        add_brick_list = form_bricks_list_to_add_brick(self.mnode,
                                                       self.volname,
                                                       self.servers,
                                                       self.all_servers_info,
                                                       distribute_count=1)
        self.assertIsNotNone(add_brick_list,
                             ("Volume %s: Failed to form "
                              "bricks list to expand", self.volname))
        g.log.info("Volume %s: Formed bricks list to expand", self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Volume %s: Expand start")
        ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list)
        self.assertEqual(ret, 0, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand successful", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s: one or more volume process are "
                              "not up", self.volname))
        g.log.info("All volume %s processes are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            ret, "Logging volume info and status failed on "
            "volume %s" % self.volname)
        g.log.info(
            "Successful in logging volume info and status of volume "
            "%s", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Getting arequal checksum before rebalance start
        g.log.info("Getting arequal before rebalance start")
        arequal_checksum_before_rebalance_start = collect_mounts_arequal(
            self.mounts)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Volume %s: Failed to start rebalance", self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Stop on-going rebalance
        g.log.info("Stop rebalance on the volume")
        ret, _, _ = rebalance_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         ("Volume %s: Failed to stop rebalance", self.volname))
        g.log.info("Checking whether the migration is stopped or not")

        # Wait till the on-going file migration completes on all servers
        count = 0
        while count < 80:
            rebalance_count = 0
            for server in self.servers:
                ret, _, _ = g.run(server, "pgrep rebalance")
                if ret != 0:
                    rebalance_count += 1
            if rebalance_count == len(self.servers):
                break
            sleep(2)
            count += 1
        g.log.info("Volume %s: Rebalance process is not running on servers",
                   self.volname)

        # List all files and dirs from mount point
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Getting arequal checksum after the rebalance is stopped
        g.log.info("Getting arequal checksum after the rebalance is stopped")
        arequal_checksum_after_rebalance_stop = collect_mounts_arequal(
            self.mounts)

        # Comparing arequals checksum before start of rebalance and
        #                       after the rebalance is stopped
        g.log.info("Comparing arequals checksum before start of rebalance and"
                   "after the rebalance is stopped")
        self.assertEqual(arequal_checksum_before_rebalance_start,
                         arequal_checksum_after_rebalance_stop,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")
    def test_quota_rebalance_heal(self):
        """
        * Enable quota on the volume
        * Set hard and soft timeouts to zero.
        * Create some files and directories from the mount point
          so that the limits are reached.
        * Perform add-brick operation on the volume.
        * Start rebalance on the volume.
        * While rebalance is running, kill one of the bricks of the volume
          and start after a while.
        * While rebalance + self heal is in progress,
          create some more files and
          directories from the mount point until limit is hit
        """

        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Set the Quota timeouts to 0 for strict accounting
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set hard timeout to 0 for %s", self.volname))

        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set soft timeout to 0 for %s", self.volname))
        g.log.info("Soft and hard timeouts have been set to 0 for %s",
                   self.volname)

        # Create Directories and files (write 4MB of data)
        for mount_object in self.mounts:
            g.log.info("Creating Directories on %s:%s",
                       mount_object.client_system, mount_object.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files -d 0 "
                   "-f 1024 -l 4 --fixed-file-size 1k %s" %
                   (self.script_upload_path, mount_object.mountpoint))

            proc = g.run_async(mount_object.client_system,
                               cmd,
                               user=mount_object.user)
            self.all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Set limit of 4 MB on root dir
        g.log.info("Set Quota Limit on root directory of the volume %s",
                   self.volname)
        ret, _, _ = quota_limit_usage(self.mnode, self.volname, '/', '4MB')
        self.assertEqual(ret, 0, "Failed to set Quota for dir /.")
        g.log.info("Set quota for dir / successfully.")

        # Add bricks
        replica_count_of_volume = self.volume['voltype']['replica_count']
        bricks_list = form_bricks_list(self.mnode, self.volname,
                                       replica_count_of_volume, self.servers,
                                       self.all_servers_info)
        g.log.info("new brick list: %s", str(bricks_list))
        ret, _, _ = add_brick(self.mnode, self.volname, bricks_list, False)
        self.assertEqual(ret, 0, "Failed to add the bricks to the volume")
        g.log.info("Successfully added bricks to volume")

        # Perform rebalance start operation
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Rebalance start is success")

        # Sleep until rebalance has done some work
        g.log.info("wait for rebalance to make progress")
        sleep(3)

        # Kill a brick and bring it up to trigger self heal
        all_bricks = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(all_bricks, "unable to get list of bricks")
        g.log.info("bringing down brick: %s", all_bricks[0])
        ret = bring_bricks_offline(self.volname, all_bricks[0])
        self.assertTrue(ret, "unable to bring brick1 offline")
        g.log.info("Successfully brought the following brick offline "
                   ": %s", str(all_bricks[0]))

        ret = bring_bricks_online(self.mnode, self.volname, [all_bricks[0]])
        self.assertTrue(ret, "unable to bring %s online" % all_bricks[0])
        g.log.info("Successfully brought the following brick online "
                   ": %s", str(all_bricks[0]))

        # Do some more IO and check if hard limit is honoured
        all_mounts_procs = []
        for mount_object in self.mounts:
            cmd = ("/usr/bin/env python %s create_files "
                   "-f 100 --base-file-name file %s" %
                   (self.script_upload_path, mount_object.mountpoint))
            proc = g.run_async(mount_object.client_system,
                               cmd,
                               user=mount_object.user)
            all_mounts_procs.append(proc)

        # Validate I/O
        g.log.info("Wait for IO to complete and validate IO.....")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertFalse(ret, "Writes allowed past quota limit")
        g.log.info("Quota limits honored as expected")
Example #15
    def test_rebalance_status_from_newly_probed_node(self):

        # Peer probe first 3 servers
        servers_info_from_three_nodes = {}
        for server in self.servers[0:3]:
            servers_info_from_three_nodes[server] = self.all_servers_info[
                server]
            # Peer probe the first 3 servers
            ret, _, _ = peer_probe(self.mnode, server)
            self.assertEqual(ret, 0, "Peer probe failed to %s" % server)

        self.volume['servers'] = self.servers[0:3]
        # create a volume using the first 3 nodes
        ret = setup_volume(self.mnode,
                           servers_info_from_three_nodes,
                           self.volume,
                           force=True)
        self.assertTrue(
            ret, "Failed to create"
            "and start volume %s" % self.volname)

        # Mounting a volume
        ret = self.mount_volume(self.mounts)
        self.assertTrue(ret, "Volume mount failed for %s" % self.volname)

        # Checking volume mounted or not
        ret = is_mounted(self.volname, self.mounts[0].mountpoint, self.mnode,
                         self.mounts[0].client_system, self.mount_type)
        self.assertTrue(
            ret, "Volume not mounted on mount point: %s" %
            self.mounts[0].mountpoint)
        g.log.info("Volume %s mounted on %s", self.volname,
                   self.mounts[0].mountpoint)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 10 "
                "--dir-length 5 "
                "--max-num-of-dirs 3 "
                "--num-of-files 100 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertEqual(ret, 0,
                             "IO failed on %s" % mount_obj.client_system)
            self.counter = self.counter + 10

        # add a brick to the volume and start rebalance
        brick_to_add = form_bricks_list(self.mnode, self.volname, 1,
                                        self.servers[0:3],
                                        servers_info_from_three_nodes)
        ret, _, _ = add_brick(self.mnode, self.volname, brick_to_add)
        self.assertEqual(ret, 0, "Failed to add a brick to %s" % self.volname)

        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start rebalance")

        # peer probe a new node from existing cluster
        ret, _, _ = peer_probe(self.mnode, self.servers[3])
        self.assertEqual(ret, 0, "Peer probe failed")

        # The newly probed node does not host this volume, so rebalance
        # status should not be available from it
        ret = get_rebalance_status(self.servers[3], self.volname)
        self.assertIsNone(ret, "Unexpected: Rebalance status is available "
                          "from the newly probed node %s" % self.servers[3])
    def test_add_brick_functionality(self):

        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(
            ret, ("Failed to create and start volume %s" % self.volname))
        g.log.info("Volume created and started succssfully")

        # form bricks list to test add brick functionality

        replica_count_of_volume = self.volume['voltype']['replica_count']
        num_of_bricks = 4 * replica_count_of_volume
        bricks_list = form_bricks_list(self.mnode, self.volname, num_of_bricks,
                                       self.servers, self.all_servers_info)
        self.assertIsNotNone(bricks_list, "Bricks list is None")

        # Try to add a single brick to volume, which should fail as it is a
        # replicated volume, we should pass multiple of replica count number
        # of bricks

        bricks_list_to_add = [bricks_list[0]]
        ret, out, err = add_brick(self.mnode, self.volname, bricks_list_to_add)
        self.assertNotEqual(
            ret, 0, "Expected: It should fail to add a single"
            "brick to a replicated volume. Actual: "
            "Successfully added single brick to volume")
        g.log.info("failed to add a single brick to replicated volume")

        # add brick replica count number of bricks in which one is
        # non existing brick
        kwargs = {}
        kwargs['replica_count'] = replica_count_of_volume

        bricks_list_to_add = bricks_list[1:replica_count_of_volume + 1]

        num_of_bricks = len(bricks_list_to_add)
        index_of_non_existing_brick = random.randint(0, num_of_bricks - 1)
        complete_brick = bricks_list_to_add[index_of_non_existing_brick]
        non_existing_brick = complete_brick + "/non_existing_brick"
        bricks_list_to_add[index_of_non_existing_brick] = non_existing_brick

        ret, out, err = add_brick(self.mnode, self.volname, bricks_list_to_add,
                                  False, **kwargs)
        self.assertNotEqual(
            ret, 0, "Expected: It should fail to add non"
            "existing brick to a volume. Actual: "
            "Successfully added non existing brick to volume")
        g.log.info("failed to add a non existing brick to volume")

        # adding brick from node which is not part of cluster
        bricks_list_to_add = bricks_list[replica_count_of_volume +
                                         1:(2 * replica_count_of_volume) + 1]

        num_of_bricks = len(bricks_list_to_add)
        index_of_node = random.randint(0, num_of_bricks - 1)
        complete_brick = bricks_list_to_add[index_of_node].split(":")
        complete_brick[0] = "abc.def.ghi.jkl"
        bricks_list_to_add[index_of_node] = ":".join(complete_brick)
        ret, out, err = add_brick(self.mnode, self.volname, bricks_list_to_add,
                                  False, **kwargs)
        self.assertNotEqual(
            ret, 0, "Expected: It should fail to add brick "
            "from a node which is not part of a cluster."
            "Actual:Successfully added bricks from node which"
            " is not a part of cluster to volume")

        g.log.info("Failed to add bricks form node which is not a part of "
                   "cluster to volume")

        # add correct number of valid bricks, it should succeed

        bricks_list_to_add = bricks_list[(2 * replica_count_of_volume) +
                                         1:(3 * replica_count_of_volume) + 1]
        ret, out, err = add_brick(self.mnode, self.volname, bricks_list_to_add,
                                  False, **kwargs)
        self.assertEqual(ret, 0, "Failed to add the bricks to the volume")
        g.log.info("Successfully added bricks to volume")

        # Perform rebalance start operation
        ret, out, err = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Rebalance start is success")
Example #17
    def test_peer_probe_status(self):

        # get FQDN of node1 and node2
        node1 = socket.getfqdn(self.mnode)
        node2 = socket.getfqdn(self.servers[1])

        # peer probe to a new node, N2 from N1
        ret, _, err = peer_probe(node1, node2)
        self.assertEqual(ret, 0, ("Peer probe failed to %s from %s with "
                                  "error message %s" % (self.servers[1],
                                                        self.mnode, err)))
        g.log.info("Peer probe from %s to %s is success", self.mnode,
                   self.servers[1])

        # check peer status in both the nodes, it should have FQDN
        # from node1
        ret, out, err = peer_status(self.mnode)
        self.assertEqual(ret, 0, ("Failed to get peer status from %s with "
                                  "error message %s" % (self.mnode, err)))
        g.log.info("Successfully got peer status from %s", self.mnode)

        self.assertIn(node2, out, ("FQDN of %s is not present in the "
                                   "output of peer status from %s"
                                   % (self.servers[1], self.mnode)))
        g.log.info("FQDN of %s is present in peer status of %s",
                   self.servers[1], self.mnode)

        # from node2
        ret, out, err = peer_status(self.servers[1])
        self.assertEqual(ret, 0, ("Failed to get peer status from %s with "
                                  "error message %s" % (self.servers[1], err)))
        g.log.info("Successfully got peer status from %s", self.servers[1])

        self.assertIn(node1, out, ("FQDN of %s is not present in the "
                                   "output of peer status from %s"
                                   % (self.mnode, self.servers[1])))
        g.log.info("FQDN of %s is present in peer status of %s",
                   self.mnode, self.servers[1])

        # create a distributed volume with 2 bricks
        servers_info_from_two_node_cluster = {}
        for server in self.servers[0:2]:
            servers_info_from_two_node_cluster[
                server] = self.all_servers_info[server]

        self.volume['servers'] = self.servers[0:2]
        self.volume['voltype']['dist_count'] = 2
        ret = setup_volume(self.mnode, servers_info_from_two_node_cluster,
                           self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))
        g.log.info("Successfully created and started the volume %s",
                   self.volname)

        # peer probe to a new node, N3
        ret, _, err = peer_probe(self.mnode, self.servers[2])
        self.assertEqual(ret, 0, ("Peer probe failed to %s from %s with "
                                  "error message %s" % (self.servers[2],
                                                        self.mnode, err)))
        g.log.info("Peer probe from %s to %s is success", self.mnode,
                   self.servers[2])

        # add a brick from N3 to the volume
        num_bricks_to_add = 1
        server_info = {}
        server_info[self.servers[2]] = self.all_servers_info[self.servers[2]]
        brick = form_bricks_list(self.mnode, self.volname, num_bricks_to_add,
                                 self.servers[2], server_info)
        ret, _, _ = add_brick(self.mnode, self.volname, brick)
        self.assertEqual(ret, 0, ("Failed to add brick to volume %s"
                                  % self.volname))
        g.log.info("add brick to the volume %s is success", self.volname)

        # get volume info, it should have correct brick information
        ret = get_volume_info(self.mnode, self.volname)
        self.assertIsNotNone(ret, ("Failed to get volume info from %s"
                                   % self.mnode))
        g.log.info("volume info from %s is success", self.mnode)

        brick3 = ret[self.volname]['bricks']['brick'][2]['name']
        self.assertEqual(brick3, str(brick[0]), ("Volume info has incorrect "
                                                 "information"))
        g.log.info("Volume info has correct information")
    def test_fix_layout_start(self):
        # pylint: disable=too-many-statements
        # Get arequal checksum before starting fix-layout
        g.log.info("Getting arequal checksum before fix-layout")
        arequal_checksum_before_fix_layout = collect_mounts_arequal(
            self.mounts)

        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            ret, "Logging volume info and status failed on "
            "volume %s" % self.volname)
        g.log.info(
            "Successful in logging volume info and status of volume "
            "%s", self.volname)

        # Form brick list for expanding volume
        add_brick_list = form_bricks_list_to_add_brick(self.mnode,
                                                       self.volname,
                                                       self.servers,
                                                       self.all_servers_info,
                                                       distribute_count=1)
        self.assertIsNotNone(add_brick_list,
                             ("Volume %s: Failed to form "
                              "bricks list to expand", self.volname))
        g.log.info("Volume %s: Formed bricks list to expand", self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Volume %s: Expand start")
        ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list)
        self.assertEqual(ret, 0, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand successful", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s: one or more volume process are "
                              "not up", self.volname))
        g.log.info("All volume %s processes are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            ret, "Logging volume info and status failed on "
            "volume %s" % self.volname)
        g.log.info(
            "Successful in logging volume info and status of volume "
            "%s", self.volname)

        # Verify all processes of the volume are online
        g.log.info("Verifying that all processes of the volume are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s: All processes are not online", self.volname))
        g.log.info("Volume %s: All processes are online", self.volname)

        # Start Rebalance fix-layout
        g.log.info("Starting fix-layout on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertEqual(ret, 0, ("Volume %s: fix-layout start failed"
                                  "%s", self.volname))
        g.log.info("Volume %s: fix-layout start success", self.volname)

        # Wait for fix-layout to complete
        g.log.info("Waiting for fix-layout to complete")
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s: Fix-layout is still in-progress", self.volname))
        g.log.info("Volume %s: Fix-layout completed successfully",
                   self.volname)

        # Check Rebalance status after fix-layout is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(
            ret, 0,
            ("Volume %s: Failed to get rebalance status", self.volname))
        g.log.info("Volume %s: Successfully got rebalance status",
                   self.volname)

        # Get arequal checksum after fix-layout is complete
        g.log.info("arequal after fix-layout is complete")
        arequal_checksum_after_fix_layout = collect_mounts_arequal(self.mounts)

        # Compare arequals checksum before and after fix-layout
        g.log.info("Comparing checksum before and after fix-layout")
        self.assertEqual(arequal_checksum_before_fix_layout,
                         arequal_checksum_after_fix_layout,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")

        # Check if there are any file migrations after fix-layout
        status_info = get_rebalance_status(self.mnode, self.volname)
        for node in range(len(status_info['node'])):
            file_migration_count = status_info['node'][node]['files']
            self.assertEqual(
                int(file_migration_count), 0,
                ("Server %s: Few files are migrated", self.servers[node]))
            g.log.info("Server %s: No files are migrated")

        # Check if new bricks contains any files
        for brick in add_brick_list:
            brick_node, brick_path = brick.split(":")
            cmd = ('find %s -type f ! -perm 1000 | grep -ve .glusterfs' %
                   brick_path)
            _, out, _ = g.run(brick_node, cmd)
            self.assertEqual(
                len(out), 0,
                (("Files(excluded linkto files) are present on %s:%s"),
                 (brick_node, brick_path)))
            g.log.info("No files (excluded linkto files) are present on %s:%s",
                       brick_node, brick_path)
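        # Hedged illustrative sketch (not part of the original test): the
        # `! -perm 1000` filter above skips DHT linkto files, which are
        # zero-byte, sticky-bit placeholders. A placeholder's target subvolume
        # can be read with the standard getfattr command; the sample file name
        # below is purely hypothetical.
        for brick in add_brick_list:
            brick_node, brick_path = brick.split(":")
            cmd = ("getfattr -n trusted.glusterfs.dht.linkto -e hex "
                   "%s/sample_linkto_file 2>/dev/null" % brick_path)
            _, out, _ = g.run(brick_node, cmd)
            g.log.info("linkto xattr on %s:%s (may be empty): %s",
                       brick_node, brick_path, out)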
Example #19
    def test_glusterd_rebalance(self):
        '''
        -> Create Volume
        -> Fuse mount the volume
        -> Perform I/O on fuse mount
        -> Add bricks to the volume
        -> Perform rebalance on the volume
        -> While rebalance is in progress,
        -> restart glusterd on all the nodes in the cluster
        '''

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 4 "
                "--dir-length 6 "
                "--max-num-of-dirs 3 "
                "--num-of-files 25 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Forming brick list
        brick_list = form_bricks_list_to_add_brick(self.mnode, self.volname,
                                                   self.servers,
                                                   self.all_servers_info)

        # Adding Bricks
        ret, _, _ = add_brick(self.mnode, self.volname, brick_list)
        self.assertEqual(ret, 0,
                         "Failed to add brick to the volume %s" % self.volname)
        g.log.info("Brick added successfully to the volume %s", self.volname)

        # Performing rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, 'Failed to start rebalance on volume %s' % self.volname)
        g.log.info("Rebalance started successfully on volume %s", self.volname)

        # Checking Rebalance is in progress or not
        rebalance_status = get_rebalance_status(self.mnode, self.volname)
        if rebalance_status['aggregate']['statusStr'] != 'in progress':
            raise ExecutionError("Rebalance is not in 'in progress' state, "
                                 "either rebalance is in compeleted state or"
                                 " failed to get rebalance status")

        # Restart glusterd
        ret = restart_glusterd(self.servers)
        self.assertTrue(ret, "Failed to restart glusterd on servers")
        g.log.info("Glusterd restarted successfully on %s", self.servers)

        # Checking glusterd status
        count = 0
        while count < 60:
            ret = is_glusterd_running(self.servers)
            if not ret:
                break
            sleep(2)
            count += 1
        self.assertEqual(ret, 0, "Glusterd is not running on some of the "
                         "servers")
        g.log.info("Glusterd is running on all servers %s", self.servers)
    def test_add_identical_brick(self):
        """
        In this test case:
        1. Create Dist Volume on Node 1
        2. Down brick on Node 1
        3. Peer Probe N2 from N1
        4. Add identical brick on newly added node
        5. Check volume status
        """

        # pylint: disable=too-many-statements
        # Create a distributed volume on Node1
        number_of_brick = 1
        servers_info_from_single_node = {
            self.servers[0]: self.all_servers_info[self.servers[0]]
        }
        self.volname = "testvol"
        bricks_list = form_bricks_list(self.servers[0], self.volname,
                                       number_of_brick, self.servers[0],
                                       servers_info_from_single_node)
        ret, _, _ = volume_create(self.servers[0],
                                  self.volname,
                                  bricks_list,
                                  force=False)
        self.assertEqual(ret, 0, "Volume create failed")
        g.log.info("Volume %s created successfully", self.volname)

        ret, _, _ = volume_start(self.servers[0], self.volname, True)
        self.assertEqual(ret, 0, ("Failed to start the "
                                  "volume %s", self.volname))
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        ret = bring_bricks_offline(self.volname, bricks_list[0])
        self.assertTrue(ret, "Failed to bring down the bricks")
        g.log.info("Successfully brought the bricks down")

        ret, _, _ = peer_probe(self.servers[0], self.servers[1])
        self.assertEqual(ret, 0, ("peer probe from %s to %s is failed",
                                  self.servers[0], self.servers[1]))
        g.log.info("peer probe is success from %s to "
                   "%s", self.servers[0], self.servers[1])

        # wait for some time before add-brick
        time.sleep(2)

        # Replace just the host IP to create an identical brick path on the
        # newly probed node (str.replace keeps the rest of the path intact)
        add_bricks = []
        add_bricks.append(
            bricks_list[0].replace(self.servers[0], self.servers[1]))
        ret, _, _ = add_brick(self.mnode, self.volname, add_bricks)
        self.assertEqual(ret, 0, "Failed to add the bricks to the volume")
        g.log.info("Successfully added bricks to volume %s", add_bricks[0])

        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Volume start with force failed")

        vol_status = get_volume_status(self.mnode, self.volname)
        self.assertIsNotNone(
            vol_status, "Failed to get volume "
            "status for %s" % self.volname)
    def test_detach_node_used_to_mount(self):
        # pylint: disable=too-many-statements
        """
        Test case:
        1.Create a 1X3 volume with only 3 nodes from the cluster.
        2.Mount volume on client node using the ip of the fourth node.
        3.Write IOs to the volume.
        4.Detach node N4 from cluster.
        5.Create a new directory on the mount point.
        6.Create a few files using the same command used in step 3.
        7.Add three more bricks to make the volume
          2x3 using add-brick command.
        8.Do a gluster volume rebalance on the volume.
        9.Create more files from the client on the mount point.
        10.Check for files on bricks from both replica sets.
        11.Create a new directory from the client on the mount point.
        12.Check for directory in both replica sets.
        """

        # Create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Volume %s created successfully", self.volname)

        # Mounting the volume.
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.servers[4],
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully using %s", self.servers[4])

        # Creating 100 files.
        command = ('for number in `seq 1 100`;do touch ' +
                   self.mounts[0].mountpoint + '/file$number; done')
        ret, _, _ = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, "File creation failed.")
        g.log.info("Files create on mount point.")

        # Detach N4 from the list.
        ret, _, _ = peer_detach(self.mnode, self.servers[4])
        self.assertEqual(ret, 0, "Failed to detach %s" % self.servers[4])
        g.log.info("Peer detach successful %s", self.servers[4])

        # Creating a dir.
        ret = mkdir(self.mounts[0].client_system,
                    self.mounts[0].mountpoint + "/dir1",
                    parents=True)
        self.assertTrue(ret, ("Failed to create directory dir1."))
        g.log.info("Directory dir1 created successfully.")

        # Creating 100 files.
        command = ('for number in `seq 101 200`;do touch ' +
                   self.mounts[0].mountpoint + '/file$number; done')
        ret, _, _ = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, "File creation failed.")
        g.log.info("Files create on mount point.")

        # Forming brick list
        brick_list = form_bricks_list_to_add_brick(self.mnode, self.volname,
                                                   self.servers,
                                                   self.all_servers_info)

        # Adding bricks
        ret, _, _ = add_brick(self.mnode, self.volname, brick_list)
        self.assertEqual(ret, 0,
                         "Failed to add brick to the volume %s" % self.volname)
        g.log.info("Brick added successfully to the volume %s", self.volname)

        # Start rebalance for volume.
        g.log.info("Starting rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance "
                                  "on the volume %s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Creating 100 files.
        command = ('for number in `seq 201 300`;do touch ' +
                   self.mounts[0].mountpoint + '/file$number; done')
        ret, _, _ = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, "File creation failed.")
        g.log.info("Files create on mount point.")

        # Check for files on bricks.
        attempts = 10
        while attempts:
            number = str(randint(1, 300))
            for brick in brick_list:
                brick_server, brick_dir = brick.split(':')
                file_name = brick_dir + "/file" + number
                if file_exists(brick_server, file_name):
                    g.log.info("Check xattr"
                               " on host %s for file %s", brick_server,
                               file_name)
                    ret = get_fattr_list(brick_server, file_name)
                    self.assertTrue(ret,
                                    ("Failed to get xattr for %s" % file_name))
                    g.log.info("Got xattr for %s successfully", file_name)
            attempts -= 1

        # Creating a dir.
        ret = mkdir(self.mounts[0].client_system,
                    self.mounts[0].mountpoint + "/dir2")
        if not ret:
            attempts = 5
            while attempts:
                ret = mkdir(self.mounts[0].client_system,
                            self.mounts[0].mountpoint + "/dir2")
                if ret:
                    break
                attempts -= 1
        self.assertTrue(ret, ("Failed to create directory dir2."))
        g.log.info("Directory dir2 created successfully.")

        # Check for directory in both replica sets.
        for brick in brick_list:
            brick_server, brick_dir = brick.split(':')
            folder_name = brick_dir + "/dir2"
            if file_exists(brick_server, folder_name):
                g.log.info(
                    "Check trusted.glusterfs.dht"
                    " on host %s for directory %s", brick_server, folder_name)
                ret = get_fattr(brick_server, folder_name,
                                'trusted.glusterfs.dht')
                self.assertTrue(ret, ("Failed to get trusted.glusterfs.dht"
                                      " xattr for %s" % folder_name))
                g.log.info(
                    "Get trusted.glusterfs.dht xattr"
                    " for %s successfully", folder_name)
Example #22
    def test_manual_heal_should_trigger_heal(self):
        """
        - create a single brick volume
        - add some files and directories
        - get arequal from mountpoint
        - add-brick such that this brick makes the volume a replica vol 1x2
        - start heal
        - make sure heal is completed
        - get arequals from all bricks and compare with arequal from mountpoint
        """
        # pylint: disable=too-many-statements,too-many-locals
        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dir-length 1 "
                   "--dir-depth 1 "
                   "--max-num-of-dirs 1 "
                   "--num-of-files 10 %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            g.log.info("IO on %s:%s is started successfully",
                       mount_obj.client_system, mount_obj.mountpoint)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Get arequal for mount before adding bricks
        g.log.info('Getting arequal before adding bricks...')
        ret, arequals = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before adding bricks is successful')
        mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]

        # Form brick list to add
        g.log.info('Forming brick list to add...')
        bricks_to_add = form_bricks_list(self.mnode, self.volname, 1,
                                         self.servers, self.all_servers_info)
        g.log.info('Brick list to add: %s', bricks_to_add)

        # Add bricks
        g.log.info("Start adding bricks to volume...")
        ret, _, _ = add_brick(self.mnode,
                              self.volname,
                              bricks_to_add,
                              force=True,
                              replica_count=2)
        self.assertFalse(ret, "Failed to add bricks %s" % bricks_to_add)
        g.log.info("Adding bricks is successful on volume %s", self.volname)

        # Make sure the newly added bricks are available in the volume
        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume: %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick list: %s", bricks_list)
        for brick in bricks_to_add:
            self.assertIn(brick, bricks_list,
                          'Brick %s is not in brick list' % brick)
        g.log.info('New bricks are present in the volume')

        # Make sure volume change from distribute to replicate volume
        vol_info_dict = get_volume_type_info(self.mnode, self.volname)
        vol_type = vol_info_dict['volume_type_info']['typeStr']
        self.assertEqual(
            'Replicate', vol_type, 'Volume type is not converted to Replicate '
            'after adding bricks')
        g.log.info('Volume type is successfully converted to Replicate '
                   'after adding bricks')

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal on bricks and compare with mount_point_total
        # It should be the same
        g.log.info('Getting arequal on bricks...')
        arequals_after_heal = {}
        for brick in bricks_list:
            g.log.info('Getting arequal on bricks %s...', brick)
            node, brick_path = brick.split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get arequal on brick %s' % brick)
            g.log.info('Getting arequal for %s is successful', brick)
            brick_total = arequal.splitlines()[-1].split(':')[-1]
            arequals_after_heal[brick] = brick_total
            self.assertEqual(
                mount_point_total, brick_total,
                'Arequals for mountpoint and %s are not equal' % brick)
            g.log.info('Arequals for mountpoint and %s are equal', brick)
        g.log.info('All arequals are equal for the replicated volume')
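        # Optional hedged cross-check: the standard `gluster volume heal
        # <VOL> info` CLI gives a human-readable view of any pending heals;
        # only g.run and that stock command are assumed here.
        ret, out, _ = g.run(self.mnode,
                            "gluster volume heal %s info" % self.volname)
        if ret == 0:
            g.log.info("Heal info for %s:\n%s", self.volname, out)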
    def test_quota_rebalance(self):
        """
        * Enable quota on the volume
        * set hard and soft time out to zero.
        * Create some files and directories from mount point
          so that the limits are reached.
        * Perform add-brick operation on the volume.
        * Start rebalance on the volume.
        * While rebalance is in progress, create some more files
          and directories from the mount point until limit is hit
        """

        # pylint: disable=too-many-statements
        # Enable Quota on volume
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Set the Quota timeouts to 0 for strict accounting
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname))
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname))
        g.log.info("Quota soft and hard timeout has been set to 0 for %s",
                   self.volname)

        # Set limit of 100 MB on root dir of the volume
        ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/", "100MB")
        self.assertEqual(ret, 0, "Failed to set Quota for dir '/'")
        g.log.info("Successfully set quota limit for dir '/'")

        # Do some IO  until hard limit is reached
        cmd = ("/usr/bin/env python %s create_files "
               "-f 100 --fixed-file-size 1M --base-file-name file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete and validate IO
        self.assertTrue(
            wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]),
            "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Add bricks to the volume
        if "replica_count" in self.volume["voltype"]:
            new_bricks_count = self.volume["voltype"]["replica_count"]
        elif "disperse_count" in self.volume["voltype"]:
            new_bricks_count = self.volume["voltype"]["disperse_count"]
        else:
            new_bricks_count = 3
        bricks_list = form_bricks_list(self.mnode, self.volname,
                                       new_bricks_count, self.servers,
                                       self.all_servers_info)
        g.log.info("new brick list: %s", bricks_list)
        ret, _, _ = add_brick(self.mnode, self.volname, bricks_list, False)
        self.assertEqual(ret, 0, "Failed to add the bricks to the volume")
        g.log.info("Successfully added bricks to volume")

        # Perform rebalance start operation
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Rebalance Start Failed")

        # Wait for at least one file to be lookedup/scanned on the nodes
        status_info = get_rebalance_status(self.mnode, self.volname)
        count = 0
        while count < 20:
            lookups_start_count = 0
            for node in range(len(status_info['node'])):
                status_info = get_rebalance_status(self.mnode, self.volname)
                lookups_file_count = status_info['node'][node]['lookups']
                if int(lookups_file_count) > 0:
                    lookups_start_count += 1
                    sleep(2)
            if lookups_start_count == len(self.servers):
                g.log.info(
                    "Volume %s: At least one file is lookedup/scanned "
                    "on all nodes", self.volname)
                break
            count += 1

        # Perform some more IO and check if hard limit is honoured
        self.all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 100 --fixed-file-size 1M --base-file-name newfile %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete and validate IO
        # This should fail as the quotas were already reached
        self.assertFalse(
            validate_io_procs(self.all_mounts_procs, self.mounts[0]),
            "Unexpected: IO passed on the client even after quota is reached")
        g.log.info("Expected: IO failed as quota is reached")

        # Wait for rebalance to finish
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=180)
        self.assertTrue(ret, "Unexpected: Rebalance did not complete")
        g.log.info("Rebalance completed as expected")
    def test_add_brick_when_quorum_not_met(self):

        # pylint: disable=too-many-statements
        # create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))
        g.log.info("Volume is created and started successfully")

        # set cluster.server-quorum-type as server
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.server-quorum-type': 'server'})
        self.assertTrue(ret, ("Failed to set the quorum type as a server"
                              " on volume %s", self.volname))
        g.log.info("Able to set server quorum successfully on volume %s",
                   self.volname)

        # Setting quorum ratio to 95%
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '95%'})
        self.assertTrue(
            ret, "Failed to set server quorum ratio on %s" % self.volname)
        g.log.info("Able to set server quorum ratio successfully on %s",
                   self.servers)

        # bring down glusterd of half nodes
        num_of_servers = len(self.servers)
        num_of_nodes_to_bring_down = num_of_servers // 2

        for node in range(num_of_nodes_to_bring_down, num_of_servers):
            ret = stop_glusterd(self.servers[node])
            self.assertTrue(
                ret, ("Failed to stop glusterd on %s" % self.servers[node]))
            g.log.info("Glusterd stopped successfully on server %s",
                       self.servers[node])

        for node in range(num_of_nodes_to_bring_down, num_of_servers):
            count = 0
            while count < 80:
                ret = is_glusterd_running(self.servers[node])
                if ret:
                    break
                sleep(2)
                count += 1
            self.assertNotEqual(
                ret, 0, "glusterd is still running on %s" % self.servers[node])

        # Verifying node count in volume status after glusterd stopped
        # on half of the servers. It's not possible to check the brick status
        # immediately in volume status after glusterd stops
        count = 0
        while count < 100:
            vol_status = get_volume_status(self.mnode, self.volname)
            servers_count = len(vol_status[self.volname])
            if servers_count == (num_of_servers - num_of_nodes_to_bring_down):
                break
            sleep(2)
            count += 1

        # confirm that quorum is not met, brick process should be down
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        bricks_to_check = bricks_list[0:num_of_nodes_to_bring_down]
        ret = are_bricks_offline(self.mnode, self.volname, bricks_to_check)
        self.assertTrue(
            ret, "Unexpected: Bricks are online even though server quorum "
            "is not met")
        g.log.info("Server quorum is not met, bricks are down as expected")

        # try add brick operation, which should fail
        num_bricks_to_add = 1
        brick = form_bricks_list(self.mnode, self.volname, num_bricks_to_add,
                                 self.servers, self.all_servers_info)
        ret, _, _ = add_brick(self.mnode, self.volname, brick)
        self.assertNotEqual(ret, 0, ("Unexpected: add brick is success, "
                                     "when quorum is not met"))
        g.log.info("Add brick is failed as expected, when quorum is not met")

        # confirm that the newly added brick is not part of the volume
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        self.assertNotIn(brick[0], bricks_list,
                         ("Unexpected: add-brick succeeded "
                          "when quorum is not met"))
        g.log.info("Newly added brick is not part of the volume, as expected")

        # set cluster.server-quorum-type as none
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.server-quorum-type': 'none'})
        self.assertTrue(ret, ("Failed to set the quorum type as a server"
                              " on volume %s", self.volname))
        g.log.info("Able to set server quorum successfully on volume %s",
                   self.volname)
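        # Hedged cleanup sketch, not part of the original test: bring the
        # stopped glusterd daemons back so later teardown does not run
        # against a half-down cluster; plain systemctl over g.run is assumed
        # here rather than any library helper.
        for node in range(num_of_nodes_to_bring_down, num_of_servers):
            ret, _, _ = g.run(self.servers[node], "systemctl start glusterd")
            if ret != 0:
                g.log.error("Failed to start glusterd on %s",
                            self.servers[node])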
    def test_rebalance_start_not_fail(self):
        """
        1. On Node N1, Add "transport.socket.bind-address N1" in the
            /etc/glusterfs/glusterd.vol
        2. Create a replicate (1X3) and disperse (4+2) volumes with
            name more than 108 chars
        3. Mount the both volumes using node 1 where you added the
            "transport.socket.bind-address" and start IO(like untar)
        4. Perform add-brick on replicate volume 3-bricks
        5. Start rebalance on replicated volume
        6. Perform add-brick for disperse volume 6 bricks
        7. Start rebalance of disperse volume
        """
        cmd = ("sed -i 's/end-volume/option "
               "transport.socket.bind-address {}\\n&/g' "
               "/etc/glusterfs/glusterd.vol".format(self.mnode))
        disperse = ("disperse_e4upxjmtre7dl4797wedbp7r3jr8equzvmcae9f55t6z1"
                    "ffhrlk40jtnrzgo4n48fjf6b138cttozw3c6of3ze71n9urnjkshoi")
        replicate = ("replicate_e4upxjmtre7dl4797wedbp7r3jr8equzvmcae9f55t6z1"
                     "ffhrlk40tnrzgo4n48fjf6b138cttozw3c6of3ze71n9urnjskahn")

        volnames = (disperse, replicate)
        for volume, vol_name in (("disperse", disperse), ("replicate",
                                                          replicate)):

            bricks_list = form_bricks_list(self.mnode, volume,
                                           6 if volume == "disperse" else 3,
                                           self.servers, self.all_servers_info)
            if volume == "replicate":
                ret, _, _ = volume_create(self.mnode,
                                          replicate,
                                          bricks_list,
                                          replica_count=3)

            else:
                ret, _, _ = volume_create(self.mnode,
                                          disperse,
                                          bricks_list,
                                          force=True,
                                          disperse_count=6,
                                          redundancy_count=2)

            self.assertFalse(
                ret, "Unexpected: Volume create '{}' failed ".format(vol_name))
            ret, _, _ = volume_start(self.mnode, vol_name)
            self.assertFalse(ret, "Failed to start volume")

        # Add entry in 'glusterd.vol'
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertFalse(ret, "Failed to add entry in 'glusterd.vol' file")

        self.list_of_io_processes = []

        # mount volume
        self.mount = ("/mnt/replicated_mount", "/mnt/disperse_mount")
        for mount_dir, volname in zip(self.mount, volnames):
            ret, _, _ = mount_volume(volname, "glusterfs", mount_dir,
                                     self.mnode, self.clients[0])
            self.assertFalse(
                ret, "Failed to mount the volume '{}'".format(mount_dir))

            # Run IO
            # Create a dir to start untar
            # for mount_point in self.mount:
            self.linux_untar_dir = "{}/{}".format(mount_dir, "linuxuntar")
            ret = mkdir(self.clients[0], self.linux_untar_dir)
            self.assertTrue(ret, "Failed to create dir linuxuntar for untar")

            # Start linux untar on dir linuxuntar
            ret = run_linux_untar(self.clients[:1],
                                  mount_dir,
                                  dirs=tuple(['linuxuntar']))
            self.list_of_io_processes += ret
            self.is_io_running = True

        # Add Brick to replicate Volume
        bricks_list = form_bricks_list(self.mnode, replicate, 3, self.servers,
                                       self.all_servers_info, "replicate")
        ret, _, _ = add_brick(self.mnode, replicate, bricks_list, force=True)
        self.assertFalse(ret, "Failed to add-brick '{}'".format(replicate))

        # Trigger Rebalance on the volume
        ret, _, _ = rebalance_start(self.mnode, replicate)
        self.assertFalse(
            ret,
            "Failed to start rebalance on the volume '{}'".format(replicate))

        # Add Brick to disperse Volume
        bricks_list = form_bricks_list(self.mnode, disperse, 6, self.servers,
                                       self.all_servers_info, "disperse")

        ret, _, _ = add_brick(self.mnode, disperse, bricks_list, force=True)
        self.assertFalse(ret, "Failed to add-brick '{}'".format(disperse))

        # Trigger Rebalance on the volume
        ret, _, _ = rebalance_start(self.mnode, disperse)
        self.assertFalse(
            ret, "Failed to start rebalance on the volume {}".format(disperse))

        # Check if Rebalance is completed on both the volume
        for volume in (replicate, disperse):
            ret = wait_for_rebalance_to_complete(self.mnode,
                                                 volume,
                                                 timeout=600)
            self.assertTrue(
                ret,
                "Rebalance is not Compleated on Volume '{}'".format(volume))