def test_add_brick_already_part_of_another_volume(self):
        """ Test adding bricks to the volume which are already part of another
        volume.
        """
        # create and start a volume
        self.volume['name'] = "existing_volume"
        self.volname = "existing_volume"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Volume created and started successfully")
        sub_vols = get_subvols(self.mnode, self.volname)['volume_subvols']

        # create and start a new volume
        self.volume['name'] = "new_volume"
        self.volname = "new_volume"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Volume created and started successfully")
        cmd = ("gluster volume add-brick %s %s " % (self.volname,
                                                    ' '.join(sub_vols[0])))
        g.log.info("Adding bricks to volume %s which are already part of an"
                   "another volume", self.volname)
        _, _, err = g.run(self.mnode, cmd)
        self.assertIn("Brick may be containing or be contained by an existing"
                      " brick", err, "add-brick is successful")
        g.log.info("Volume add-brick failed with error %s ", err)
Example 2
    def setup_samba_ctdb_cluster(cls):
        """
        Create ctdb-samba cluster if doesn't exists

        Returns:
            bool: True if successfully setup samba else false
        """
        # Check if ctdb setup is up and running
        if is_ctdb_status_healthy(cls.primary_node):
            g.log.info("ctdb setup already up skipping " "ctdb setup creation")
            return True
        g.log.info("Proceeding with ctdb setup creation")
        for mnode in cls.servers:
            ret = edit_hook_script(mnode, cls.ctdb_volname)
            if not ret:
                return False
            ret = enable_ctdb_cluster(mnode)
            if not ret:
                return False
            ret = create_nodes_file(mnode, cls.ctdb_nodes)
            if not ret:
                return False
            ret = create_public_address_file(mnode, cls.ctdb_vips)
            if not ret:
                return False
        server_info = cls.all_servers_info
        ctdb_config = cls.ctdb_volume_config
        g.log.info("Setting up ctdb volume %s", cls.ctdb_volname)
        ret = setup_volume(mnode=cls.primary_node,
                           all_servers_info=server_info,
                           volume_config=ctdb_config)
        if not ret:
            g.log.error("Failed to setup ctdb volume %s", cls.ctdb_volname)
            return False
        g.log.info("Successful in setting up volume %s", cls.ctdb_volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume %s processes to be online",
                   cls.ctdb_volname)
        ret = wait_for_volume_process_to_be_online(cls.mnode, cls.ctdb_volname)
        if not ret:
            g.log.error(
                "Failed to wait for volume %s processes to "
                "be online", cls.ctdb_volname)
            return False
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", cls.ctdb_volname)

        # start ctdb services
        ret = start_ctdb_service(cls.servers)
        if not ret:
            return False

        ret = is_ctdb_status_healthy(cls.primary_node)
        if not ret:
            g.log.error("CTDB setup creation failed - exiting")
            return False
        g.log.info("CTDB setup creation successfull")
        return True
Example 3
    def setUp(self):
        self.get_super_method(self, 'setUp')()

        # Override the volume type to specifically test distributed-replicated volumes
        if self.volume_type == "distributed-replicated":
            self.volume['voltype'] = {
                'type': 'distributed-replicated',
                'replica_count': 2,
                'dist_count': 4,
                'transport': 'tcp'
            }

        # Create a distributed-replicated volume with replica count 2
        # using first four nodes
        servers_info_from_four_nodes = {}
        for server in self.servers[0:4]:
            servers_info_from_four_nodes[server] = self.all_servers_info[
                server]

        self.volume['servers'] = self.servers[0:4]
        ret = setup_volume(self.mnode,
                           servers_info_from_four_nodes,
                           self.volume,
                           force=False)
        if not ret:
            raise ExecutionError("Volume create failed on four nodes")
        g.log.info("Distributed replicated volume created successfully")
Example 4
    def setUp(self):
        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        self.extra_servers = self.servers[-2:]
        self.servers = self.servers[:-2]
        # Performing peer detach
        for server in self.extra_servers:
            # Peer detach
            ret, _, _ = peer_detach(self.mnode, server)
            if ret:
                raise ExecutionError("Peer detach failed")
            g.log.info("Peer detach successful.")

        # Create volume using first four nodes
        servers_info_from_four_nodes = {}
        for server in self.servers:
            servers_info_from_four_nodes[server] = self.all_servers_info[
                server]

        self.volume['servers'] = self.servers
        ret = setup_volume(self.mnode,
                           servers_info_from_four_nodes,
                           self.volume,
                           force=False)
        if not ret:
            raise ExecutionError("Volume create failed on four nodes")
        g.log.info("Distributed replicated volume created successfully")

        # Verify glustershd process releases its parent process
        ret = is_shd_daemonized(self.servers)
        if not ret:
            raise ExecutionError("Self Heal Daemon process was still"
                                 " holding parent process.")
        g.log.info("Self Heal Daemon processes are online")
    def setUp(self):

        self.get_super_method(self, 'setUp')()

        # Create and start a volume.
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        if not ret:
            raise ExecutionError("Failed to create and start volume")
Example 6
    def setUp(self):
        self.get_super_method(self, 'setUp')()
        self.test_method_complete = False
        # Creating a volume and starting it
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        if not ret:
            raise ExecutionError("Failed to create volume")
        g.log.info("Volume created successfully")
Example 7
    def setUp(self):

        self.get_super_method(self, 'setUp')()

        # Create and start a volume.
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        if not ret:
            raise ExecutionError("Failed to create and start volume")
    def setUp(self):
        self.get_super_method(self, 'setUp')()
        self.test_method_complete = False
        # Creating a volume and starting it
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        if not ret:
            raise ExecutionError("Failed to create volume")
        g.log.info("Volume created successfully")
Example 9
    def test_volume_reduce_replica_count(self):
        """
        Test case:
        1) Create a 2x3 replica volume.
        2) Remove bricks in the volume to make it a 2x2 replica volume.
        3) Remove bricks in the volume to make it a distribute volume.
        """

        # Create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        # Getting a list of all the bricks.
        g.log.info("Get all the bricks of the volume")
        self.brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(self.brick_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Converting 2x3 to 2x2 volume.
        remove_brick_list = [self.brick_list[0], self.brick_list[3]]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 remove_brick_list,
                                 'force',
                                 replica_count=2)
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove brick operation successfully")

        # Checking if volume is 2x2 or not.
        volume_info = get_volume_info(self.mnode, self.volname)
        brick_count = int(volume_info[self.volname]['brickCount'])
        self.assertEqual(brick_count, 4, "Failed to remove 2 bricks.")
        g.log.info("Successfully removed 2 bricks.")
        type_string = volume_info[self.volname]['typeStr']
        self.assertEqual(type_string, 'Distributed-Replicate',
                         "Convertion to 2x2 failed.")
        g.log.info("Convertion to 2x2 successful.")

        # Converting 2x2 to distribute volume.
        remove_brick_list = [self.brick_list[1], self.brick_list[4]]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 remove_brick_list,
                                 'force',
                                 replica_count=1)
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove brick operation successfully")

        # Checking if volume is pure distribute or not.
        volume_info = get_volume_info(self.mnode, self.volname)
        brick_count = int(volume_info[self.volname]['brickCount'])
        self.assertEqual(brick_count, 2, "Failed to remove 2 bricks.")
        g.log.info("Successfully removed 2 bricks.")
        type_string = volume_info[self.volname]['typeStr']
        self.assertEqual(type_string, 'Distribute',
                         "Convertion to distributed failed.")
        g.log.info("Convertion to distributed successful.")
Example 10
    def setUp(self):
        """
        setUp method for every test
        """
        # calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()
        self.volume_list = []
        # create a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.volume_list.append(self.volname)
        if not ret:
            raise ExecutionError("Volume creation failed: %s" % self.volname)

        # Creating another volume
        second_volume = "second_volume"
        self.volume['name'] = second_volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.volume_list.append(second_volume)
        if not ret:
            raise ExecutionError("Volume creation failed: %s" % second_volume)
    def setUpClass(cls):
        """
        setup volume and initialize necessary variables
        which are used in the tests
        """
        # calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        list_of_vol = [
            'distributed-dispersed', 'replicated', 'dispersed', 'distributed',
            'distributed-replicated'
        ]
        cls.volume_configs = []
        if cls.default_volume_type_config['distributed']['dist_count'] > 3:
            cls.default_volume_type_config['distributed']['dist_count'] = 3

        for volume_type in list_of_vol:
            cls.volume_configs.append({
                'name': 'testvol_%s' % volume_type,
                'servers': cls.servers,
                'voltype': cls.default_volume_type_config[volume_type]
            })
        for volume_config in cls.volume_configs:
            ret = setup_volume(mnode=cls.mnode,
                               all_servers_info=cls.all_servers_info,
                               volume_config=volume_config,
                               multi_vol=True)
            volname = volume_config['name']
            if not ret:
                raise ExecutionError("Failed to setup Volume" " %s" % volname)
            g.log.info("Successful in setting volume %s", volname)

            # Verify volume's all process are online for 60 sec
            g.log.info("Verifying volume's all process are online")
            ret = wait_for_volume_process_to_be_online(cls.mnode, volname, 60)
            if not ret:
                raise ExecutionError("Volume %s : All process are not online" %
                                     volname)
            g.log.info("Successfully Verified volume %s processes are online",
                       volname)

        # Verify glustershd process releases its parent process
        g.log.info("Verifying Self Heal Daemon process is daemonized")
        ret = is_shd_daemonized(cls.servers)
        if not ret:
            raise ExecutionError("Self Heal Daemon process was still"
                                 " holding parent process.")
        g.log.info("Self Heal Daemon processes are online")
Example 12
    def setup_volume(cls, volume_create_force=False):
        """Setup the volume:
            - Create the volume, Start volume, Set volume
            options, enable snapshot/quota/tier if specified in the config
            file.
            - Wait for volume processes to be online
            - Export volume as NFS/SMB share if mount_type is NFS or SMB
            - Log volume info and status
        Args:
            volume_create_force(bool): True if create_volume should be
                executed with 'force' option.
        Returns (bool): True if all the steps mentioned in the description
            pass. False otherwise.
        """
        force_volume_create = False
        if volume_create_force or cls.volume_create_force:
            force_volume_create = True

        # Validate peers before setting up volume
        g.log.info("Validate peers before setting up volume ")
        ret = cls.validate_peers_are_connected()
        if not ret:
            g.log.error("Failed to validate peers are in connected state "
                        "before setting up volume")
            return False
        g.log.info("Successfully validated peers are in connected state "
                   "before setting up volume")

        # Setup Volume
        g.log.info("Setting up volume %s", cls.volname)
        ret = setup_volume(mnode=cls.mnode,
                           all_servers_info=cls.all_servers_info,
                           volume_config=cls.volume, force=force_volume_create)
        if not ret:
            g.log.error("Failed to Setup volume %s", cls.volname)
            return False
        g.log.info("Successful in setting up volume %s", cls.volname)

        # ToDo : Wait for volume processes to be online

        # Log Volume Info and Status
        g.log.info("Log Volume %s Info and Status", cls.volname)
        ret = log_volume_info_and_status(cls.mnode, cls.volname)
        if not ret:
            g.log.error("Logging volume %s info and status failed",
                        cls.volname)
            return False
        g.log.info("Successful in logging volume %s info and status",
                   cls.volname)

        return True
    def test_enable_brickmux_create_and_stop_three_volumes(self):
        """
        Test Case:
        1.Set cluster.brick-multiplex to enabled.
        2.Create three 1x3 replica volumes.
        3.Start all the three volumes.
        4.Stop three volumes one by one.
        """

        # Timestamp of current test case of start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Setting cluster.brick-multiplex to enable
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.brick-multiplex': 'enable'})
        self.assertTrue(ret, "Failed to set brick-multiplex to enable.")
        g.log.info("Successfully set brick-multiplex to enable.")

        # Create and start 3 volumes
        for number in range(1, 4):
            self.volume['name'] = ("test_volume_%s" % number)
            self.volname = ("test_volume_%s" % number)
            ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
            self.assertTrue(ret,
                            "Failed to create and start %s" % self.volname)
            g.log.info("Successfully created and started volume %s.",
                       self.volname)

        # Checking brick process count.
        for brick in get_all_bricks(self.mnode, self.volname):
            server = brick.split(":")[0]
            count = get_brick_processes_count(server)
            self.assertEqual(
                count, 1, "ERROR: More than one brick process on %s." % server)
            g.log.info("Only one brick process present on %s", server)

        # Stop three volumes one by one.
        for number in range(1, 4):
            self.volume['name'] = ("test_volume_%s" % number)
            self.volname = ("test_volume_%s" % number)
            ret, _, _ = volume_stop(self.mnode, self.volname)
            self.assertEqual(ret, 0,
                             "Failed to stop the volume %s" % self.volname)
            g.log.info("Volume %s stopped successfully", self.volname)

        # Checking for core files.
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "Core file found.")
        g.log.info("No core files found, glusterd service running "
                   "successfully")
Example 14
    def use_config_setup_volumes(self):
        """
        A function to set up volumes based on volume_configs.
        """
        for volume_config in self.volume_configs:
            ret = setup_volume(mnode=self.mnode,
                               all_servers_info=self.all_servers_info,
                               volume_config=volume_config,
                               force=False)
            if not ret:
                raise ExecutionError("Failed to setup Volume %s"
                                     % volume_config['name'])
            g.log.info("Successful in setting volume %s",
                       volume_config['name'])
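use_config_setup_volumes() expects self.volume_configs to follow the same shape built in the setUpClass/setUp examples on this page; a small illustrative builder (not from the original code):

def build_volume_configs(servers, default_volume_type_config):
    """Build one volume config per volume type from a default_volume_type_config dict."""
    return [{'name': 'testvol_%s' % volume_type,
             'servers': servers,
             'voltype': default_volume_type_config[volume_type]}
            for volume_type in default_volume_type_config]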
Example 15
    def test_glusterd_start_stop_cycle(self):
        """
        Test Glusterd stop-start cycle of gluster processes.
        1. Create a gluster volume.
        2. Kill all gluster related processes.
        3. Start glusterd service.
        4. Verify that all gluster processes are up.
        5. Repeat the above steps 5 times.
        """
        # Create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        for _ in range(5):
            killed_gluster_process_count = []
            # Kill gluster processes in all servers
            for server in self.servers:
                cmd = ('pkill --signal 9 -c -e "(glusterd|glusterfsd|glusterfs'
                       ')"|tail -1')
                ret, out, err = g.run(server, cmd)
                self.assertEqual(ret, 0, err)
                killed_gluster_process_count.append(int(out))

            # Start glusterd on all servers.
            ret = start_glusterd(self.servers)
            self.assertTrue(ret, ("Failed to restart glusterd on desired"
                                  " nodes."))
            g.log.info("Glusterd started on desired nodes.")

            # Wait for gluster processes to come up.
            self._wait_for_gluster_process_online_state()

            spawned_gluster_process_count = []
            # Get the number of gluster processes spawned on each server
            for server in self.servers:
                cmd = ('pgrep -c "(glusterd|glusterfsd|glusterfs)"')
                ret, out, err = g.run(server, cmd)
                self.assertEqual(ret, 0, err)
                spawned_gluster_process_count.append(int(out))

            # Compare process count in each server.
            for index, server in enumerate(self.servers):
                self.assertEqual(
                    killed_gluster_process_count[index],
                    spawned_gluster_process_count[index],
                    ("All processes not up and running on %s", server))
    def setUp(self):
        """
        setup volume and initialize necessary variables
        which are used in the tests
        """
        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Setup Volume for all the volume types
        self.volume_configs = []
        for volume_type in self.default_volume_type_config:
            self.volume_configs.append({
                'name': 'testvol_%s' % volume_type,
                'servers': self.servers,
                'voltype': self.default_volume_type_config[volume_type]
            })

        for volume_config in self.volume_configs[1:]:
            ret = setup_volume(mnode=self.mnode,
                               all_servers_info=self.all_servers_info,
                               volume_config=volume_config,
                               multi_vol=True)
            volname = volume_config['name']
            if not ret:
                raise ExecutionError("Failed to setup Volume" " %s" % volname)
            g.log.info("Successful in setting volume %s", volname)

            # Verify volume's all process are online for 60 sec
            ret = wait_for_volume_process_to_be_online(self.mnode, volname, 60)
            if not ret:
                raise ExecutionError("Volume %s : All process are not online" %
                                     volname)
            g.log.info("Successfully Verified volume %s processes are online",
                       volname)

        # Verify glustershd process releases its parent process
        ret = is_shd_daemonized(self.servers)
        if not ret:
            raise ExecutionError("Self Heal Daemon process was still"
                                 " holding parent process.")
        g.log.info("Self Heal Daemon processes are online")

        self.glustershd = "/var/lib/glusterd/glustershd/glustershd-server.vol"
    def test_uuid_in_volume_info_xml(self):

        # create a two node cluster
        ret = peer_probe_servers(self.servers[0], self.servers[1])
        self.assertTrue(
            ret,
            "Peer probe failed to %s from %s" % (self.mnode, self.servers[1]))

        # create a 2x2 volume
        servers_info_from_two_node_cluster = {}
        for server in self.servers[0:2]:
            servers_info_from_two_node_cluster[server] = self.all_servers_info[
                server]

        self.volume['servers'] = self.servers[0:2]
        self.volume['voltype']['replica_count'] = 2
        self.volume['voltype']['dist_count'] = 2
        ret = setup_volume(self.mnode, servers_info_from_two_node_cluster,
                           self.volume)
        self.assertTrue(ret, ("Failed to create"
                              "and start volume %s" % self.volname))

        # probe a new node from cluster
        ret = peer_probe_servers(self.mnode, self.servers[2])
        self.assertTrue(
            ret,
            "Peer probe failed to %s from %s" % (self.mnode, self.servers[2]))

        # check gluster vol info --xml from newly probed node
        xml_output = get_volume_info(self.servers[2], self.volname)
        self.assertIsNotNone(xml_output,
                             ("Failed to get volume info --xml for"
                              "volume %s from newly probed node %s" %
                              (self.volname, self.servers[2])))

        # volume info --xml should have non zero UUID for host and brick
        uuid_with_zeros = '00000000-0000-0000-0000-000000000000'
        len_of_uuid = len(uuid_with_zeros)
        number_of_bricks = int(xml_output[self.volname]['brickCount'])
        for i in range(number_of_bricks):
            uuid = xml_output[self.volname]['bricks']['brick'][i]['hostUuid']
            self.assertEqual(len(uuid), len_of_uuid, "Invalid uuid length")
            self.assertNotEqual(uuid, uuid_with_zeros,
                                ("Invalid uuid %s" % uuid))
Example 18
    def test_setting_vol_option_with_max_characters(self):

        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))
        auth_list = []
        for ip_addr in range(256):
            auth_list.append('192.168.122.%d' % ip_addr)
        for ip_addr in range(7):
            auth_list.append('192.168.123.%d' % ip_addr)
        ip_list = ','.join(auth_list)

        # set auth.allow with <4096 characters and restart the glusterd
        g.log.info("Setting auth.allow with string of length %d for %s",
                   len(ip_list), self.volname)
        self.options = {"auth.allow": ip_list}
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set auth.allow with string of length"
                              " %d for %s" % (len(ip_list), self.volname)))
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret,
                        "Failed to restart the glusterd on %s" % self.mnode)

        # set auth.allow with >4096 characters and restart the glusterd
        ip_list = ip_list + ",192.168.123.7"
        self.options = {"auth.allow": ip_list}
        g.log.info("Setting auth.allow with string of length %d for %s",
                   len(ip_list), self.volname)
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set auth.allow with string of length"
                              " %d for %s" % (len(ip_list), self.volname)))
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret,
                        "Failed to restart the glusterd on %s" % self.mnode)
        count = 0
        while count < 60:
            ret = is_glusterd_running(self.mnode)
            if not ret:
                break
            sleep(2)
            count += 1
        self.assertEqual(ret, 0, "glusterd is not running on %s" % self.mnode)
    def test_setting_vol_option_with_max_characters(self):

        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))
        auth_list = []
        for ip_addr in range(256):
            auth_list.append('192.168.122.%d' % ip_addr)
        for ip_addr in range(7):
            auth_list.append('192.168.123.%d' % ip_addr)
        ip_list = ','.join(auth_list)

        # set auth.allow with <4096 characters and restart the glusterd
        g.log.info("Setting auth.allow with string of length %d for %s",
                   len(ip_list), self.volname)
        self.options = {"auth.allow": ip_list}
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set auth.allow with string of length"
                              " %d for %s" % (len(ip_list), self.volname)))
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret, "Failed to restart the glusterd on %s"
                        % self.mnode)

        # set auth.allow with >4096 characters and restart the glusterd
        ip_list = ip_list + ",192.168.123.7"
        self.options = {"auth.allow": ip_list}
        g.log.info("Setting auth.allow with string of length %d for %s",
                   len(ip_list), self.volname)
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set auth.allow with string of length"
                              " %d for %s" % (len(ip_list), self.volname)))
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret, "Failed to restart the glusterd on %s"
                        % self.mnode)

        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "glusterd is not running on %s"
                        % self.servers)
        g.log.info("Glusterd start on the nodes : %s "
                   "succeeded", self.servers)
Example 20
    def test_vol_delete_when_one_of_nodes_is_down(self):

        # create a volume and start it
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start the volume")
        g.log.info("Successfully created and started the volume")

        # get the bricks list
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the bricks list")

        # get a random node other than self.mnode
        if len(bricks_list) >= len(self.servers):
            random_index = random.randint(1, len(self.servers) - 1)
        else:
            random_index = random.randint(1, len(bricks_list) - 1)

        # stop glusterd on the random node

        node_to_stop_glusterd = self.servers[random_index]
        ret = stop_glusterd(node_to_stop_glusterd)
        self.assertTrue(ret, "Failed to stop glusterd")

        # stop the volume, it should succeed
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume stop failed")

        # try to delete the volume, it should fail
        ret, out, err = g.run(
            self.mnode, "gluster volume delete %s "
            "--mode=script" % self.volname)
        self.assertNotEqual(
            ret, 0, "Volume delete succeeded when one of the"
            " brick node is down")
        if re.search(r'Some of the peers are down', err):
            g.log.info("Volume delete failed with expected error message")
        else:
            g.log.info("Volume delete failed with unexpected error message")
Example 21
    def setUpClass(cls):
        """Setup volume exports volume with nfs-ganesha,
            mounts the volume.
        """
        cls.get_super_method(cls, 'setUpClass')()

        # Peer probe servers
        ret = peer_probe_servers(cls.mnode, cls.servers)
        if not ret:
            raise ExecutionError("Failed to peer probe servers")

        g.log.info("All peers are in connected state")

        # Peer Status from mnode
        peer_status(cls.mnode)

        for server in cls.servers:
            mount_info = [{
                'protocol': 'glusterfs',
                'mountpoint': '/run/gluster/shared_storage',
                'server': server,
                'client': {
                    'host': server
                },
                'volname': 'gluster_shared_storage',
                'options': ''
            }]

            mount_obj = create_mount_objs(mount_info)
            if not mount_obj[0].is_mounted():
                ret = mount_obj[0].mount()
                if not ret:
                    raise ExecutionError(
                        "Unable to mount volume '%s:%s' "
                        "on '%s:%s'" %
                        (mount_obj[0].server_system, mount_obj[0].volname,
                         mount_obj[0].client_system, mount_obj[0].mountpoint))

        # Setup Volume
        ret = setup_volume(mnode=cls.mnode,
                           all_servers_info=cls.all_servers_info,
                           volume_config=cls.volume,
                           force=True)
        if not ret:
            raise ExecutionError("Setup volume %s failed", cls.volume)
        time.sleep(10)

        # Export volume with nfs ganesha, if it is not exported already
        vol_option = get_volume_options(cls.mnode,
                                        cls.volname,
                                        option='ganesha.enable')
        if vol_option is None:
            raise ExecutionError("Failed to get ganesha.enable volume option "
                                 "for %s " % cls.volume)
        if vol_option['ganesha.enable'] != 'on':
            ret, out, err = export_nfs_ganesha_volume(mnode=cls.mnode,
                                                      volname=cls.volname)
            if ret != 0:
                raise ExecutionError(
                    "Failed to export volume %s "
                    "as NFS export", cls.volname)
            time.sleep(5)

        ret = wait_for_nfs_ganesha_volume_to_get_exported(
            cls.mnode, cls.volname)
        if not ret:
            raise ExecutionError("Failed to export volume %s. volume is "
                                 "not listed in showmount" % cls.volname)
        else:
            g.log.info("Volume %s is exported successfully" % cls.volname)

        # Log Volume Info and Status
        ret = log_volume_info_and_status(cls.mnode, cls.volname)
        if not ret:
            raise ExecutionError("Logging volume %s info and status failed",
                                 cls.volname)

        # Create Mounts
        _rc = True
        for mount_obj in cls.mounts:
            ret = mount_obj.mount()
            if not ret:
                g.log.error("Unable to mount volume '%s:%s' on '%s:%s'",
                            mount_obj.server_system, mount_obj.volname,
                            mount_obj.client_system, mount_obj.mountpoint)
                _rc = False
        if not _rc:
            raise ExecutionError("Mounting volume %s on few clients failed",
                                 cls.volname)

        # Get info of mount before the IO
        log_mounts_info(cls.mounts)
Example 22
    def setup_volume(cls, volume_create_force=False):
        """Setup the volume:
            - Create the volume, Start volume, Set volume
            options, enable snapshot/quota/tier if specified in the config
            file.
            - Wait for volume processes to be online
            - Export volume as NFS/SMB share if mount_type is NFS or SMB
            - Log volume info and status

        Args:
            volume_create_force(bool): True if create_volume should be
                executed with 'force' option.

        Returns (bool): True if all the steps mentioned in the description
            pass. False otherwise.
        """
        force_volume_create = False
        if volume_create_force or cls.volume_create_force:
            force_volume_create = True

        # Validate peers before setting up volume
        g.log.info("Validate peers before setting up volume ")
        ret = cls.validate_peers_are_connected()
        if not ret:
            g.log.error("Failed to validate peers are in connected state "
                        "before setting up volume")
            return False
        g.log.info("Successfully validated peers are in connected state "
                   "before setting up volume")

        # Setup Volume
        g.log.info("Setting up volume %s", cls.volname)
        ret = setup_volume(mnode=cls.mnode,
                           all_servers_info=cls.all_servers_info,
                           volume_config=cls.volume,
                           force=force_volume_create)
        if not ret:
            g.log.error("Failed to Setup volume %s", cls.volname)
            return False
        g.log.info("Successful in setting up volume %s", cls.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume %s processes to be online", cls.volname)
        ret = wait_for_volume_process_to_be_online(cls.mnode, cls.volname)
        if not ret:
            g.log.error(
                "Failed to wait for volume %s processes to "
                "be online", cls.volname)
            return False
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", cls.volname)

        # Export/Share the volume based on mount_type
        if cls.mount_type != "glusterfs":
            g.log.info("Export/Sharing the volume %s", cls.volname)
            if "nfs" in cls.mount_type:
                ret = export_volume_through_nfs(
                    mnode=cls.mnode,
                    volname=cls.volname,
                    enable_ganesha=cls.enable_nfs_ganesha)
                if not ret:
                    g.log.error("Failed to export volume %s "
                                "as NFS export", cls.volname)
                    return False
                g.log.info(
                    "Successful in exporting the volume %s "
                    "as NFS export", cls.volname)

                # Set NFS-Ganesha specific volume options
                if cls.enable_nfs_ganesha and cls.nfs_ganesha_export_options:
                    g.log.info(
                        "Setting NFS-Ganesha export specific "
                        "volume options on volume %s", cls.volname)
                    ret = set_volume_options(
                        mnode=cls.mnode,
                        volname=cls.volname,
                        options=cls.nfs_ganesha_export_options)
                    if not ret:
                        g.log.error(
                            "Failed to set NFS-Ganesha "
                            "export specific options on "
                            "volume %s", cls.volname)
                        return False
                    g.log.info(
                        "Successful in setting NFS-Ganesha export "
                        "specific volume options on volume %s", cls.volname)

            if "smb" in cls.mount_type or "cifs" in cls.mount_type:
                ret = share_volume_over_smb(mnode=cls.mnode,
                                            volname=cls.volname,
                                            smb_users_info=cls.smb_users_info)
                if not ret:
                    g.log.error("Failed to export volume %s "
                                "as SMB Share", cls.volname)
                    return False
                g.log.info("Successful in exporting volume %s as SMB Share",
                           cls.volname)

                # Set SMB share specific volume options
                if cls.smb_share_options:
                    g.log.info(
                        "Setting SMB share specific volume options "
                        "on volume %s", cls.volname)
                    ret = set_volume_options(mnode=cls.mnode,
                                             volname=cls.volname,
                                             options=cls.smb_share_options)
                    if not ret:
                        g.log.error(
                            "Failed to set SMB share "
                            "specific options "
                            "on volume %s", cls.volname)
                        return False
                    g.log.info(
                        "Successful in setting SMB share specific "
                        "volume options on volume %s", cls.volname)

        # Log Volume Info and Status
        g.log.info("Log Volume %s Info and Status", cls.volname)
        ret = log_volume_info_and_status(cls.mnode, cls.volname)
        if not ret:
            g.log.error("Logging volume %s info and status failed",
                        cls.volname)
            return False
        g.log.info("Successful in logging volume %s info and status",
                   cls.volname)

        return True
    def test_create_vol_used_bricks(self):
        '''
        -> Create distributed-replica Volume
        -> Add 6 bricks to the volume
        -> Mount the volume
        -> Perform some I/O's on mount point
        -> unmount the volume
        -> Stop and delete the volume
        -> Create another volume using bricks of deleted volume
        '''

        # Create and start a volume
        self.volume['name'] = "test_create_vol_with_fresh_bricks"
        self.volname = "test_create_vol_with_fresh_bricks"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        # Forming brick list
        brick_list = form_bricks_list(self.mnode, self.volname, 6,
                                      self.servers, self.all_servers_info)
        # Adding bricks to the volume
        ret, _, _ = add_brick(self.mnode, self.volname, brick_list)
        self.assertEqual(
            ret, 0, "Failed to add bricks to the volume %s" % self.volname)
        g.log.info("Bricks added successfully to the volume %s", self.volname)

        # Mounting the volume.
        for mount_obj in self.mounts:
            ret, _, _ = mount_volume(self.volname,
                                     mtype=self.mount_type,
                                     mpoint=mount_obj.mountpoint,
                                     mserver=self.mnode,
                                     mclient=mount_obj.client_system)
            self.assertEqual(ret, 0,
                             ("Volume %s is not mounted") % (self.volname))
            g.log.info("Volume mounted successfully : %s", self.volname)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num %d --dir-depth 2 "
                "--dir-length 5 --max-num-of-dirs 3 "
                "--num-of-files 10 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Unmounting the volume.
        for mount_obj in self.mounts:
            ret, _, _ = umount_volume(mclient=mount_obj.client_system,
                                      mpoint=mount_obj.mountpoint)
            self.assertEqual(ret, 0,
                             "Volume %s is not unmounted" % (self.volname))
            g.log.info("Volume unmounted successfully : %s", self.volname)

        # Getting brick list
        self.brick_list = get_all_bricks(self.mnode, self.volname)
        if not self.brick_list:
            raise ExecutionError("Failed to get the brick list of %s" %
                                 self.volname)

        # Stop volume
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop the volume %s" % self.volname)
        g.log.info("Volume %s stopped successfully", self.volname)

        # Delete Volume
        ret, _, _ = g.run(
            self.mnode,
            "gluster volume delete %s --mode=script" % self.volname)
        self.assertEqual(ret, 0, "Failed to delete volume %s" % self.volname)
        g.log.info("Volume deleted successfully %s", self.volname)

        # Create another volume by using bricks of deleted volume
        self.volname = "test_create_vol_used_bricks"
        ret, _, err = volume_create(self.mnode,
                                    self.volname,
                                    brick_list[0:6],
                                    replica_count=3)
        self.assertNotEqual(
            ret, 0, "Volume creation should fail with used "
            "bricks but volume creation success")
        g.log.info("Failed to create volume with used bricks")

        # Checking failed message of volume creation
        msg = ' '.join([
            'volume create: test_create_vol_used_bricks: failed:',
            brick_list[0].split(':')[1], 'is already part of a volume'
        ])
        self.assertIn(
            msg, err, "Incorrect error message for volume creation "
            "with used bricks")
        g.log.info("correct error message for volume creation with "
                   "used bricks")
    def test_nfs_ganesha_export_with_multiple_volumes(self):
        """
        Test case to verify multiple volumes gets exported when IO is in
        progress.
        """
        # Starting IO on the mounts
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Create and export five new volumes
        for i in range(5):
            # Check availability of bricks to create new volume
            num_of_unused_bricks = 0

            servers_unused_bricks_dict = get_servers_unused_bricks_dict(
                self.mnode, self.all_servers, self.all_servers_info)
            for each_server_unused_bricks_list in list(
                    servers_unused_bricks_dict.values()):
                num_of_unused_bricks = (num_of_unused_bricks +
                                        len(each_server_unused_bricks_list))

            if num_of_unused_bricks < 2:
                self.assertNotEqual(
                    i, 0, "New volume cannot be created due "
                    "to unavailability of bricks.")
                g.log.warning(
                    "Tried to create five new volumes. But could "
                    "create only %s volume due to unavailability "
                    "of bricks.", str(i))
                break

            self.volume['name'] = "nfsvol" + str(i)
            self.volume['voltype']['type'] = 'distributed'
            self.volume['voltype']['replica_count'] = 1
            self.volume['voltype']['dist_count'] = 2

            new_vol = self.volume['name']

            # Create volume
            ret = setup_volume(mnode=self.mnode,
                               all_servers_info=self.all_servers_info,
                               volume_config=self.volume,
                               force=True)
            self.assertTrue(ret, "Setup volume [%s] failed" % self.volume)

            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode, new_vol)
            self.assertTrue(
                ret, "Volume %s process not online despite "
                "waiting for 300 seconds" % new_vol)

            # Export volume with nfs ganesha
            ret, _, _ = export_nfs_ganesha_volume(mnode=self.mnode,
                                                  volname=new_vol)
            self.assertEqual(ret, 0, ("Failed to export volume %s "
                                      "using nfs-ganesha" % new_vol))

            # Wait for volume to get exported
            ret = wait_for_nfs_ganesha_volume_to_get_exported(
                self.mnode, new_vol)
            self.assertTrue(
                ret, "Volume %s is not exported after setting "
                "ganesha.enable 'on'" % new_vol)
            g.log.info("Exported nfs-ganesha volume %s", new_vol)

            # Log Volume Info and Status
            ret = log_volume_info_and_status(self.mnode, new_vol)
            self.assertTrue(
                ret, "Logging volume %s info and status failed" % new_vol)

        # Validate IO
        g.log.info("Validating IO")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all IO")
Example 25
    def test_volume_create(self):

        # create and start a volume
        self.volume['name'] = "first_volume"
        self.volname = "first_volume"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        # bring a brick down and volume start force should bring it to online

        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        ret = bring_bricks_offline(self.volname, bricks_list[0:2])
        self.assertTrue(ret, "Failed to bring down the bricks")
        g.log.info("Successfully brought the bricks down")

        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Failed to start the volume")
        g.log.info("Volume start with force is success")

        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Failed to bring the bricks online")
        g.log.info("Volume start with force successfully brought all the "
                   "bricks online")

        # create volume with previously used bricks and different volume name
        self.volname = "second_volume"
        ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
        self.assertNotEqual(
            ret, 0, "Expected: It should fail to create a "
            "volume with previously used bricks. Actual:"
            "Successfully created the volume with previously"
            " used bricks")
        g.log.info("Failed to create the volume with previously used bricks")

        # create a volume with already existing volume name
        self.volume['name'] = "first_volume"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(
            ret, "Expected: It should fail to create a volume"
            " with already existing volume name. Actual: "
            "Successfully created the volume with "
            "already existing volname")
        g.log.info("Failed to create the volume with already existing volname")

        # creating a volume with non existing brick path should fail

        self.volname = "second_volume"
        bricks_list = form_bricks_list(self.mnode, self.volname,
                                       len(self.servers), self.servers,
                                       self.all_servers_info)
        nonexisting_brick_index = random.randint(0, len(bricks_list) - 1)
        non_existing_brick = bricks_list[nonexisting_brick_index].split(":")[0]
        non_existing_path = ":/brick/non_existing_path"
        non_existing_brick = non_existing_brick + non_existing_path
        bricks_list[nonexisting_brick_index] = non_existing_brick

        ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
        self.assertNotEqual(
            ret, 0, "Expected: Creating a volume with non "
            "existing brick path should fail. Actual: "
            "Successfully created the volume with "
            "non existing brick path")
        g.log.info("Failed to create the volume with non existing brick path")

        # cleanup the volume and peer detach all servers. form two clusters,try
        # to create a volume with bricks whose nodes are in different clusters

        # cleanup volumes
        vol_list = get_volume_list(self.mnode)
        self.assertIsNotNone(vol_list, "Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            self.assertTrue(ret, "Unable to delete volume % s" % volume)

        # peer detach all servers
        ret = peer_detach_servers(self.mnode, self.servers)
        self.assertTrue(ret, "Peer detach to all servers is failed")
        g.log.info("Peer detach to all the servers is success")

        # form cluster 1
        ret, _, _ = peer_probe(self.servers[0], self.servers[1])
        self.assertEqual(
            ret, 0, "Peer probe from %s to %s is failed" %
            (self.servers[0], self.servers[1]))
        g.log.info("Peer probe is success from %s to %s" %
                   (self.servers[0], self.servers[1]))

        # form cluster 2
        ret, _, _ = peer_probe(self.servers[2], self.servers[3])
        self.assertEqual(
            ret, 0, "Peer probe from %s to %s is failed" %
            (self.servers[2], self.servers[3]))
        g.log.info("Peer probe is success from %s to %s" %
                   (self.servers[2], self.servers[3]))

        # Creating a volume with bricks which are part of another
        # cluster should fail
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertFalse(
            ret, "Expected: Creating a volume with bricks"
            " which are part of another cluster should fail."
            " Actual: Successfully created the volume with "
            "bricks which are part of another cluster")
        g.log.info("Failed to create the volume with bricks which are "
                   "part of another cluster")

        # form a cluster, bring a node down. try to create a volume when one of
        # the brick node is down
        ret, _, _ = peer_detach(self.servers[2], self.servers[3])
        self.assertEqual(ret, 0, "Peer detach is failed")
        g.log.info("Peer detach is success")

        ret = peer_probe_servers(self.mnode, self.servers)
        self.assertTrue(ret, "Peer probe is failed")
        g.log.info("Peer probe to all the servers is success")

        random_server = self.servers[random.randint(1, len(self.servers) - 1)]
        ret = stop_glusterd(random_server)
        self.assertTrue(ret, "Glusterd is stopped successfully")

        self.volume['name'] = "third_volume"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertFalse(
            ret, "Expected: It should fail to create a volume "
            "when one of the node is down. Actual: Successfully "
            "created the volume with bbrick whose node is down")

        g.log.info("Failed to create the volume with brick whose node is down")
    def test_add_brick_functionality(self):

        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(
            ret, ("Failed to create and start volume %s" % self.volname))
        g.log.info("Volume created and started succssfully")

        # form bricks list to test add brick functionality

        replica_count_of_volume = self.volume['voltype']['replica_count']
        num_of_bricks = 4 * replica_count_of_volume
        bricks_list = form_bricks_list(self.mnode, self.volname, num_of_bricks,
                                       self.servers, self.all_servers_info)
        self.assertIsNotNone(bricks_list, "Bricks list is None")

        # Try to add a single brick to volume, which should fail as it is a
        # replicated volume, we should pass multiple of replica count number
        # of bricks

        bricks_list_to_add = [bricks_list[0]]
        ret, out, err = add_brick(self.mnode, self.volname, bricks_list_to_add)
        self.assertNotEqual(
            ret, 0, "Expected: It should fail to add a single"
            "brick to a replicated volume. Actual: "
            "Successfully added single brick to volume")
        g.log.info("failed to add a single brick to replicated volume")

        # add brick replica count number of bricks in which one is
        # non existing brick
        kwargs = {}
        kwargs['replica_count'] = replica_count_of_volume

        bricks_list_to_add = bricks_list[1:replica_count_of_volume + 1]

        num_of_bricks = len(bricks_list_to_add)
        index_of_non_existing_brick = random.randint(0, num_of_bricks - 1)
        complete_brick = bricks_list_to_add[index_of_non_existing_brick]
        non_existing_brick = complete_brick + "/non_existing_brick"
        bricks_list_to_add[index_of_non_existing_brick] = non_existing_brick

        ret, out, err = add_brick(self.mnode, self.volname, bricks_list_to_add,
                                  False, **kwargs)
        self.assertNotEqual(
            ret, 0, "Expected: It should fail to add a non-existing "
            "brick to a volume. Actual: "
            "Successfully added a non-existing brick to the volume")
        g.log.info("Failed to add a non-existing brick to the volume")

        # adding a brick from a node which is not part of the cluster
        bricks_list_to_add = bricks_list[replica_count_of_volume +
                                         1:(2 * replica_count_of_volume) + 1]

        num_of_bricks = len(bricks_list_to_add)
        index_of_node = random.randint(0, num_of_bricks - 1)
        complete_brick = bricks_list_to_add[index_of_node].split(":")
        complete_brick[0] = "abc.def.ghi.jkl"
        bricks_list_to_add[index_of_node] = ":".join(complete_brick)
        ret, out, err = add_brick(self.mnode, self.volname, bricks_list_to_add,
                                  False, **kwargs)
        self.assertNotEqual(
            ret, 0, "Expected: It should fail to add a brick "
            "from a node which is not part of the cluster. "
            "Actual: Successfully added bricks from a node which "
            "is not part of the cluster to the volume")

        g.log.info("Failed to add bricks from a node which is not part of "
                   "the cluster to the volume")

        # add correct number of valid bricks, it should succeed

        bricks_list_to_add = bricks_list[(2 * replica_count_of_volume) +
                                         1:(3 * replica_count_of_volume) + 1]
        ret, out, err = add_brick(self.mnode, self.volname, bricks_list_to_add,
                                  False, **kwargs)
        self.assertEqual(ret, 0, "Failed to add the bricks to the volume")
        g.log.info("Successfully added bricks to volume")

        # Perform rebalance start operation
        ret, out, err = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Rebalance start is success")
    def test_sync_functinality(self):

        # create a 2x3 volume
        num_of_servers = len(self.servers)
        servers_info_from_cluster = {}
        for server in self.servers[0:num_of_servers - 1]:
            servers_info_from_cluster[server] = self.all_servers_info[server]

        self.volume['servers'] = self.servers[0:num_of_servers - 1]
        self.volume['voltype']['replica_count'] = 3
        self.volume['voltype']['dist_count'] = 2
        ret = setup_volume(self.mnode, servers_info_from_cluster, self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))
        g.log.info("Successfully created and started the volume %s",
                   self.volname)

        # stop glusterd on a random node of the cluster
        random_server_index = random.randint(1, num_of_servers - 2)
        random_server = self.servers[random_server_index]
        cmd = "systemctl stop glusterd"
        ret = g.run_async(random_server, cmd)
        g.log.info("Stopping glusterd on %s", random_server)

        # set an option on the volume: stat-prefetch on
        self.options = {"stat-prefetch": "on"}
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set option stat-prefetch to on"
                              "for the volume %s" % self.volname))
        g.log.info(
            "Succeeded in setting stat-prefetch option to on"
            "for the volume %s", self.volname)

        # start glusterd on the node where glusterd is stopped
        ret = start_glusterd(random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s" % random_server)

        ret = wait_for_glusterd_to_start(random_server)
        self.assertTrue(ret, "glusterd is not running on %s" % random_server)
        g.log.info("glusterd is started and running on %s", random_server)

        # volume info should be synced across the cluster
        out1 = get_volume_info(self.mnode, self.volname)
        self.assertIsNotNone(
            out1, "Failed to get the volume info from %s" % self.mnode)
        g.log.info("Getting volume info from %s is success", self.mnode)

        count = 0
        while count < 60:
            out2 = get_volume_info(random_server, self.volname)
            self.assertIsNotNone(
                out2, "Failed to get the volume info from %s" % random_server)
            if out1 == out2:
                break
            sleep(2)
            count += 1

        g.log.info("Getting volume info from %s is success", random_server)
        self.assertDictEqual(out1, out2, "volume info is not synced")

        # stop glusterd on a random server from cluster
        random_server_index = random.randint(1, num_of_servers - 2)
        random_server = self.servers[random_server_index]
        cmd = "systemctl stop glusterd"
        ret = g.run_async(random_server, cmd)
        g.log.info("Stopping glusterd on node %s", random_server)

        # peer probe a new node
        ret = peer_probe_servers(self.mnode, self.servers[num_of_servers - 1])
        self.assertTrue(
            ret, "Failed to peer probe %s from %s" %
            (self.servers[num_of_servers - 1], self.mnode))
        g.log.info("Peer probe from %s to %s is success", self.mnode,
                   self.servers[num_of_servers - 1])

        # start glusterd on the node where glusterd is stopped
        ret = start_glusterd(random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s" % random_server)

        ret = wait_for_glusterd_to_start(random_server)
        self.assertTrue(ret, "glusterd is not running on %s" % random_server)
        g.log.info("glusterd is started and running on %s", random_server)

        # peer status should be synced across the cluster
        list1 = nodes_from_pool_list(self.mnode)
        self.assertIsNotNone(
            list1, "Failed to get the nodes list in the cluster "
            "from %s" % self.mnode)
        g.log.info("Successfully got the nodes list in the cluster from %s",
                   self.mnode)

        # replacing ip with FQDN
        list1 = sorted(socket.getfqdn(node) for node in list1)
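        # Normalising to FQDNs (and sorting) avoids spurious mismatches when
        # one node reports its peers by IP address and another by hostname.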

        count = 0
        while count < 60:
            list2 = nodes_from_pool_list(random_server)
            self.assertIsNotNone(
                list2, "Failed to get nodes list in the "
                "cluster from %s" % random_server)
            # replacing ip with FQDN
            list2 = sorted(socket.getfqdn(node) for node in list2)
            if list2 == list1:
                break
            sleep(2)
            count += 1

        g.log.info("Successfully got the nodes list in the cluster from %s",
                   random_server)

        self.assertListEqual(list1, list2, "Peer status is "
                             "not synced across the cluster")
        g.log.info("Peer status is synced across the cluster")
    def setUpClass(cls):
        """
        setup volume and initialize necessary variables
        which is used in tests
        """
        # calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        cls.default_volume_type_config = {
            'replicated': {
                'type': 'replicated',
                'replica_count': 2,
                'transport': 'tcp'
            },
            'dispersed': {
                'type': 'dispersed',
                'disperse_count': 6,
                'redundancy_count': 2,
                'transport': 'tcp'
            },
            'distributed': {
                'type': 'distributed',
                'dist_count': 2,
                'transport': 'tcp'
            },
            'distributed-replicated': {
                'type': 'distributed-replicated',
                'dist_count': 2,
                'replica_count': 3,
                'transport': 'tcp'
            }
        }
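        # Note: disperse_count includes the redundancy bricks, so the
        # 'dispersed' config above is a 4+2 layout, and the
        # 'distributed-replicated' config is a 2x3 layout (6 bricks in total).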

        # Setup Volume for all the volume types
        cls.volume_configs = []
        for volume_type in cls.default_volume_type_config:
            cls.volume_configs.append({
                'name': 'testvol_%s' % volume_type,
                'servers': cls.servers,
                'voltype': cls.default_volume_type_config[volume_type]
            })

        for volume_config in cls.volume_configs:
            ret = setup_volume(mnode=cls.mnode,
                               all_servers_info=cls.all_servers_info,
                               volume_config=volume_config)
            volname = volume_config['name']
            if not ret:
                raise ExecutionError("Failed to setup volume %s" % volname)
            g.log.info("Successful in setting up volume %s", volname)

            # Verify all of the volume's processes are online within 60 sec
            g.log.info("Verifying all of the volume's processes are online")
            ret = wait_for_volume_process_to_be_online(cls.mnode, volname, 60)
            if not ret:
                raise ExecutionError("Volume %s : All processes are not online"
                                     % volname)
            g.log.info("Successfully verified volume %s processes are online",
                       volname)

        # Verify glustershd process releases its parent process
        g.log.info("Verifying Self Heal Daemon process is daemonized")
        ret = is_shd_daemonized(cls.servers)
        if not ret:
            raise ExecutionError("Self Heal Daemon process was still"
                                 " holding parent process.")
        g.log.info("Self Heal Daemon processes are online")

        cls.GLUSTERSHD = "/var/lib/glusterd/glustershd/glustershd-server.vol"

    def test_add_brick_when_quorum_not_met(self):

        # pylint: disable=too-many-statements
        # create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))
        g.log.info("Volume is created and started successfully")

        # set cluster.server-quorum-type as server
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.server-quorum-type': 'server'})
        self.assertTrue(ret, ("Failed to set the quorum type as a server"
                              " on volume %s", self.volname))
        g.log.info("Able to set server quorum successfully on volume %s",
                   self.volname)

        # Setting quorum ratio to 95%
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '95%'})
        self.assertTrue(
            ret, "Failed to set the server quorum ratio on the cluster")
        g.log.info("Able to set server quorum ratio successfully on %s",
                   self.servers)
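        # With the ratio at 95%, losing even a single node breaks server
        # quorum: e.g. with 4 servers, 3/4 = 75% of the nodes being up is
        # already below the threshold.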

        # bring down glusterd on half of the nodes
        num_of_servers = len(self.servers)
        num_of_nodes_to_bring_down = num_of_servers // 2

        for node in range(num_of_nodes_to_bring_down, num_of_servers):
            ret = stop_glusterd(self.servers[node])
            self.assertTrue(
                ret, ("Failed to stop glusterd on %s" % self.servers[node]))
            g.log.info("Glusterd stopped successfully on server %s",
                       self.servers[node])

        for node in range(num_of_nodes_to_bring_down, num_of_servers):
            count = 0
            while count < 80:
                ret = is_glusterd_running(self.servers[node])
                if ret:
                    break
                sleep(2)
                count += 1
            self.assertNotEqual(
                ret, 0, "glusterd is still running on %s" % self.servers[node])

        # Verify the node count in volume status after glusterd is stopped on
        # half of the servers; the brick status cannot be checked in volume
        # status immediately after glusterd stops
        count = 0
        while count < 100:
            vol_status = get_volume_status(self.mnode, self.volname)
            servers_count = len(vol_status[self.volname])
            if servers_count == (num_of_servers - num_of_nodes_to_bring_down):
                break
            sleep(2)
            count += 1

        # confirm that quorum is not met; brick processes should be down
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        bricks_to_check = bricks_list[0:num_of_nodes_to_bring_down]
        ret = are_bricks_offline(self.mnode, self.volname, bricks_to_check)
        self.assertTrue(
            ret, "Unexpected: Server quorum is not met, "
            "Bricks are up")
        g.log.info("Server quorum is not met, bricks are down as expected")

        # try add brick operation, which should fail
        num_bricks_to_add = 1
        brick = form_bricks_list(self.mnode, self.volname, num_bricks_to_add,
                                 self.servers, self.all_servers_info)
        ret, _, _ = add_brick(self.mnode, self.volname, brick)
        self.assertNotEqual(ret, 0, ("Unexpected: add brick is success, "
                                     "when quorum is not met"))
        g.log.info("Add brick is failed as expected, when quorum is not met")

        # confirm that the newly added brick is not part of the volume
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        # form_bricks_list() returns a list, so check its single entry
        self.assertNotIn(brick[0], bricks_list,
                         ("Unexpected: add brick succeeded "
                          "when quorum is not met"))
        g.log.info("Add brick failed as expected when quorum is not met")

        # set cluster.server-quorum-type as none
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.server-quorum-type': 'none'})
        self.assertTrue(ret, ("Failed to set the quorum type as a server"
                              " on volume %s", self.volname))
        g.log.info("Able to set server quorum successfully on volume %s",
                   self.volname)
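        # Setting the quorum type back to none disables server-quorum
        # enforcement, so the volume can be cleaned up even while glusterd is
        # still down on half of the nodes.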
Example no. 30
    def test_rebalance_status_from_newly_probed_node(self):

        # Peer probe first 3 servers
        servers_info_from_three_nodes = {}
        for server in self.servers[0:3]:
            servers_info_from_three_nodes[server] = self.all_servers_info[
                server]
            # peer probe each server from the mnode
            ret, _, _ = peer_probe(self.mnode, server)
            self.assertEqual(ret, 0, "Peer probe failed to %s" % server)

        self.volume['servers'] = self.servers[0:3]
        # create a volume using the first 3 nodes
        ret = setup_volume(self.mnode,
                           servers_info_from_three_nodes,
                           self.volume,
                           force=True)
        self.assertTrue(
            ret, "Failed to create "
            "and start volume %s" % self.volname)

        # Mounting a volume
        ret = self.mount_volume(self.mounts)
        self.assertTrue(ret, "Volume mount failed for %s" % self.volname)

        # Checking volume mounted or not
        ret = is_mounted(self.volname, self.mounts[0].mountpoint, self.mnode,
                         self.mounts[0].client_system, self.mount_type)
        self.assertTrue(
            ret, "Volume not mounted on mount point: %s" %
            self.mounts[0].mountpoint)
        g.log.info("Volume %s mounted on %s", self.volname,
                   self.mounts[0].mountpoint)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 10 "
                "--dir-length 5 "
                "--max-num-of-dirs 3 "
                "--num-of-files 100 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertEqual(ret, 0,
                             "IO failed on %s" % mount_obj.client_system)
            self.counter = self.counter + 10
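        # Each mount gets a different --dirname-start-num (the counter is
        # bumped by 10 per mount) so the clients do not create colliding
        # directory names.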

        # add a brick to the volume and start rebalance
        brick_to_add = form_bricks_list(self.mnode, self.volname, 1,
                                        self.servers[0:3],
                                        servers_info_from_three_nodes)
        ret, _, _ = add_brick(self.mnode, self.volname, brick_to_add)
        self.assertEqual(ret, 0, "Failed to add a brick to %s" % self.volname)

        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start rebalance")

        # peer probe a new node from existing cluster
        ret, _, _ = peer_probe(self.mnode, self.servers[3])
        self.assertEqual(ret, 0, "Peer probe failed")

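        # The newly probed node is expected to sync the volume and rebalance
        # information from its peers, so querying the rebalance status from
        # it should succeed.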
        ret = get_rebalance_status(self.servers[3], self.volname)
        self.assertIsNotNone(ret, "Failed to get the rebalance status from "
                             "the newly probed node %s" % self.servers[3])