def restart_file_volume(file_vol, sleep_time=120):
    """Restars file volume service.

    Args:
        file_vol (str): name of a file volume
    """
    gluster_volume_status = get_volume_status(
        "auto_get_gluster_endpoint", file_vol)
    if not gluster_volume_status:
        raise AssertionError("failed to get gluster volume status")

    g.log.info("Gluster volume %s status\n%s : " % (
        file_vol, gluster_volume_status)
    )

    ret, out, err = volume_stop("auto_get_gluster_endpoint", file_vol)
    if ret != 0:
        err_msg = "Failed to stop gluster volume %s. error: %s" % (
            file_vol, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    # Explicit wait so that I/O and PVC creation quiesce (default: 2 mins)
    time.sleep(sleep_time)

    ret, out, err = volume_start(
        "auto_get_gluster_endpoint", file_vol, force=True)
    if ret != 0:
        err_msg = "failed to start gluster volume %s error: %s" % (
            file_vol, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    ret, out, err = volume_status("auto_get_gluster_endpoint", file_vol)
    if ret != 0:
        err_msg = ("Failed to get status for gluster volume %s error: %s" % (
            file_vol, err))
        g.log.error(err_msg)
        raise AssertionError(err_msg)
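
# A minimal usage sketch (the volume name is illustrative; assumes the
# module-level imports above and that "auto_get_gluster_endpoint" resolves
# to a reachable gluster endpoint):
#
#     # restart with a shorter quiesce window than the 2-minute default
#     restart_file_volume("file_vol_1", sleep_time=60)
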
    def test_volume_op(self):

        # Starting a non-existent volume should fail
        ret, _, _ = volume_start(self.mnode, "no_vol", force=True)
        self.assertNotEqual(ret, 0, "Expected: starting a non-existent "
                            "volume should fail. Actual: successfully "
                            "started a non-existent volume")
        g.log.info("Starting a non-existent volume failed as expected")

        # Stopping a non-existent volume should fail
        ret, _, _ = volume_stop(self.mnode, "no_vol", force=True)
        self.assertNotEqual(ret, 0, "Expected: stopping a non-existent "
                            "volume should fail. Actual: successfully "
                            "stopped a non-existent volume")
        g.log.info("Stopping a non-existent volume failed as expected")

        # Deleting a non-existent volume should fail
        self.assertTrue(
            volume_delete(self.mnode, "no_vol", xfail=True),
            "Expected: deleting a non-existent volume should fail. "
            "Actual: successfully deleted a non-existent volume"
        )

        # Detach a server and try to create a volume with a brick
        # on a node that is not in the cluster
        ret, _, _ = peer_detach(self.mnode, self.servers[1])
        self.assertEqual(ret, 0, "Peer detach failed")
        g.log.info("Peer detach is successful")

        num_of_bricks = len(self.servers)
        bricks_list = form_bricks_list(self.mnode, self.volname, num_of_bricks,
                                       self.servers, self.all_servers_info)

        ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
        self.assertNotEqual(ret, 0, "Successfully created volume with brick "
                            "from which is not a part of node")
        g.log.info("Creating a volume with brick from node which is not part "
                   "of cluster is failed")

        # Peer probe the detached server
        ret, _, _ = peer_probe(self.mnode, self.servers[1])
        self.assertEqual(ret, 0, ("Peer probe is failed"))
        g.log.info("Peer probe is successful")

        # Create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume,
                           force=True)
        self.assertTrue(ret, "Failed to create the volume")
        g.log.info("Successfully created and started the volume")

        # Starting an already started volume should fail
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertNotEqual(ret, 0, "Expected: starting an already started "
                            "volume should fail. Actual: successfully "
                            "started an already started volume")
        g.log.info("Starting an already started volume failed as expected")

        # Deleting a volume without stopping should fail
        self.assertTrue(
            volume_delete(self.mnode, self.volname, xfail=True),
            "Expected: It should fail to delete a volume"
            " without stopping. Actual: Successfully "
            "deleted a volume without stopping it"
        )
        g.log.info("Expected: volume delete should fail without "
                   "stopping volume: %s", self.volname)

        # Stopping a volume should succeed
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("volume stop is failed"))
        g.log.info("Volume stop is success")

        # Stopping an already stopped volume should fail
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertNotEqual(ret, 0, "Expected: stopping an already stopped "
                            "volume should fail. Actual: successfully "
                            "stopped an already stopped volume")
        g.log.info("Stopping an already stopped volume failed as expected")

        # Deleting a volume should succeed
        self.assertTrue(
            volume_delete(self.mnode, self.volname),
            "Volume delete is failed"
        )

        # Deleting an already deleted volume should fail
        self.assertTrue(
            volume_delete(self.mnode, self.volname, xfail=True),
            "Expected: It should fail to delete an "
            "already deleted volume. Actual:Successfully "
            "deleted an already deleted volume"
        )

        # Volume info command should succeed
        ret = get_volume_info(self.mnode)
        self.assertIsNotNone(ret, "volume info command failed")
        g.log.info("Volume info command is success")
    def test_glustershd_with_restarting_glusterd(self):
        """
        Test Script to verify the self heal daemon process with restarting
        glusterd and rebooting the server

        * stop all volumes
        * restart glusterd - should not run self heal daemon process
        * start replicated involved volumes
        * single self heal daemon process running
        * restart glusterd
        * self heal daemon pid will change
        * bring down brick and restart glusterd
        * self heal daemon pid will change and its different from previous
        * brought up the brick

        """

        nodes = self.volume['servers']

        # stop the volume
        g.log.info("Stopping the volume %s", self.volname)
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop volume %s" % self.volname)
        g.log.info("Successfully stopped volume %s", self.volname)

        # check the self-heal daemon process after stopping the volume
        g.log.info("Verifying the self-heal daemon process for "
                   "volume %s", self.volname)
        ret = are_all_self_heal_daemons_are_online(self.mnode, self.volname)
        self.assertFalse(ret, ("Self-heal daemon process is still running "
                               "even after stopping volume %s" % self.volname))
        g.log.info("Self-heal daemon is not running after stopping "
                   "volume %s", self.volname)

        # restart glusterd service on all the servers
        g.log.info("Restarting glusterd on all servers %s", nodes)
        ret = restart_glusterd(nodes)
        self.assertTrue(ret, ("Failed to restart glusterd on all nodes %s",
                              nodes))
        g.log.info("Successfully restarted glusterd on all nodes %s",
                   nodes)

        # check the self-heal daemon process after restarting glusterd
        g.log.info("Starting to get self-heal daemon process on"
                   " nodes %s", nodes)
        ret = are_all_self_heal_daemons_are_online(self.mnode, self.volname)
        self.assertFalse(ret, ("Self-heal daemon process is running after "
                               "glusterd restart with volume %s in "
                               "stopped state" % self.volname))
        g.log.info("Self-heal daemon is not running after stopping "
                   "volume %s and restarting glusterd", self.volname)

        # start the volume
        g.log.info("Starting the volume %s", self.volname)
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)
        g.log.info("Volume %s started successfully", self.volname)

        # Verify the glustershd process releases its parent process
        g.log.info("Checking whether the glustershd process is daemonized")
        ret = is_shd_daemonized(nodes)
        self.assertTrue(ret, ("Either no self-heal daemon process was found "
                              "or more than one was found"))
        g.log.info("Single self-heal daemon process on all nodes %s", nodes)

        # get the self-heal daemon pids after starting the volume
        g.log.info("Starting to get self-heal daemon process "
                   "on nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either no self-heal daemon process was found "
                              "or more than one was found"))
        g.log.info("Successful in getting self-heal daemon pids")
        glustershd_pids = pids
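        # get_self_heal_daemon_pid is assumed to return (status, pids) with
        # pids keyed by node name; elsewhere in this module a pid entry of
        # -1 indicates that no self-heal daemon is running on that node.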

        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume: %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick list: %s", bricks_list)

        # validate the bricks present in the volume info
        # against the glustershd server volume file
        g.log.info("Starting to parse file %s on "
                   "node %s", self.GLUSTERSHD, self.mnode)
        ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
                                             bricks_list)
        self.assertTrue(ret, ("Brick list from volume info differs from the "
                              "glustershd server volume file. "
                              "Please check the log file for details."))
        g.log.info("Successfully parsed the %s file", self.GLUSTERSHD)

        # restart glusterd service on all the servers
        g.log.info("Restarting glusterd on all servers %s", nodes)
        ret = restart_glusterd(nodes)
        self.assertTrue(ret, ("Failed to restart glusterd on all nodes %s",
                              nodes))
        g.log.info("Successfully restarted glusterd on all nodes %s",
                   nodes)

        # Verify all volume processes come online within 60 sec
        g.log.info("Verifying all volume processes are online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   60)
        self.assertTrue(ret, ("Volume %s : All processes are not "
                              "online", self.volname))
        g.log.info("Successfully verified volume %s processes are online",
                   self.volname)

        # Verify the glustershd process releases its parent process
        ret = is_shd_daemonized(nodes)
        self.assertTrue(ret, ("Either no self-heal daemon process was found "
                              "or more than one was found"))

        # check the self-heal daemon process after starting the volume and
        # restarting the glusterd process
        g.log.info("Starting to get self-heal daemon process "
                   "on nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either no self-heal daemon process was found "
                              "or more than one was found"))
        glustershd_pids_after_glusterd_restart = pids

        self.assertNotEqual(glustershd_pids,
                            glustershd_pids_after_glusterd_restart,
                            ("Self Heal Daemon pids are same after "
                             "restarting glusterd process"))
        g.log.info("Self Heal Daemon process are different before and "
                   "after restarting glusterd process")

        # select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        # wrap in list() so the result survives repeated iteration (a bare
        # filter object is a one-shot iterator on Python 3)
        bricks_to_bring_offline = list(filter(None, (
            bricks_to_bring_offline_dict['hot_tier_bricks'] +
            bricks_to_bring_offline_dict['cold_tier_bricks'] +
            bricks_to_bring_offline_dict['volume_bricks'])))

        # bring bricks offline
        g.log.info("Going to bring down the brick process "
                   "for %s", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                              "check the log file for more details."))
        g.log.info("Brought down the brick process "
                   "for %s successfully", bricks_to_bring_offline)

        # restart glusterd after bringing down the brick
        g.log.info("Restart glusterd on all servers %s", nodes)
        ret = restart_glusterd(nodes)
        self.assertTrue(ret, ("Failed to restart glusterd on all nodes %s",
                              nodes))
        g.log.info("Successfully restarted glusterd on all nodes %s",
                   nodes)

        # Verify all volume processes come online within 60 sec
        g.log.info("Verifying all volume processes are online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   60)
        self.assertTrue(ret, ("Volume %s : All processes are not "
                              "online", self.volname))
        g.log.info("Successfully verified volume %s processes are online",
                   self.volname)

        # Verify the glustershd process releases its parent process
        ret = is_shd_daemonized(nodes)
        self.assertTrue(ret, ("Either no self-heal daemon process was found "
                              "or more than one was found"))

        # check the self-heal daemon process after killing the brick and
        # restarting the glusterd process
        g.log.info("Starting to get self-heal daemon process "
                   "on nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either no self-heal daemon process was found "
                              "or more than one was found"))
        glustershd_pids_after_killing_brick = pids

        self.assertNotEqual(glustershd_pids_after_glusterd_restart,
                            glustershd_pids_after_killing_brick,
                            ("Self Heal Daemon process are same from before "
                             "killing the brick,restarting glusterd process"))
        g.log.info("Self Heal Daemon process are different after killing the "
                   "brick, restarting the glusterd process")

        # bring the bricks back online
        g.log.info("Bringing up the bricks: %s", bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, "Failed to bring the bricks online")
        g.log.info("Successfully brought the bricks online")

        # check all bricks are online
        g.log.info("Verifying all bricka are online or not.....")
        ret = are_bricks_online(self.mnode, self.volname,
                                bricks_to_bring_offline)
        self.assertTrue(ret, ("Not all bricks are online"))
        g.log.info("All bricks are online.")
    def test_profile_operations(self):

        # pylint: disable=too-many-statements
        """
        Test Case:
        1) Create a volume and start it.
        2) Mount volume on client and start IO.
        3) Start profile info on the volume.
        4) Run profile info with different parameters
           and see if all bricks are present or not.
        5) Stop profile on the volume.
        6) Create another volume.
        7) Start profile without starting the volume.
        """

        # Timestamp of the current test case's start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Start IO on mount points.
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--dirname-start-num %d "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Getting and checking output of profile info.
        ret, out, _ = profile_info(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to run profile info on volume: %s" % self.volname)
        g.log.info("Successfully executed profile info on volume: %s",
                   self.volname)

        # Checking if all bricks are present in profile info.
        brick_list = get_all_bricks(self.mnode, self.volname)
        for brick in brick_list:
            self.assertTrue(
                brick in out,
                "Brick %s not a part of profile info output." % brick)
            g.log.info("Brick %s showing in profile info output.", brick)

        # Running profile info with different profile options.
        profile_options = [
            'peek', 'incremental', 'clear', 'incremental peek', 'cumulative'
        ]
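        # The option strings above are assumed to map onto the gluster CLI,
        # e.g. 'incremental peek' becoming
        # "gluster volume profile <volname> info incremental peek".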
        for option in profile_options:

            # Getting and checking output of profile info.
            ret, out, _ = profile_info(self.mnode,
                                       self.volname,
                                       options=option)
            self.assertEqual(
                ret, 0, "Failed to run profile info %s on volume: %s" %
                (option, self.volname))
            g.log.info("Successfully executed profile info %s on volume: %s",
                       option, self.volname)

            # Checking if all bricks are present in profile info peek.
            for brick in brick_list:
                self.assertTrue(
                    brick in out, "Brick %s not a part of profile"
                    " info %s output." % (brick, option))
                g.log.info("Brick %s showing in profile info %s output.",
                           brick, option)

        # Stop profile on volume.
        ret, _, _ = profile_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         "Failed to stop profile on volume: %s" % self.volname)
        g.log.info("Successfully stopped profile on volume: %s", self.volname)

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("IO validation complete.")

        # Create and start a volume
        self.volume['name'] = "volume_2"
        self.volname = "volume_2"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Successfully created and started volume_2")

        # Stop volume
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop the volume %s" % self.volname)
        g.log.info("Volume %s stopped successfully", self.volname)

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Start volume
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         "Failed to start the volume %s" % self.volname)
        g.log.info("Volume %s started successfully", self.volname)

        # Checking for core files.
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")

        # Checking whether glusterd is running or not
        ret = is_glusterd_running(self.servers)
        self.assertEqual(ret, 0, "Glusterd has crashed on nodes.")
        g.log.info("No glusterd crashes observed.")
    def test_targetcli_when_block_hosting_volume_down(self):
        """Validate no inconsistencies occur in targetcli when block volumes
           are created with one block hosting volume down."""
        h_node, h_server = self.heketi_client_node, self.heketi_server_url
        cmd = ("targetcli ls | egrep '%s' || echo unavailable")
        error_msg = ("targetcli has inconsistencies when block devices are "
                     "created with one block hosting volume %s is down")

        # Delete BHV which has no BV or fill it completely
        bhv_list = get_block_hosting_volume_list(h_node, h_server).keys()
        for bhv in bhv_list:
            bhv_info = heketi_volume_info(h_node, h_server, bhv, json=True)
            if not bhv_info["blockinfo"].get("blockvolume", []):
                heketi_volume_delete(h_node, h_server, bhv)
                continue
            free_size = bhv_info["blockinfo"].get("freesize", 0)
            if free_size:
                bv = heketi_volume_create(h_node,
                                          h_server,
                                          free_size,
                                          json=True)
                self.addCleanup(heketi_volume_delete, h_node, h_server,
                                bv["id"])

        # Create BV
        bv = heketi_blockvolume_create(h_node, h_server, 2, json=True)
        self.addCleanup(heketi_blockvolume_delete, h_node, h_server, bv["id"])

        # Bring down BHV
        bhv_name = get_block_hosting_volume_name(h_node, h_server, bv["id"])
        ret, out, err = volume_stop("auto_get_gluster_endpoint", bhv_name)
        if ret != 0:
            err_msg = "Failed to stop gluster volume %s. error: %s" % (
                bhv_name, err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)
        self.addCleanup(podcmd.GlustoPod()(volume_start),
                        "auto_get_gluster_endpoint", bhv_name)

        ocp_node = self.ocp_master_node[0]
        gluster_block_svc = "gluster-block-target"
        self.addCleanup(wait_for_service_status_on_gluster_pod_or_node,
                        ocp_node,
                        gluster_block_svc,
                        "active",
                        "exited",
                        gluster_node=self.gluster_servers[0])
        self.addCleanup(restart_service_on_gluster_pod_or_node, ocp_node,
                        gluster_block_svc, self.gluster_servers[0])
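        # addCleanup callbacks run in LIFO order, so on teardown the
        # gluster-block-target service is restarted first and only then is
        # its "active" status waited on.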
        for condition in ("continue", "break"):
            restart_service_on_gluster_pod_or_node(
                ocp_node,
                gluster_block_svc,
                gluster_node=self.gluster_servers[0])
            wait_for_service_status_on_gluster_pod_or_node(
                ocp_node,
                gluster_block_svc,
                "active",
                "exited",
                gluster_node=self.gluster_servers[0])

            targetcli = cmd_run_on_gluster_pod_or_node(ocp_node,
                                                       cmd % bv["id"],
                                                       self.gluster_servers[0])
            if condition == "continue":
                self.assertEqual(targetcli, "unavailable",
                                 error_msg % bhv_name)
            else:
                self.assertNotEqual(targetcli, "unavailable",
                                    error_msg % bhv_name)
                break

            # Bring up the same BHV
            ret, out, err = volume_start("auto_get_gluster_endpoint", bhv_name)
            if ret != 0:
                err = "Failed to start gluster volume %s on %s. error: %s" % (
                    bhv_name, h_node, err)
                raise exceptions.ExecutionError(err)
    def test_volume_create_start_stop_start(self):
        """Tests volume create, start, status, stop, start.
        Also Validates whether all the brick process are running after the
        start of the volume.
        """
        # Verify volume processes are online
        g.log.info("Verify volume %s processes are online", self.volname)
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s : All process are not online" %
                              self.volname))
        g.log.info("Successfully Verified volume %s processes are online",
                   self.volname)

        # Stop Volume
        g.log.info("Stopping Volume %s", self.volname)
        ret, _, _ = volume_stop(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Failed to stop volume %s" % self.volname)
        g.log.info("Successfully stopped volume %s", self.volname)

        # Start Volume
        g.log.info("Starting Volume %s", self.volname)
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)
        g.log.info("Successfully started volume %s", self.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info("Successful in waiting for volume %s processes to be "
                   "online", self.volname)

        # Log Volume Info and Status
        g.log.info("Logging Volume %s Info and Status", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to Log volume %s info and status",
                              self.volname))
        g.log.info("Successfully logged Volume %s Info and Status",
                   self.volname)

        # Verify volume's all process are online
        g.log.info("Verify volume %s processes are online", self.volname)
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s : All process are not online" %
                              self.volname))
        g.log.info("Successfully verified volume %s processes are online",
                   self.volname)

        # Log Volume Info and Status
        g.log.info("Logging Volume %s Info and Status", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to Log volume %s info and status",
                              self.volname))
        g.log.info("Successfully logged Volume %s Info and Status",
                   self.volname)

        # Check if glusterd is running on all servers(expected: active)
        g.log.info("Check if glusterd is running on all servers"
                   "(expected: active)")
        ret = is_glusterd_running(self.servers)
        self.assertEqual(ret, 0, "Glusterd is not running on all servers")
        g.log.info("Glusterd is running on all the servers")
    def test_volume_start_stop_while_rebalance_is_in_progress(self):
        # DHT Layout and hash validation
        for mount_obj in self.mounts:
            g.log.debug("Verifying hash layout values %s:%s",
                        mount_obj.client_system, mount_obj.mountpoint)
            ret = validate_files_in_dir(mount_obj.client_system,
                                        mount_obj.mountpoint,
                                        test_type=FILE_ON_HASHED_BRICKS,
                                        file_type=FILETYPE_FILES |
                                        FILETYPE_DIRS)
            self.assertTrue(ret, "Hash Layout Values: Fail")
            g.log.info("Hash layout values are verified %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)

        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        g.log.error(ret, "Logging volume info and status failed on "
                         "volume %s", self.volname)
        g.log.info("Logging volume info and status was successful for volume "
                   "%s", self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s",
                              self.volname))
        g.log.info("Expanding volume is successful on volume %s", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info("Successful in waiting for volume %s processes to be "
                   "online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to log volume info and status of "
                              "volume %s", self.volname))
        g.log.info("Successfully logged info and status of volume %s",
                   self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Error: Volume processes failed to come up for "
                              "%s", self.volname))
        g.log.info("All processes are up for volume %s", self.volname)

        # Verify all volume processes are online
        g.log.info("Verifying all volume processes are online")
        ret = verify_all_process_of_volume_are_online(self.mnode,
                                                      self.volname)
        self.assertTrue(ret, ("Volume %s : All processes are not online",
                              self.volname))
        g.log.info("Volume %s : All processes are online", self.volname)

        # Start Rebalance
        g.log.info("Starting rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s ",
                   self.volname)

        # Logging rebalance status
        g.log.info("Logging rebalance status")
        status_info = get_rebalance_status(self.mnode, self.volname)
        status = status_info['aggregate']['statusStr']

        self.assertIn('in progress', status,
                      "Rebalance process is not running")
        g.log.info("Rebalance process is running")

        ret, out, err = volume_stop(self.mnode, self.volname)
        g.log.debug("Rebalance info: %s", out)

        self.assertIn("rebalance session is in progress", err, " Volume "
                      "stopped successfully while rebalance session is in "
                      "progress")
        g.log.info("Volume stop failed as rebalance session is in "
                   "progress")

        # Check volume info to check the status of volume
        g.log.info("Checking volume info for the volume status")
        status_info = get_volume_info(self.mnode, self.volname)
        status = status_info[self.volname]['statusStr']
        self.assertIn('Started', status, ("Volume %s state is \"Stopped\"",
                                          self.volname))
        g.log.info("Volume %s state is \"Started\"", self.volname)
    def test_no_glustershd_with_distribute(self):
        """
        Test Script to verify the glustershd server vol file
        has only entries for replicate volumes

        * Create multiple volumes and start all volumes
        * Check the glustershd processes - Only 1 glustershd should be listed
        * Stop all volumes
        * Check the glustershd processes - No glustershd should be running
        * Start the distribute volume only
        * Check the glustershd processes - No glustershd should be running

        """

        nodes = self.servers

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either no self heal daemon process found or "
                              "more than One self heal daemon process "
                              "found : %s" % pids))
        g.log.info(
            "Successful in getting single self heal daemon process"
            " on all nodes %s", nodes)

        # stop all the volumes
        g.log.info("Going to stop all the volumes")
        volume_list = get_volume_list(self.mnode)
        for volume in volume_list:
            g.log.info("Stopping volume: %s", volume)
            ret, _, _ = volume_stop(self.mnode, volume)
            self.assertEqual(ret, 0, "Failed to stop volume %s" % volume)
            g.log.info("Successfully stopped volume %s", volume)
        g.log.info("Successfully stopped all the volumes")

        # check the self-heal daemon process after stopping all volumes
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertFalse(ret, ("Self heal daemon process is still running "
                               "after stopping all volumes "))
        for node in pids:
            self.assertEqual(pids[node][0], -1, ("Self heal daemon is still "
                                                 "running on node %s even "
                                                 "after stoppong all "
                                                 "volumes" % node))
        g.log.info("EXPECTED: No self heal daemon process is "
                   "running after stopping all volumes")

        # start the distribute volume only
        for volume in volume_list:
            volume_type_info = get_volume_type_info(self.mnode, volume)
            volume_type = (volume_type_info['volume_type_info']['typeStr'])
            if volume_type == 'Distribute':
                g.log.info("Starting the distribute volume: %s", volume)
                ret, _, _ = volume_start(self.mnode, volume)
                self.assertEqual(ret, 0,
                                 "Failed to start volume %s" % volume)
                g.log.info("Successfully started volume %s", volume)
                break

        # check the self-heal daemon process after starting the distribute
        # volume
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertFalse(ret, ("Self-heal daemon process is running even "
                               "though only the distribute volume was "
                               "started"))
        for node in pids:
            self.assertEqual(pids[node][0], -1, ("Self-heal daemon is still "
                                                 "running on node %s even "
                                                 "though only the distribute "
                                                 "volume was started" % node))
        g.log.info("EXPECTED: No self-heal daemon process is running "
                   "after starting only the distribute volume")
    def test_glustershd_on_newly_probed_server(self):
        """
        Test script to verify glustershd process on newly probed server

        * check glustershd process - only 1 glustershd process should
          be running
        * Add new node to cluster
        * check glustershd process - only 1 glustershd process should
          be running on all servers inclusing newly probed server
        * stop the volume
        * add another node to cluster
        * check glustershd process - glustershd process shouldn't be running
          on servers including newly probed server
        * start the volume
        * check glustershd process - only 1 glustershd process should
          be running on all servers inclusing newly probed server

        """
        # pylint: disable=too-many-statements

        nodes = self.volume['servers'][:-2]

        # check the self-heal daemon process
        g.log.info("Starting to get self heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either no self heal daemon process found or "
                              "more than one self heal daemon process "
                              "found : %s" % pids))
        g.log.info(
            "Successful in getting single self heal daemon process"
            " on all nodes %s", nodes)

        # Add new node to the cluster
        g.log.info("Peer probe for %s", self.extra_servers[0])
        ret = peer_probe_servers(self.mnode, self.extra_servers[0])
        self.assertTrue(
            ret, "Failed to peer probe server : %s" % self.extra_servers[0])
        g.log.info(
            "Peer probe success for %s and all peers are in "
            "connected state", self.extra_servers[0])
        nodes.append(self.extra_servers[0])

        # check the self-heal daemon process and it should be running on
        # newly probed servers
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either no self heal daemon process found or "
                              "more than one self heal daemon process "
                              "found : %s" % pids))
        g.log.info(
            "Successful in getting single self heal daemon process"
            " on all nodes %s", nodes)

        # stop the volume
        g.log.info("Stopping the volume %s", self.volname)
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop volume %s" % self.volname)
        g.log.info("Successfully stopped volume %s", self.volname)

        # Add another new node to the cluster
        g.log.info("peer probe for %s", self.extra_servers[1])
        ret = peer_probe_servers(self.mnode, self.extra_servers[1])
        self.assertTrue(
            ret, "Failed to peer probe server : %s" % self.extra_servers[1])
        g.log.info(
            "Peer probe success for %s and all peers are in "
            "connected state", self.extra_servers[1])
        nodes.append(self.extra_servers[1])

        # check the self-heal daemon process after stopping the volume;
        # no self-heal daemon should be running, including on the newly
        # probed node
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertFalse(ret, ("Self-heal daemon process is running even "
                               "after stopping volume %s" % self.volname))
        for node in pids:
            self.assertEqual(
                pids[node][0], -1,
                "Self-heal daemon is still running on node %s even "
                "after stopping the volume" % node)
        g.log.info("Expected: no self-heal daemon process is running "
                   "after stopping the volume")

        # start the volume
        g.log.info("Starting volume %s", self.volname)
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)
        g.log.info("Volume %s started successfully", self.volname)

        # Verify all volume processes come online within 60 sec
        g.log.info("Verifying all volume processes are online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   60)
        self.assertTrue(ret, ("Volume %s : All processes are not "
                              "online", self.volname))
        g.log.info("Successfully verified volume %s processes are online",
                   self.volname)

        # Verify the glustershd process releases its parent process
        g.log.info("Verifying the self-heal daemon process is daemonized")
        ret = is_shd_daemonized(nodes)
        self.assertTrue(ret, ("Either no self-heal daemon process was found "
                              "or more than one was found: %s" % pids))

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either no self heal daemon process found or "
                              "more than one self heal daemon process "
                              "found : %s" % pids))
        g.log.info(
            "Successful in getting single self heal daemon process"
            " on all nodes %s", nodes)

        # detach extra servers from the cluster
        g.log.info("peer detaching extra servers %s from cluster",
                   self.extra_servers)
        ret = peer_detach_servers(self.mnode, self.extra_servers)
        self.assertTrue(
            ret,
            "Failed to peer detach extra servers : %s" % self.extra_servers)
        g.log.info("Peer detach success for %s ", self.extra_servers)