Example #1
    def test_run_parallel(self):
        """Testing SSH run_parallel() method"""
        print("Running: %s - %s" % (self.id(), self.shortDescription()))
        results = g.run_parallel(self.hosts, 'echo -n %s' % self.test_string)
        hosts_already_tested = []
        for host, result in results.items():
            # test host is in list of hosts to test
            self.assertIn(host, self.hosts)
            # test host has not already been tested
            self.assertNotIn(host, hosts_already_tested)
            hosts_already_tested.append(host)
            print(host)
            rcode, rout, rerr = result
            self.assertEqual(rcode, 0)
            self.assertEqual(rout, self.test_string)
            print(rout)
            self.assertEqual(rerr, '')
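# A minimal sketch of the result shape the test above relies on, assuming
# glusto's g.run_parallel() returns {host: (rcode, stdout, stderr)};
# the hostnames and output below are hypothetical:
example_results = {
    'host1.example.com': (0, 'gluster', ''),
    'host2.example.com': (0, 'gluster', ''),
}
for host, (rcode, rout, rerr) in example_results.items():
    assert rcode == 0 and rerr == ''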
Example #2
def run_bonnie(servers, directory_to_run, username="root"):
    """
    Function to run the bonnie++ test suite on the given servers.

    Args:
        servers (list): servers on which the tests are to be run.
        directory_to_run (list): directory path on each server where the
         tests will run.

    Kwargs:
        username (str): username. Defaults to root.

    Returns:
        bool: True if the test passes on all servers, False otherwise

    Example:
        run_bonnie(["abc.com", "def.com"], ["/mnt/test1", "/mnt/test2"])
    """

    g.log.info("Running bonnie tests on %s" % ','.join(servers))
    rt = True
    options_for_each_servers = []

    # Install bonnie test suite if not installed
    results = g.run_parallel(servers, "yum list installed bonnie++")
    for index, server in enumerate(servers):
        if results[server][0] != 0:
            ret, out, _ = g.run(server,
                                "yum list installed bonnie++ || "
                                "yum -y install bonnie++")
            if ret != 0:
                g.log.error("Failed to install bonnie on %s" % server)
                return False

        # Building options for bonnie tests
        options_list = []
        options = ""
        freemem_command = "free -g | grep Mem: | awk '{ print $2 }'"
        ret, out, _ = g.run(server, freemem_command)
        memory = int(out)
        g.log.info("Memory = %i", memory)
        options_list.append("-d %s -u %s" % (directory_to_run[index],
                                             username))
        if memory >= 8:
            options_list.append("-r 16G -s 16G -n 0 -m TEST -f -b")

        options = " ".join(options_list)
        options_for_each_servers.append(options)

    proc_list = []
    for index, server in enumerate(servers):
        bonnie_command = "bonnie++ %s" % (options_for_each_servers[index])
        proc = g.run_async(server, bonnie_command)
        proc_list.append(proc)

    for index, proc in enumerate(proc_list):
        results = proc.async_communicate()
        if results[0] != 0:
            g.log.error("Bonnie test failed on server %s" % servers[index])
            rt = False

    for index, server in enumerate(servers):
        ret, out, _ = g.run(server, "rm -rf %s/Bonnie.*"
                            % directory_to_run[index])
        if ret != 0:
            g.log.error("Failed to remove files from %s" % server)
            rt = False

    for server in servers:
        ret, out, _ = g.run(server, "yum -y remove bonnie++")
        if ret != 0:
            g.log.error("Failed to remove bonnie from %s" % server)
            return False
    return rt
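# A hedged usage sketch for run_bonnie(); the server names and mountpoints
# are hypothetical, and 'g' is assumed to be glusto's connection object:
if not run_bonnie(["server1.example.com", "server2.example.com"],
                  ["/mnt/test1", "/mnt/test2"]):
    g.log.error("bonnie++ failed on at least one server")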
Example #3
def run_fio(servers, directory_to_run):
    """
    Function to run the fio test suite on the given servers.

    Args:
        servers (list): servers on which the tests are to be run.
        directory_to_run (list): directory path on each server where the
         tests will run.

    Returns:
        bool: True if the test passes on all servers, False otherwise

    Example:
        run_fio(["abc.com", "def.com"], ["/mnt/test1", "/mnt/test2"])
    """

    g.log.info("Running fio tests on %s" % ','.join(servers))
    rt = True
    job_file = "/tmp/fio_job.ini"

    # Installing fio if not installed
    results = g.run_parallel(servers, "yum list installed fio")
    for index, server in enumerate(servers):
        if results[server][0] != 0:
            ret, out, _ = g.run(
                server, "yum list installed fio || "
                "yum -y install fio")
            if ret != 0:
                g.log.error("Failed to install bonnie on %s" % server)
                return False

        # Build the fio job file
        # TODO: parametrize fio options and take input values from the user
        cmd = ("echo -e '[global]\nrw=randrw\nio_size=1g\nfsync_on_close=1\n"
               "size=4g\nbs=64k\nrwmixread=20\nopenfiles=1\nstartdelay=0\n"
               "ioengine=sync\n[write]\ndirectory=%s\nnrfiles=1\n"
               "filename_format=fio_file.$jobnum.$filenum\nnumjobs=8' "
               "> %s" % (directory_to_run[index], job_file))

        ret, _, _ = g.run(server, cmd)
        if ret != 0:
            g.log.error("Failed to create fio job file")
            rt = False

    proc_list = []
    for index, server in enumerate(servers):
        fio_command = "fio %s" % (job_file)
        proc = g.run_async(server, fio_command)
        proc_list.append(proc)

    for index, proc in enumerate(proc_list):
        results = proc.async_communicate()
        if results[0] != 0:
            g.log.error("fio test failed on server %s" % servers[index])
            rt = False

    for index, server in enumerate(servers):
        ret, out, _ = g.run(server,
                            "rm -rf %s/fio_file.*" % directory_to_run[index])
        if ret != 0:
            g.log.error("Failed to remove files from %s" % server)
            rt = False

    for index, server in enumerate(servers):
        ret, out, _ = g.run(server, "rm -rf %s" % job_file)
        if ret != 0:
            g.log.error("Failed to remove job file from %s" % server)
            rt = False

    for server in servers:
        ret, out, _ = g.run(server, "yum -y remove fio")
        if ret != 0:
            g.log.error("Failed to remove fio from %s" % server)
            return False
    return rt
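# A hedged usage sketch for run_fio(), mirroring the docstring example; the
# server names and mountpoints are hypothetical:
if not run_fio(["server1.example.com", "server2.example.com"],
               ["/mnt/test1", "/mnt/test2"]):
    g.log.error("fio failed on at least one server")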
Example #4
    def test_fops_ec_volume(self):
        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        """
        - 1. Start resource consumption tool
        - 2. Create directory dir1
        - 3. Create 5 dirs and 5 files in each dir in directory 1
        - 4. Rename all files inside dir1
        - 5. Truncate files in one dir per client inside dir1
        - 6. Create softlinks and hardlinks of files in mountpoint
        - 7. Delete all files in one of the dirs
        - 8. chmod, chown, chgrp inside dir1
        - 9. Create tiny, small, medium and large files
        - 10. Create files on the client side for dir1
        - 11. Validate IO and wait for it to complete
        - 12. Get arequal before killing the brick
        - 13. Kill 1st brick manually
        - 14. Get arequal after killing 1st brick
        - 15. Kill 2nd brick manually
        - 16. Get arequal after killing 2nd brick
        - 17. Compare the arequals
        - 18. Delete dir1
        """

        # Starting resource consumption using top
        log_file_mem_monitor = getcwd() + '/mem_usage.log'
        cmd = ("for i in {1..100}; do top -n 1 -b | egrep 'RES|gluster' & "
               "free -h 2>&1 >> %s; sleep 10; done" % log_file_mem_monitor)
        g.log.info(cmd)
        for mount_obj in self.mounts:
            g.run_async(mount_obj.client_system, cmd)

        # get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # Creating dir1
        cmd = ('mkdir  %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create directory1")
        g.log.info("Directory 1 created successfully for %s", self.mounts[0])

        # Create 5 dir and 5 files in each dir at mountpoint on dir1
        start = 1
        end = 5
        for mount_obj in self.mounts:
            # Number of dir and files to be created.
            dir_range = str(start) + ".." + str(end)
            file_range = str(start) + ".." + str(end)
            # Create dir 1-5 at mountpoint.
            cmd = ('mkdir %s/dir1/dir{%s};' %
                   (mount_obj.mountpoint, dir_range))
            g.run(mount_obj.client_system, cmd)

            # Create files inside each dir.
            cmd = ('touch %s/dir1/dir{%s}/file{%s};' %
                   (mount_obj.mountpoint, dir_range, file_range))
            g.run(mount_obj.client_system, cmd)

            # Increment counter so that at next client dir and files are made
            # with diff offset. Like at next client dir will be named
            # dir6, dir7...dir10. Same with files.
            start += 5
            end += 5

        # Rename all files inside dir1/dir1 at the mountpoint on each client
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir1/; '
                   'for FILENAME in *;'
                   'do mv $FILENAME Unix_$FILENAME; '
                   'done;' % mount_obj.mountpoint)
            g.run(mount_obj.client_system, cmd)

        # Truncate at any dir in mountpoint inside dir1
        # start is an offset to be added to dirname to act on
        # diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s/; '
                   'for FILENAME in *;'
                   'do echo > $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)

        # Create softlink and hardlink of files in mountpoint. Start is an
        # offset to be added to dirname to act on diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln -s $FILENAME softlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln $FILENAME hardlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start + 1)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # chmod, chown, chgrp inside dir1
        # start and end used as offset to access diff files
        # at diff clients.
        start = 2
        end = 5
        for mount_obj in self.mounts:
            dir_file_range = '%s..%s' % (str(start), str(end))
            cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chown root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            start += 5
            end += 5

        # Create tiny, small, medium and large files
        # at mountpoint. Offset differentiates filenames
        # at diff clients.
        offset = 1
        for mount_obj in self.mounts:
            prefix = mount_obj.mountpoint
            cmd = 'fallocate -l 100 %s/tiny_file%s.txt' % (prefix, str(offset))
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 20M %s/small_file%s.txt' % (prefix,
                                                            str(offset))
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 200M %s/medium_file%s.txt' % (prefix,
                                                              str(offset))
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 1G %s/large_file%s.txt' % (prefix,
                                                           str(offset))
            g.run(mount_obj.client_system, cmd)
            offset += 1

        # Create a 2TB file if available space on the volume
        # is greater than or equal to 3TB
        command = ("df %s" % self.mounts[0].mountpoint)
        rcode, rout, rerr = g.run(self.mounts[0].client_system, command)
        if rcode == 0:
            # second line of df output, 4th column: available space in KB
            avail = rout.split("\n")[1].split()[3]
            if int(avail) >= 3000000000:
                cmd = ('fallocate -l 2TB %s/tiny_file_large.txt'
                       % self.mounts[0].mountpoint)
                g.run(self.mounts[0].client_system, cmd)
        else:
            g.log.error("Get mountpoint failed: %s", rerr)

        # Creating files on client side for dir1
        # Write IO
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validating IO's and waiting to complete
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get arequal before killing the brick
        g.log.info('Getting arequal before killing of brick...')
        ret, result_before_killing_brick = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before killing of brick is successful')

        # Kill 1st brick manually
        ret = bring_bricks_offline(self.volname, [bricks_list[1]])
        self.assertTrue(ret, 'Brick not offline')
        g.log.info('Brick is offline successfully')

        # Get arequal after killing 1st brick
        g.log.info('Getting arequal after killing of brick...')
        ret, result_after_killing_brick = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after killing of brick is successful')

        # Kill 2nd brick manually
        ret = bring_bricks_offline(self.volname, [bricks_list[3]])
        self.assertTrue(ret, 'Brick not offline')
        g.log.info('Brick is offline successfully')

        # Get arequal after killing 2nd brick
        g.log.info('Getting arequal after killing of brick...')
        ret, result_after_killing_brick_2 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after killing of brick is successful')

        # Comparing arequals
        self.assertEqual(
            result_before_killing_brick, result_after_killing_brick,
            'Arequals are not equal before killing brick '
            'processes and after offlining 1 redundant brick')
        g.log.info('Arequals are equal before killing brick '
                   'processes and after offlining 1 redundant brick')

        # Comparing arequals
        self.assertEqual(result_after_killing_brick,
                         result_after_killing_brick_2,
                         'Arequals are not equal after killing 2'
                         ' bricks')
        g.log.info('Arequals are equal after offlining 2 redundant bricks')

        # Delete op for deleting all file in one of the dirs. start is being
        # used as offset like in previous testcase in dir1
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do rm -f $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # Deleting dir1
        cmd = ('rm -rf %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory1")
        g.log.info("Directory 1 deleted successfully for %s", self.mounts[0])
Example #5
def quota_fetch_daemon_pid(nodes):
    """
    Checks if the quota daemon process is running and
    returns the process IDs in dictionary format

    Args:
        nodes ( str|list ) : Node/Nodes of the cluster

    Returns:
        tuple : Tuple containing two elements (ret, quotad_pids).
        The first element 'ret' is of type 'bool', True if and only if
        quotad is running on all the nodes in the list and each
        node contains only one instance of quotad running.
        False otherwise.

        The second element 'quotad_pids' is of type dictionary and it
        contains the 'nodes' as the key and 'quotad PID' as the value.

        If there is NO quota daemon running on some nodes, the first element
        will be 'False' and the nodes which do not have a quota daemon running
        will have a value of '-1'.

        If there is MORE THAN ONE quota daemon process for a node, the first
        element will be 'False' and the value for that node will be '-1'.

        Example:
            quota_fetch_daemon_pid(["node1", "node2"])
            (False, {'node2': ['8012'], 'node1': [-1]})

            Here 'node1' doesn't have quota daemon running. Hence, value
            of 'node1' is '-1'.
    """
    quotad_pids = {}
    _rc = True
    if not isinstance(nodes, list):
        nodes = [nodes]
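    # Note: pgrep -f matches against the full command line, so it would also
    # match the remote shell running this command (its command line contains
    # "quotad"); the trailing grep filters out that shell's own PID ($$).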
    cmd = r"pgrep -f quotad | grep -v ^$$\$"
    g.log.info("Executing cmd: %s on node %s" % (cmd, nodes))
    results = g.run_parallel(nodes, cmd)
    for node in results:
        ret, out, err = results[node]
        if ret == 0:
            if len(out.strip().split("\n")) == 1:
                if not out.strip():
                    g.log.info("NO Quota daemon process found "
                               "on node %s" % node)
                    _rc = False
                    quotad_pids[node] = [-1]
                else:
                    g.log.info(
                        "Single Quota Daemon process with "
                        "pid %s found on %s",
                        out.strip().split("\n"), node)
                    quotad_pids[node] = (out.strip().split("\n"))
            else:
                g.log.info("More than One Quota daemon process "
                           "found on node %s" % node)
                _rc = False
                quotad_pids[node] = [-1]
        else:
            g.log.info("Not able to get Quota daemon process "
                       "from node %s" % node)
            _rc = False
            quotad_pids[node] = [-1]

    return _rc, quotad_pids
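# A minimal sketch of consuming the (ret, quotad_pids) tuple returned by
# quota_fetch_daemon_pid(); the node names are hypothetical:
ret, quotad_pids = quota_fetch_daemon_pid(["node1.example.com",
                                           "node2.example.com"])
if not ret:
    for node, pids in quotad_pids.items():
        if pids == [-1]:
            g.log.error("quotad not running (or duplicated) on %s", node)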
Example #6
def create_ssl_machine(servers, clients):
    """Following are the steps to create ssl machines:
            - Stop glusterd on all servers
            - Run: openssl genrsa -out /etc/ssl/glusterfs.key 2048
            - Run: openssl req -new -x509 -key /etc/ssl/glusterfs.key
                   -subj "/CN=ip's" -days 365 -out /etc/ssl/glusterfs.pem
            - copy glusterfs.pem files into glusterfs.ca from all
              the nodes(servers+clients) to all the servers
            - touch /var/lib/glusterd/secure-access
            - Start glusterd on all servers
    Args:
        servers: List of servers
        clients: List of clients

    Returns:
        bool : True if successfully created ssl machine. False otherwise.
    """
    # pylint: disable=too-many-statements, too-many-branches
    # pylint: disable=too-many-return-statements
    # Variable to collect all servers ca_file for servers
    ca_file_server = StringIO()

    # Stop glusterd on all servers
    ret = g.run_parallel(servers, "systemctl stop glusterd")
    if not ret:
        g.log.error("Failed to stop glusterd on all servers")
        return False

    # Generate key file on all servers
    cmd = "openssl genrsa -out /etc/ssl/glusterfs.key 2048"
    results = g.run_parallel(servers, cmd)
    if not all(ret == 0 for ret, _, _ in results.values()):
        g.log.error("Failed to create /etc/ssl/glusterfs.key "
                    "file on all servers")
        return False

    # Generate glusterfs.pem file on all servers
    for server in servers:
        _, hostname, _ = g.run(server, "hostname")
        cmd = ("openssl req -new -x509 -key /etc/ssl/glusterfs.key -subj "
               "/CN=%s -days 365 -out /etc/ssl/glusterfs.pem"
               % hostname.strip())
        ret, _, _ = g.run(server, cmd)
        if ret != 0:
            g.log.error(
                "Failed to create /etc/ssl/glusterfs.pem "
                "file on server %s", server)
            return False

    # Copy glusterfs.pem file of all servers into ca_file_server
    for server in servers:
        conn1 = g.rpyc_get_connection(server)
        if conn1 == "None":
            g.log.error("Failed to get rpyc connection on %s", server)

        with conn1.builtin.open('/etc/ssl/glusterfs.pem') as fin:
            ca_file_server.write(fin.read())

    # Copy all ca_file_server for clients use
    ca_file_client = ca_file_server.getvalue()

    # Generate key file on all clients
    for client in clients:
        _, hostname, _ = g.run(client, "hostname -s")
        cmd = "openssl genrsa -out /etc/ssl/glusterfs.key 2048"
        ret, _, _ = g.run(client, cmd)
        if ret != 0:
            g.log.error(
                "Failed to create /etc/ssl/glusterfs.key "
                "file on client %s", client)
            return False

        # Generate glusterfs.pem file on all clients
        cmd = ("openssl req -new -x509 -key /etc/ssl/glusterfs.key -subj "
               "/CN=%s -days 365 -out /etc/ssl/glusterfs.pem" % (client))
        ret, _, _ = g.run(client, cmd)
        if ret != 0:
            g.log.error(
                "Failed to create /etc/ssl/glusterfs.pem "
                "file on client %s", client)
            return False

        # Copy glusterfs.pem file of client to a ca_file_server
        conn2 = g.rpyc_get_connection(client)
        if conn2 == "None":
            g.log.error("Failed to get rpyc connection on %s", server)
        with conn2.builtin.open('/etc/ssl/glusterfs.pem') as fin:
            ca_file_server.write(fin.read())

        # Copy glusterfs.pem file to glusterfs.ca of the client so that
        # clients do not share their respective CA files with each other
        cmd = "cp /etc/ssl/glusterfs.pem /etc/ssl/glusterfs.ca"
        ret, _, _ = g.run(client, cmd)
        if ret != 0:
            g.log.error("Failed to copy the glusterfs.pem to "
                        "glusterfs.ca of client")
            return False

        # Now copy the ca_file of all servers to client ca file
        with conn2.builtin.open('/etc/ssl/glusterfs.ca', 'a') as fout:
            fout.write(ca_file_client)

        # Create /var/lib/glusterd directory on clients
        ret = g.run(client, "mkdir -p /var/lib/glusterd/")
        if not ret:
            g.log.error("Failed to create directory /var/lib/glusterd/"
                        " on clients")

    # Copy ca_file_server to all servers
    for server in servers:
        conn3 = g.rpyc_get_connection(server)
        if conn3 == "None":
            g.log.error("Failed to get rpyc connection on %s", server)

        with conn3.builtin.open('/etc/ssl/glusterfs.ca', 'w') as fout:
            fout.write(ca_file_server.getvalue())

    # Touch /var/lib/glusterd/secure-access on all servers
    ret = g.run_parallel(servers, "touch /var/lib/glusterd/secure-access")
    if not ret:
        g.log.error("Failed to touch the file on servers")
        return False

    # Touch /var/lib/glusterd/secure-access on all clients
    ret = g.run_parallel(clients, "touch /var/lib/glusterd/secure-access")
    if not ret:
        g.log.error("Failed to touch the file on clients")
        return False

    # Start glusterd on all servers
    ret = g.run_parallel(servers, "systemctl start glusterd")
    if not ret:
        g.log.error("Failed to stop glusterd on servers")
        return False

    return True
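# A hedged usage sketch for create_ssl_machine(); the server and client
# names below are hypothetical:
if not create_ssl_machine(["server1.example.com", "server2.example.com"],
                          ["client1.example.com"]):
    g.log.error("Failed to set up SSL/TLS across the cluster")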
Example #7
def cleanup_ssl_setup(servers, clients):
    """
    Following are the steps to clean up the ssl setup:
            - Stop glusterd on all servers
            - Remove the /etc/ssl/glusterfs* files
            - Remove /var/lib/glusterd/secure-access
            - Start glusterd on all servers

    Args:
        servers: List of servers
        clients: List of clients

    Returns:
        bool : True if successfully cleaned ssl machine. False otherwise.
    """
    # pylint: disable=too-many-return-statements
    _rc = True

    # Stop glusterd on all servers
    ret = g.run_parallel(servers, "systemctl stop glusterd")
    if not ret:
        _rc = False
        g.log.error("Failed to stop glusterd on all servers")

    # Remove glusterfs.key, glusterfs.pem and glusterfs.ca file
    # from all servers
    cmd = "rm -rf /etc/ssl/glusterfs*"
    results = g.run_parallel(servers, cmd)
    if not all(ret == 0 for ret, _, _ in results.values()):
        _rc = False
        g.log.error("Failed to remove the /etc/ssl/glusterfs* files "
                    "on all servers")

    # Remove the /var/lib/glusterd/secure-access file from servers
    cmd = "rm -rf /var/lib/glusterd/secure-access"
    results = g.run_parallel(servers, cmd)
    if not all(ret == 0 for ret, _, _ in results.values()):
        _rc = False
        g.log.error("Failed to remove /var/lib/glusterd/secure-access "
                    "on all servers")

    # Remove glusterfs.key, glusterfs.pem and glusterfs.ca file
    # from all clients
    cmd = "rm -rf /etc/ssl/glusterfs*"
    results = g.run_parallel(clients, cmd)
    if not all(ret == 0 for ret, _, _ in results.values()):
        _rc = False
        g.log.error("Failed to remove the /etc/ssl/glusterfs* files "
                    "on all clients")

    # Remove the /var/lib/glusterd/secure-access file from clients
    cmd = "rm -rf /var/lib/glusterd/secure-access"
    results = g.run_parallel(clients, cmd)
    if not all(ret == 0 for ret, _, _ in results.values()):
        _rc = False
        g.log.error("Failed to remove /var/lib/glusterd/secure-access "
                    "on all clients")

    # Start glusterd on all servers
    ret = g.run_parallel(servers, "systemctl start glusterd")
    if not ret:
        _rc = False
        g.log.error("Failed to stop glusterd on servers")

    return _rc
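# A hedged usage sketch for cleanup_ssl_setup(), typically run after
# create_ssl_machine(); the server and client names are hypothetical:
if not cleanup_ssl_setup(["server1.example.com", "server2.example.com"],
                         ["client1.example.com"]):
    g.log.error("Failed to clean up the SSL/TLS setup")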
Example #8
    def test_brickreset_ec_volume(self):
        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        """
        - Start resource consumption tool
        - Create IO on dir2 of volume mountpoint
        - Reset brick start
        - Check if brick is offline
        - Reset brick with destination same as source with force running IO's
        - Validating IO's and waiting for it to complete on dir2
        - Remove dir2
        - Create 5 directory and 5 files in dir of mountpoint
        - Rename all files inside dir1 at mountpoint
        - Create softlink and hardlink of files in dir1 of mountpoint
        - Delete op for deleting all file in one of the dirs inside dir1
        - Change chmod, chown, chgrp
        - Create tiny, small, medium and large file
        - Create IO's
        - Validating IO's and waiting for it to complete
        - Calculate arequal before killing brick
        - Get brick from Volume
        - Reset brick
        - Check if brick is offline
        - Reset brick by giving a different source and dst node
        - Reset brick by giving dst and source same without force
        - Obtain hostname
        - Reset brick with dst-source same force using hostname - Successful
        - Monitor heal completion
        - Bring down other bricks to max redundancy
        - Get arequal after bringing down bricks
        - Bring bricks online
        - Reset brick by giving a same source and dst brick
        - Kill brick manually
        - Check if brick is offline
        - Reset brick by giving a same source and dst brick
        - Wait for brick to come online
        - Bring down other bricks to max redundancy
        - Get arequal after bringing down bricks
        - Bring bricks online
        - Remove brick from backend
        - Check if brick is offline
        - Reset brick by giving dst and source same without force - Successful
        - Monitor heal completion
        - Compare the arequal's calculated
        """
        # Starting resource consumption using top
        log_file_mem_monitor = getcwd() + '/mem_usage.log'
        cmd = ("for i in {1..100}; do top -n 1 -b | egrep 'RES|gluster' & "
               "free -h 2>&1 >> %s; sleep 10; done" % log_file_mem_monitor)
        g.log.info(cmd)
        for mount_obj in self.mounts:
            g.run_async(mount_obj.client_system, cmd)

        # Get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # Creating directory2
        cmd = ('mkdir %s/dir2' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create directory2")
        g.log.info("Directory 2 on %s created successfully", self.mounts[0])

        # Creating files on client side for dir2
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -n 2 -f 20 %s/dir2" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Reset a brick
        g.log.info('Reset of brick using start')
        brick_reset = choice(bricks_list)
        ret, _, _ = reset_brick(self.mnode, self.volname, brick_reset, "start")
        self.assertEqual(ret, 0, "Failed to start reset of brick %s"
                         % brick_reset)

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        offline_bricks = get_offline_bricks_list(self.mnode, self.volname)
        self.assertIn(brick_reset, offline_bricks, "Brick not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with dest same as source with force while running IO's
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(self.mnode,
                                self.volname,
                                brick_reset,
                                "commit",
                                brick_reset,
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Validating IO's and waiting to complete
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Deleting dir2
        cmd = ('rm -rf %s/dir2' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory2")
        g.log.info("Directory 2 deleted successfully for %s", self.mounts[0])

        del self.all_mounts_procs[:]

        # Creating dir1
        cmd = ('mkdir  %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create directory1")
        g.log.info("Directory 1 created successfully for %s", self.mounts[0])

        # Create 5 dir and 5 files in each dir at mountpoint on dir1
        start, end = 1, 5
        for mount_obj in self.mounts:
            # Number of dir and files to be created.
            dir_range = str(start) + ".." + str(end)
            file_range = str(start) + ".." + str(end)
            # Create dir 1-5 at mountpoint.
            cmd = ('mkdir %s/dir1/dir{%s};' %
                   (mount_obj.mountpoint, dir_range))
            g.run(mount_obj.client_system, cmd)

            # Create files inside each dir.
            cmd = ('touch %s/dir1/dir{%s}/file{%s};' %
                   (mount_obj.mountpoint, dir_range, file_range))
            g.run(mount_obj.client_system, cmd)

            # Increment counter so that at next client dir and files are made
            # with diff offset. Like at next client dir will be named
            # dir6, dir7...dir10. Same with files.
            start += 5
            end += 5

        # Rename all files inside dir1/dir1 at the mountpoint on each client
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir1/; '
                   'for FILENAME in *;'
                   'do mv $FILENAME Unix_$FILENAME; '
                   'done;' % mount_obj.mountpoint)
            g.run(mount_obj.client_system, cmd)

        # Truncate at any dir in mountpoint inside dir1
        # start is an offset to be added to dirname to act on
        # diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s/; '
                   'for FILENAME in *;'
                   'do echo > $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)

        # Create softlink and hardlink of files in mountpoint. Start is an
        # offset to be added to dirname to act on diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln -s $FILENAME softlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln $FILENAME hardlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start + 1)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # Delete op for deleting all file in one of the dirs. start is being
        # used as offset like in previous testcase in dir1
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do rm -f $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # chmod, chown, chgrp inside dir1
        # start and end used as offset to access diff files
        # at diff clients.
        start, end = 2, 5
        for mount_obj in self.mounts:
            dir_file_range = '%s..%s' % (str(start), str(end))
            cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chown root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            start += 5
            end += 5

        # Create tiny, small, medium and large files
        # at mountpoint. Offset differentiates filenames
        # at diff clients.
        offset = 1
        for mount_obj in self.mounts:
            prefix = mount_obj.mountpoint
            cmd = 'fallocate -l 100 %s/tiny_file%s.txt' % (prefix, str(offset))
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 20M %s/small_file%s.txt' % (prefix,
                                                            str(offset))
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 200M %s/medium_file%s.txt' % (prefix,
                                                              str(offset))
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 1G %s/large_file%s.txt' % (prefix,
                                                           str(offset))
            g.run(mount_obj.client_system, cmd)
            offset += 1

        # Creating files on client side for dir1
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -n 2 -f 20 %s/dir1" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validating IO's and waiting to complete
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Get arequal before killing the brick
        g.log.info('Getting arequal before killing of brick...')
        ret, result_before_killing_brick = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before killing of brick is successful')

        # Reset a brick
        g.log.info('Reset of brick using start')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "start")
        self.assertEqual(ret, 0, "Failed to start reset of brick %s"
                         % bricks_list[0])

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]])
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick by giving a different source and dst brick
        g.log.info('Reset of brick by giving different src and dst brick')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "commit", bricks_list[1])
        self.assertNotEqual(ret, 0, "Not Expected: Reset brick is successful")
        g.log.info("Expected : Source and Destination brick must be same for"
                   " reset")

        # Reset brick with destination same as source
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "commit", bricks_list[0])
        self.assertNotEqual(ret, 0, "Not Expected : Reset brick is successful")
        g.log.info("Expected : Reset brick failed,Vol id is same use force")

        # Obtain hostname of node
        ret, hostname_node1, _ = g.run(self.mnode, "hostname")
        self.assertEqual(ret, 0,
                         ("Failed to obtain hostname of node %s", self.mnode))
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.mnode, hostname_node1.strip())

        # Reset brick with destination same as source with force using hostname
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(hostname_node1.strip(),
                                self.volname,
                                bricks_list[0],
                                "commit",
                                bricks_list[0],
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Bring down other bricks to max redundancy
        # Get List of bricks to bring offline

        # Bringing bricks offline
        ret = bring_bricks_offline(self.volname, bricks_list[1:3])
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')
        sleep(2)

        # Check if 4 bricks are online
        all_bricks = [
            bricks_list[0], bricks_list[3], bricks_list[4], bricks_list[5]
        ]
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Check mount point
        cmd = 'ls -lrt /mnt'
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        g.log.info("Client mount point details ")

        # Get arequal after bringing down bricks
        g.log.info('Getting arequal after bringing down bricks...')
        ret, result_offline_redundant_brick1 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after bringing down bricks '
                   'is successful')

        # Bring bricks online
        list_of_bricks_to_bring_online = bricks_list[1:3]
        ret = bring_bricks_online(self.mnode, self.volname,
                                  list_of_bricks_to_bring_online)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Reset brick without bringing down brick
        g.log.info('Reset of brick with same src and dst brick without '
                   'bringing it down')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[1],
                                "commit", bricks_list[1])
        self.assertNotEqual(ret, 0, "Not Expected: Reset brick passed")
        g.log.info("Expected : Brick reset failed as source brick must be"
                   " stopped")

        # Kill the brick manually
        ret = bring_bricks_offline(self.volname, [bricks_list[1]])
        self.assertTrue(ret, 'Brick not offline')
        g.log.info('Brick is offline successfully')

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]])
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with dest same as source after killing brick manually
        g.log.info('Reset of brick with same src and dst brick with force')
        ret, _, _ = reset_brick(self.mnode,
                                self.volname,
                                bricks_list[1],
                                "commit",
                                bricks_list[1],
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        ret = bring_bricks_offline(self.volname, bricks_list[2:4])
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')

        # Check mount point
        cmd = 'ls -lrt /mnt'
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        g.log.info("Client mount point details")

        # Get arequal after bringing down bricks
        g.log.info('Getting arequal after bringing down redundant bricks...')
        ret, result_offline_redundant_brick2 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after bringing down redundant bricks '
                   'is successful')

        # Bring bricks online
        list_of_bricks_to_bring_online = bricks_list[2:4]
        ret = bring_bricks_online(self.mnode, self.volname,
                                  list_of_bricks_to_bring_online)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Removing brick from backend
        brick = bricks_list[0].strip().split(":")
        cmd = "rm -rf %s" % brick[1]
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to delete brick %s" % bricks_list[0])
        g.log.info("Removed brick %s sucessfully", bricks_list[0])

        # Check if the brick is offline
        count = 0
        while count <= 20:
            g.log.info("Check the brick status if it is offline")
            ret = are_bricks_offline(self.mnode, self.volname,
                                     [bricks_list[0]])
            if ret:
                break
            sleep(2)
            count += 1
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with destination same as source
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(hostname_node1.strip(), self.volname,
                                bricks_list[0], "commit", bricks_list[0])
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Comparing arequals
        self.assertEqual(
            result_before_killing_brick, result_offline_redundant_brick1,
            'Arequals are not equal before killing brick '
            'processes and after offlining redundant bricks')
        g.log.info('Arequals are equal before killing brick '
                   'processes and after offlining redundant bricks')

        # Comparing arequals
        self.assertEqual(
            result_offline_redundant_brick2, result_offline_redundant_brick1,
            'Arequals are not equal for offlining redundant'
            ' bricks')
        g.log.info('Arequals are equal for offlining redundant bricks')

        # Deleting dir1
        cmd = ('rm -rf %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory1")
        g.log.info("Directory 1 deleted successfully for %s", self.mounts[0])