Example #1
    def test_peer_probe_invalid_ip_nonexist_host_nonexist_ip(self):
        '''
        Test script to verify peer probe with a non existing ip,
        a non existing host and an invalid ip. Peer probe must
        fail for the invalid ip, the non existing ip and the
        non existing host. Also verify that the glusterd service
        is up and running after the invalid peer probes, and that
        no core file is created under the "/", /var/log/core and
        /tmp directories.
        '''
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()
        g.log.info("Running Test : %s", self.id())

        # Assigning non existing ip to variable
        self.non_exist_ip = '256.256.256.256'

        # Assigning invalid ip to variable
        self.invalid_ip = '10.11.a'

        # Assigning non existing host to variable
        self.non_exist_host = 'abc.lab.eng.blr.redhat.com'

        # Peer probe checks for non existing host
        g.log.info("peer probe checking for non existing host")
        ret, _, _ = peer_probe(self.mnode, self.non_exist_host)
        self.assertNotEqual(
            ret, 0, "peer probe should fail for "
            "non existing host: %s" % self.non_exist_host)
        g.log.info("peer probe failed for non existing host")

        # Peer probe checks for invalid ip
        g.log.info("peer probe checking for invalid ip")
        ret, _, _ = peer_probe(self.mnode, self.invalid_ip)
        self.assertNotEqual(
            ret, 0, "peer probe should fail for "
            "invalid ip: %s" % self.invalid_ip)
        g.log.info("peer probe failed for invalid_ip")

        # peer probe checks for non existing ip
        g.log.info("peer probe checking for non existing ip")
        ret, _, _ = peer_probe(self.mnode, self.non_exist_ip)
        self.assertNotEqual(
            ret, 0, "peer probe should fail for non existing "
            "ip: %s" % self.non_exist_ip)
        g.log.info("peer probe failed for non existing ip")

        # Check that Glusterd services are still running after the peer
        # probes of the invalid and non existing hosts

        self.mnode_list = []
        self.mnode_list.append(self.mnode)
        ret = is_glusterd_running(self.mnode_list)
        self.assertEqual(ret, 0, "Glusterd service should be running")

        # Checking whether a core file was created in the "/", "/tmp" and
        # "/var/log/core" directories
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "core file found")
Example #2
    def test_concurrent_set(self):
        # time stamp of current test case
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()
        # Create a volume
        self.volname = "first-vol"
        self.brick_list = form_bricks_list(self.mnode, self.volname, 3,
                                           self.servers, self.all_servers_info)

        ret = volume_create(self.mnode,
                            self.volname,
                            self.brick_list,
                            force=False)
        self.assertEqual(ret[0], 0, ("Unable"
                                     "to create volume % s" % self.volname))
        g.log.info("Volume created successfuly % s" % self.volname)

        # Create a volume
        self.volname = "second-vol"
        self.brick_list = form_bricks_list(self.mnode, self.volname, 3,
                                           self.servers, self.all_servers_info)
        g.log.info("Creating a volume")
        ret = volume_create(self.mnode,
                            self.volname,
                            self.brick_list,
                            force=False)
        self.assertEqual(ret[0], 0, ("Unable"
                                     "to create volume % s" % self.volname))
        g.log.info("Volume created successfuly % s" % self.volname)

        cmd1 = ("for i in `seq 1 100`; do gluster volume set first-vol "
                "read-ahead on; done")
        cmd2 = ("for i in `seq 1 100`; do gluster volume set second-vol "
                "write-behind on; done")

        proc1 = g.run_async(random.choice(self.servers), cmd1)
        proc2 = g.run_async(random.choice(self.servers), cmd2)

        ret1, out1, err1 = proc1.async_communicate()
        ret2, out2, err2 = proc2.async_communicate()

        self.assertEqual(
            ret1, 0, "Concurrent volume set on different volumes "
            "simultaneously failed")
        self.assertEqual(
            ret2, 0, "Concurrent volume set on different volumes "
            "simultaneously failed")

        g.log.info("Setting options on different volumes @ same time "
                   "successfully completed")
        ret = is_core_file_created(self.servers, test_timestamp)
        if ret:
            g.log.info("No core file found, glusterd service "
                       "running successfully")
        else:
            g.log.error("core file found in directory, it "
                        "indicates the glusterd service crash")
            self.assertTrue(ret, ("glusterd service should not crash"))
Example #3
    def test_invalid_memory_read_after_freed(self):
        """
        Test case:
        1. Create a volume and start it.
        2. Mount the volume using FUSE.
        3. Create multiple level of dirs and files inside every dir.
        4. Rename files such that linkto files are created.
        5. From the mount point do an rm -rf * and check if all files
           are deleted or not from mount point as well as backend bricks.
        """
        # Fetch timestamp to check for core files
        ret, test_timestamp, _ = g.run(self.mnode, "date +%s")
        self.assertEqual(ret, 0, "date command failed")
        test_timestamp = test_timestamp.strip()

        # Create multiple level of dirs and files inside every dir
        cmd = ("cd %s; for i in {1..100}; do mkdir dir$i; cd dir$i; "
               "for i in {1..200}; do dd if=/dev/urandom of=file$i bs=1K"
               " count=1; done; done" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create dirs and files")

        # Rename files such that linkto files are created
        cmd = ("cd %s; for i in {1..100}; do cd dir$i; for i in {1..200}; do "
               "mv file$i ntfile$i; done; done" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to rename files")
        g.log.info("Files created and renamed successfully")

        # From the mount point do an rm -rf * and check if all files
        # are delete or not from mount point as well as backend bricks.
        ret, _, _ = g.run(self.first_client,
                          "rm -rf {}/*".format(self.mounts[0].mountpoint))
        self.assertFalse(ret, "rn -rf * failed on mount point")

        ret = get_dir_contents(self.first_client,
                               "{}/".format(self.mounts[0].mountpoint))
        self.assertEqual(
            ret, [], "Unexpected: Files and directories still "
            "seen from mount point")

        for brick in get_all_bricks(self.mnode, self.volname):
            node, brick_path = brick.split(":")
            ret = get_dir_contents(node, "{}/".format(brick_path))
            self.assertEqual(
                ret, [], "Unexpected: Files and dirs still seen "
                "on brick %s on node %s" % (brick_path, node))
        g.log.info("rm -rf * on mount point successful")

        # Check for core file on servers and clients
        servers = self.servers + [self.first_client]
        ret = is_core_file_created(servers, test_timestamp)
        self.assertTrue(ret, "Core files found on servers used for test")
        g.log.info("No cores found on all participating servers")
Example #4
    def test_enable_brickmux_create_and_stop_three_volumes(self):
        """
        Test Case:
        1.Set cluster.brick-multiplex to enabled.
        2.Create three 1x3 replica volumes.
        3.Start all the three volumes.
        4.Stop three volumes one by one.
        """

        # Timestamp of current test case of start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Setting cluster.brick-multiplex to enable
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.brick-multiplex': 'enable'})
        self.assertTrue(ret, "Failed to set brick-multiplex to enable.")
        g.log.info("Successfully set brick-multiplex to enable.")

        # Create and start 3 volumes
        for number in range(1, 4):
            self.volume['name'] = ("test_volume_%s" % number)
            self.volname = ("test_volume_%s" % number)
            ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
            self.assertTrue(ret,
                            "Failed to create and start %s" % self.volname)
            g.log.info("Successfully created and started volume %s.",
                       self.volname)

        # Checking brick process count.
        for brick in get_all_bricks(self.mnode, self.volname):
            server = brick.split(":")[0]
            count = get_brick_processes_count(server)
            self.assertEqual(
                count, 1, "ERROR: More than one brick process on %s." % server)
            g.log.info("Only one brick process present on %s", server)

        # Stop three volumes one by one.
        for number in range(1, 4):
            self.volume['name'] = ("test_volume_%s" % number)
            self.volname = ("test_volume_%s" % number)
            ret, _, _ = volume_stop(self.mnode, self.volname)
            self.assertEqual(ret, 0,
                             "Failed to stop the volume %s" % self.volname)
            g.log.info("Volume %s stopped successfully", self.volname)

        # Checking for core files.
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "Core file found.")
        g.log.info("No core files found, glusterd service running "
                   "successfully")
Example #5
    def test_verify_peer_probe_with_firewall_ports_not_opened(self):
        """
        Test Steps:
        1. Open the glusterd port only on Node1 using the firewall-cmd command
        2. Perform peer probe to Node2 from Node 1
        3. Verify glusterd.log for Errors
        4. Check for core files created
        """

        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Remove firewall service on the node to probe to
        self._remove_firewall_service()

        # Try peer probe from mnode to node
        ret, _, err = peer_probe(self.mnode, self.node_to_probe)
        self.assertEqual(ret, 1, ("Unexpected behavior: Peer probe should"
                                  " fail when the firewall services are "
                                  "down but returned success"))

        expected_err = ('peer probe: failed: Probe returned with '
                        'Transport endpoint is not connected\n')
        self.assertEqual(
            err, expected_err,
            "Expected error {}, but returned {}".format(expected_err, err))
        msg = ("Peer probe of {} from {} failed as expected ".format(
            self.mnode, self.node_to_probe))
        g.log.info(msg)

        # Verify there are no glusterd crashes
        status = True
        glusterd_logs = (self._get_test_specific_glusterd_log(
            self.mnode).split("\n"))
        for line in glusterd_logs:
            if ' E ' in line:
                status = False
                g.log.info("Error found: ' %s '", line)

        self.assertTrue(status, "Glusterd crash found")

        # Verify no core files are created
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "Unexpected crash found.")
        g.log.info("No core file found as expected")
Example #6
    def test_peer_probe_when_glusterd_down(self):
        # pylint: disable=too-many-statements
        '''
        Test script to verify the behavior when we try to peer
        probe a valid node whose glusterd is down
        Also post validate to make sure no core files are created
        under "/", /var/log/core and /tmp  directory

        Ref: BZ#1257394 Provide meaningful error on peer probe and peer detach
        Test Steps:
        1 check the current peer status
        2 detach one of the valid nodes which is already part of cluster
        3 stop glusterd on that node
        4 try to attach above node to cluster, which must fail with
          Transport endpoint error
        5 Recheck the test using hostname, expected to see same result
        6 start glusterd on that node
        7 bring down the network interface on the node for some time
        8 try to peer probe the node while it is unreachable, which must
          fail again.
        9 The only error accepted is
          "peer probe: failed: Probe returned with Transport endpoint is not
          connected"
        10 Check peer status and make sure no other nodes in peer reject state
        '''

        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Detach one of the nodes which is part of the cluster
        g.log.info("detaching server %s ", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1])
        msg = 'peer detach: failed: %s is not part of cluster\n' \
              % self.servers[1]
        if ret:
            self.assertEqual(err, msg, "Failed to detach %s "
                             % (self.servers[1]))

        # Bring down glusterd of the server which has been detached
        g.log.info("Stopping glusterd on %s ", self.servers[1])
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1])

        # Trying to peer probe the node whose glusterd was stopped using IP
        g.log.info("Peer probing %s when glusterd down ", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        # Trying to peer probe the same node with hostname
        g.log.info("Peer probing node %s using hostname with glusterd down ",
                   self.servers[1])
        hostname = g.run(self.servers[1], "hostname")
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with"
                              " Transport endpoint is not connected\n")

        # Start glusterd again for the next set of test steps
        g.log.info("starting glusterd on %s ", self.servers[1])
        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "glusterd couldn't start successfully on %s"
                        % self.servers[1])

        # Bring down the network for some time
        network_status = bring_down_network_interface(self.servers[1], 150)

        # Peer probing the node using IP while the network is still down
        g.log.info("Peer probing node %s when network is down",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to"
                                    " fail")
        self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe "
                                             "returned with Transport endpoint"
                                             " is not connected")

        # Peer probing the node using hostname when it is still not online
        g.log.info("Peer probing node %s using hostname which is still "
                   "not online ",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when node "
                                    "has not come online")
        self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe "
                                             "returned with Transport endpoint"
                                             " is not connected")

        ret, _, _ = network_status.async_communicate()
        if ret != 0:
            g.log.error("Failed to perform network interface ops")

        # Peer probe the node must pass
        g.log.info("peer probing node %s", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with "
                                 "%s " % err)

        # Checking if core file created in "/", "/tmp" and "/var/log/core"
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "core file found")
Example #7
    def test_volume_get(self):
        """
        desc: performing different combinations of gluster
        volume get functionalities
        1. Create a gluster cluster
        2. Get the option from the non-existing volume,
        gluster volume get <non-existing vol> io-cache
        3. Get all options from the non-existing volume,
        gluster volume get <non-existing volume > all
        4. Provide an incorrect command syntax to get the options
        from the volume
            gluster volume get <vol-name>
            gluster volume get
            gluster volume get io-cache
        5. Create any type of volume in the cluster
        6. Get the value of the non-existing option
            gluster volume get <vol-name> temp.key
        7. get all options set on the volume
            gluster volume get <vol-name> all
        8. get the specific option set on the volume
            gluster volume get <vol-name> io-cache
        9. Set an option on the volume
            gluster volume set <vol-name> performance.low-prio-threads 14
        10. Get all the options set on the volume and check
        for low-prio-threads
            gluster volume get <vol-name> all then get the
            low-prio-threads value
        11. Get all the options set on the volume
                gluster volume get <vol-name> all
        12. Check for any core files in "/"
        """
        # pylint: disable=too-many-statements

        # time stamp of current test case
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # performing gluster volume get io-cache for a non existing volume
        self.non_exist_volume = "abc99"
        ret, _, err = g.run(
            self.mnode,
            "gluster volume get %s io-cache" % self.non_exist_volume)
        self.assertNotEqual(
            ret, 0, "gluster volume get command should fail "
            "for non existing volume with io-cache "
            "option :%s" % self.non_exist_volume)
        msg = ('Volume ' + self.non_exist_volume + ' does not exist')
        self.assertIn(
            msg, err, "No proper error message for non existing "
            "volume with io-cache option :%s" % self.non_exist_volume)
        g.log.info(
            "gluster volume get command failed successfully for non "
            "existing volume with io-cache option"
            ":%s", self.non_exist_volume)

        # performing gluster volume get all for a non existing volume
        ret, _, err = g.run(
            self.mnode, "gluster volume get %s all" % self.non_exist_volume)
        self.assertNotEqual(
            ret, 0, "gluster volume get command should fail "
            "for non existing volume %s with all "
            "option" % self.non_exist_volume)
        self.assertIn(
            msg, err, "No proper error message for non existing "
            "volume with all option:%s" % self.non_exist_volume)
        g.log.info(
            "gluster volume get command failed successfully for non "
            "existing volume with all option :%s", self.non_exist_volume)

        # performing gluster volume get for a non existing volume
        ret, _, err = g.run(self.mnode, "gluster volume get "
                            "%s" % self.non_exist_volume)
        self.assertNotEqual(
            ret, 0, "gluster volume get command should "
            "fail for non existing volume :%s" % self.non_exist_volume)
        msg = 'get <VOLNAME|all> <key|all>'
        self.assertIn(
            msg, err, "No proper error message for non existing "
            "volume :%s" % self.non_exist_volume)
        g.log.info(
            "gluster volume get command failed successfully for non "
            "existing volume :%s", self.non_exist_volume)

        # performing gluster volume get command without any volume name given
        ret, _, err = g.run(self.mnode, "gluster volume get")
        self.assertNotEqual(ret, 0, "gluster volume get command should fail")
        self.assertIn(
            msg, err, "No proper error message for gluster "
            "volume get command")
        g.log.info("gluster volume get command failed successfully")

        # performing gluster volume get io-cache command
        # without any volume name given
        ret, _, err = g.run(self.mnode, "gluster volume get io-cache")
        self.assertNotEqual(
            ret, 0, "gluster volume get io-cache command "
            "should fail")
        self.assertIn(
            msg, err, "No proper error message for gluster volume "
            "get io-cache command")
        g.log.info("gluster volume get io-cache command failed successfully")

        # gluster volume get volname with non existing option
        ret, _, err = g.run(self.mnode, "gluster volume "
                            "get %s temp.key" % self.volname)
        self.assertNotEqual(
            ret, 0, "gluster volume get command should fail "
            "for existing volume %s with non-existing "
            "option" % self.volname)
        msg = 'Did you mean auth.allow or ...reject?'
        self.assertIn(
            msg, err, "No proper error message for existing "
            "volume %s with non-existing option" % self.volname)
        g.log.info(
            "gluster volume get command failed successfully for "
            "existing volume %s with non existing option", self.volname)

        # performing gluster volume get volname all

        ret = get_volume_options(self.mnode, self.volname, "all")
        self.assertIsNotNone(
            ret, "gluster volume get %s all command "
            "failed" % self.volname)
        g.log.info(
            "gluster volume get %s all command executed "
            "successfully", self.volname)

        # performing gluster volume get volname io-cache
        ret = get_volume_options(self.mnode, self.volname, "io-cache")
        self.assertIsNotNone(
            ret, "gluster volume get %s io-cache command "
            "failed" % self.volname)
        self.assertIn("on", ret['performance.io-cache'], "io-cache value "
                      "is not correct")
        g.log.info("io-cache value is correct")

        # Performing gluster volume set volname performance.low-prio-threads
        prio_thread = {'performance.low-prio-threads': '14'}
        ret = set_volume_options(self.mnode, self.volname, prio_thread)
        self.assertTrue(
            ret, "gluster volume set %s performance.low-prio-"
            "threads failed" % self.volname)
        g.log.info(
            "gluster volume set %s "
            "performance.low-prio-threads executed successfully", self.volname)

        # Performing gluster volume get all, checking low-prio threads value
        ret = get_volume_options(self.mnode, self.volname, "all")
        self.assertIsNotNone(
            ret, "gluster volume get %s all "
            "failed" % self.volname)
        self.assertIn("14", ret['performance.low-prio-threads'],
                      "performance.low-prio-threads value is not correct")
        g.log.info("performance.low-prio-threads value is correct")

        # performing gluster volume get volname all
        ret = get_volume_options(self.mnode, self.volname, "all")
        self.assertIsNotNone(
            ret, "gluster volume get %s all command "
            "failed" % self.volname)
        g.log.info(
            "gluster volume get %s all command executed "
            "successfully", self.volname)

        # Checking core file created or not in "/" directory
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service "
                   "running successfully")
Example #8
    def test_peer_detach_host(self):
        # Peer detach a specified server from the cluster
        # Peer detach the already detached server again
        # Peer detach an invalid host
        # Peer detach a non existing host
        # Check whether a core file was created or not
        # Peer detach one node which hosts bricks of the created volume
        # Peer detach force a node which is hosting bricks of a volume

        # Timestamp of current test case of start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Assigning non existing host to variable
        self.non_exist_host = '256.256.256.256'

        # Assigning invalid ip to variable
        self.invalid_ip = '10.11.a'

        # Peer detach to specified server
        g.log.info("Start detach specified server :%s" % self.servers[1])
        ret, out, _ = peer_detach(self.mnode, self.servers[1])
        self.assertEqual(ret, 0,
                         "Failed to detach server :%s" % self.servers[1])

        # Detached server detaching again, Expected to fail detach
        g.log.info("Start detached server detaching "
                   "again : %s" % self.servers[1])
        ret, out, _ = peer_detach(self.mnode, self.servers[1])
        self.assertNotEqual(
            ret, 0, "Detach server should "
            "fail :%s" % self.servers[1])

        # Probing detached server
        g.log.info("Start probing detached server : %s" % self.servers[1])
        ret = peer_probe_servers(self.mnode, self.servers[1])
        self.assertTrue(
            ret, "Peer probe failed from %s to other "
            "server : %s" % (self.mnode, self.servers[1]))

        # Detach invalid host
        g.log.info("Start detaching invalid host :%s " % self.invalid_ip)
        ret, out, _ = peer_detach(self.mnode, self.invalid_ip)
        self.assertNotEqual(
            ret, 0, "Detach invalid host should "
            "fail :%s" % self.invalid_ip)

        # Detach non exist host
        g.log.info("Start detaching non exist host : %s" % self.non_exist_host)
        ret, out, _ = peer_detach(self.mnode, self.non_exist_host)
        self.assertNotEqual(
            ret, 0, "Detach non existing host "
            "should fail :%s" % self.non_exist_host)

        # Checking whether a core file was created in the "/", "/tmp" and
        # "/var/log/core" directories
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")

        # Creating Volume
        g.log.info("Started creating volume: %s" % self.volname)
        ret = self.setup_volume()
        self.assertTrue(ret, "Volume creation failed: %s" % self.volname)

        # Peer detach one node which contains the bricks of the volume created
        g.log.info("Start detaching server %s which is hosting "
                   "bricks of a volume" % self.servers[1])
        ret, out, err = peer_detach(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0,
                            "detach server should fail: %s" % self.servers[1])
        msg = ('peer detach: failed: Brick(s) with the peer ' +
               self.servers[1] + ' ' + 'exist in cluster')
        self.assertIn(msg, err, "Peer detach not failed with "
                      "proper error message")

        #  Peer detach force a node which is hosting bricks of a volume
        g.log.info("start detaching server %s with force option "
                   "which is hosting bricks of a volume" % self.servers[1])
        ret, out, err = peer_detach(self.mnode, self.servers[1], force=True)
        self.assertNotEqual(
            ret, 0, "detach server should fail with force "
            "option : %s" % self.servers[1])
        msg = ('peer detach: failed: Brick(s) with the peer ' +
               self.servers[1] + ' ' + 'exist in cluster')
        self.assertIn(
            msg, err, "Peer detach not failed with proper "
            "error message with force option")
Example #9
    def test_peer_probe_when_glusterd_down(self):
        # pylint: disable=too-many-statements
        '''
        Test script to verify the behavior when we try to peer
        probe a valid node whose glusterd is down
        Also post validate to make sure no core files are created
        under "/", /var/log/core and /tmp  directory

        Ref: BZ#1257394 Provide meaningful error on peer probe and peer detach
        Test Steps:
        1 check the current peer status
        2 detach one of the valid nodes which is already part of cluster
        3 stop glusterd on that node
        4 try to attach above node to cluster, which must fail with
          Transport endpoint error
        5 Recheck the test using hostname, expected to see same result
        6 start glusterd on that node
        7 halt/reboot the node
        8 try to peer probe the halted node, which must fail again.
        9 The only error accepted is
          "peer probe: failed: Probe returned with Transport endpoint is not
          connected"
        10 Check peer status and make sure no other nodes in peer reject state
        '''

        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # detach one of the nodes which is part of the cluster
        g.log.info("detaching server %s ", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1])
        msg = 'peer detach: failed: %s is not part of cluster\n' \
              % self.servers[1]
        if ret:
            self.assertEqual(err, msg, "Failed to detach %s "
                             % (self.servers[1]))

        # bring down glusterd of the server which has been detached
        g.log.info("Stopping glusterd on %s ", self.servers[1])
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1])

        # trying to peer probe the node whose glusterd was stopped using its IP
        g.log.info("Peer probing %s when glusterd down ", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        # trying to peer probe the same node with hostname
        g.log.info("Peer probing node %s using hostname with glusterd down ",
                   self.servers[1])
        hostname = g.run(self.servers[1], "hostname")
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with"
                              " Transport endpoint is not connected\n")

        # start glusterd again for the next set of test steps
        g.log.info("starting glusterd on %s ", self.servers[1])
        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "glusterd couldn't start successfully on %s"
                        % self.servers[1])

        # reboot a server and then trying to peer probe at the time of reboot
        g.log.info("Rebooting %s and checking peer probe", self.servers[1])
        reboot = g.run_async(self.servers[1], "reboot")

        # Mandatory sleep for 3 seconds to make sure node is in halted state
        sleep(3)

        # Peer probing the node using IP when it is still not online
        g.log.info("Peer probing node %s which has been issued a reboot ",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to"
                                    " fail")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        # Peer probing the node using hostname when it is still not online
        g.log.info("Peer probing node %s using hostname which is still "
                   "not online ",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when node "
                                    "has not come online")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        ret, _, _ = reboot.async_communicate()
        self.assertEqual(ret, 255, "reboot failed")

        # Validate if rebooted node is online or not
        count = 0
        while count < 40:
            sleep(15)
            ret, _ = are_nodes_online(self.servers[1])
            if ret:
                g.log.info("Node %s is online", self.servers[1])
                break
            count += 1
        self.assertTrue(ret, "Node in test not yet online")

        # check if glusterd is running post reboot
        ret = wait_for_glusterd_to_start(self.servers[1],
                                         glusterd_start_wait_timeout=120)
        self.assertTrue(ret, "Glusterd service is not running post reboot")

        # peer probe the node must pass
        g.log.info("peer probing node %s", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with "
                                 "%s " % err)

        # checking if core file created in "/", "/tmp" and "/var/log/core"
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "core file found")
Example #10
    def test_profile_operations(self):
        """
        Test Case:
        1) Create a volume and start it.
        2) Mount volume on client and start IO.
        3) Start profile on the volume.
        4) Run profile info and see if all bricks are present or not.
        5) Create another volume.
        6) Run profile info without starting the profile.
        7) Run profile info with all possible options without starting
           the profile.
        """
        # Timestamp of current test case of start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Start IO on mount points.
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--dirname-start-num %d "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Getting and checking output of profile info.
        ret, out, _ = profile_info(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to run profile info on volume: %s" % self.volname)
        g.log.info("Successfully executed profile info on volume: %s",
                   self.volname)

        # Checking if all bricks are present in profile info.
        brick_list = get_all_bricks(self.mnode, self.volname)
        for brick in brick_list:
            self.assertTrue(
                brick in out,
                "Brick %s not a part of profile info output." % brick)
            g.log.info("Brick %s showing in profile info output.", brick)

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("IO validation complete.")

        # Create and start a volume
        self.volume['name'] = "volume_2"
        self.volname = "volume_2"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Successfully created and started volume_2")

        # Check profile info on volume without starting profile
        ret, _, _ = profile_info(self.mnode, self.volname)
        self.assertNotEqual(
            ret, 0, "Unexpected:Successfully ran profile info"
            " on volume: %s" % self.volname)
        g.log.info("Expected: Failed to run pofile info on volume: %s",
                   self.volname)

        # Running profile info with different profile options.
        profile_options = ('peek', 'incremental', 'clear', 'incremental peek',
                           'cumulative')
        for option in profile_options:
            # Getting and checking output of profile info.
            ret, _, _ = profile_info(self.mnode, self.volname, options=option)
            self.assertNotEqual(
                ret, 0, "Unexpected: Successfully ran profile info"
                " %s on volume: %s" % (option, self.volname))
            g.log.info(
                "Expected: Failed to execute profile info %s on"
                " volume: %s", option, self.volname)

        # Checking for core files.
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")

        # Checking whether glusterd is running or not
        ret = is_glusterd_running(self.servers)
        self.assertEqual(ret, 0, "Glusterd has crashed on nodes.")
        g.log.info("No glusterd crashes observed.")
Example #11
    def test_ec_truncate_file_with_brick_down(self):
        """
        Test steps:
        1. Create a volume, start and mount it on a client
        2. Bring down redundant bricks in the subvol
        3. Create a file on the volume using "touch"
        4. Truncate the file using "O_TRUNC"
        5. Bring the brick online
        6. Write data on the file and wait for heal completion
        7. Check for crashes and coredumps
        """
        # pylint: disable=unsubscriptable-object
        for restart_type in ("volume_start", "node_reboot"):
            # Time stamp from mnode for checking cores at the end of test
            ret, test_timestamp, _ = g.run(self.mnode, "date +%s")
            self.assertEqual(ret, 0, "date command failed")
            test_timestamp = test_timestamp.strip()

            # Create a file using touch
            file_name = self.mounts[0].mountpoint + "/test_1"
            ret, _, err = g.run(self.mounts[0].client_system,
                                "touch {}".format(file_name))
            self.assertEqual(ret, 0, "File creation failed")
            g.log.info("File Created successfully")

            # List two bricks in each subvol
            subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
            bricks_to_bring_offline = []
            for subvol in subvols:
                self.assertTrue(subvol, "List is empty")
                bricks_to_bring_offline.extend(sample(subvol, 2))

            # Bring two bricks of each subvol offline
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(ret, "Bricks are still online")

            # Validating the bricks are offline or not
            ret = are_bricks_offline(self.mnode, self.volname,
                                     bricks_to_bring_offline)
            self.assertTrue(
                ret, "Some of the bricks {} are still "
                "online".format(bricks_to_bring_offline))

            # Truncate the file
            cmd = (
                'python -c "import os, sys; fd = os.open(\'{}\', os.O_TRUNC )'
                '; os.close( fd )"').format(file_name)
            ret, _, err = g.run(self.mounts[0].client_system, cmd)
            self.assertEqual(ret, 0, err)
            g.log.info("File truncated successfully")

            # Bring back the bricks online
            if restart_type == "volume_start":
                # Bring back bricks online by volume start
                ret, _, err = volume_start(self.mnode,
                                           self.volname,
                                           force=True)
                self.assertEqual(ret, 0, err)
                g.log.info("All bricks are online")
            elif restart_type == "node_reboot":
                # Bring back the bricks online by node restart
                for brick in bricks_to_bring_offline:
                    node_to_reboot = brick.split(":")[0]
                    ret = reboot_nodes_and_wait_to_come_online(node_to_reboot)
                    self.assertTrue(
                        ret, "Reboot Failed on node: "
                        "{}".format(node_to_reboot))
                    g.log.info("Node: %s rebooted successfully",
                               node_to_reboot)
                    time.sleep(60)

            # Check whether bricks are online or not
            ret = are_bricks_online(self.mnode, self.volname,
                                    bricks_to_bring_offline)
            self.assertTrue(
                ret,
                "Bricks {} are still offline".format(bricks_to_bring_offline))

            # write data to the file
            cmd = ('python -c "import os, sys;fd = os.open(\'{}\', '
                   'os.O_RDWR) ;'
                   'os.write(fd, \'This is test after truncate\'.encode());'
                   ' os.close(fd)"').format(file_name)

            ret, _, err = g.run(self.mounts[0].client_system, cmd)
            self.assertEqual(ret, 0, err)
            g.log.info("Data written successfully on to the file")

            # Monitor heal completion
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret, "Heal pending for file {}".format(file_name))

            # check for any crashes on servers and client
            for nodes in (self.servers, [self.clients[0]]):
                ret = is_core_file_created(nodes, test_timestamp)
                self.assertTrue(ret,
                                "Cores found on the {} nodes".format(nodes))
Example #12
    def test_peer_detach_host(self):
        # pylint: disable = too-many-statements
        # Peer detach a specified server from the cluster
        # Peer detach the already detached server again and check the error msg
        # Peer detach an invalid host
        # Peer detach a non existing host
        # Check whether a core file was created or not
        # Peer detach one node which hosts bricks of the created volume
        # Peer detach force a node which is hosting bricks of a volume
        # Peer detach one node which hosts bricks of an offline volume
        # Peer detach force a node which hosts bricks of an offline volume

        # Timestamp of current test case of start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Assigning non existing host to variable
        self.non_exist_host = '256.256.256.256'

        # Assigning invalid ip to variable
        self.invalid_ip = '10.11.a'

        # Peer detach to specified server
        g.log.info("Start detach specified server :%s", self.servers[1])
        ret, _, _ = peer_detach(self.mnode, self.servers[1])
        self.assertEqual(ret, 0,
                         "Failed to detach server :%s" % self.servers[1])

        # Detached server detaching again, Expected to fail detach
        g.log.info("Start detached server detaching "
                   "again : %s", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1])
        self.assertNotEqual(
            ret, 0, "Detach server should "
            "fail :%s" % self.servers[1])
        self.assertEqual(
            err, "peer detach: failed: %s is not part of "
            "cluster\n" % self.servers[1], "Peer "
            "Detach didn't fail as expected")

        # Probing detached server
        g.log.info("Start probing detached server : %s", self.servers[1])
        ret = peer_probe_servers(self.mnode, self.servers[1])
        self.assertTrue(
            ret, "Peer probe failed from %s to other "
            "server : %s" % (self.mnode, self.servers[1]))

        # Detach invalid host
        g.log.info("Start detaching invalid host :%s ", self.invalid_ip)
        ret, _, _ = peer_detach(self.mnode, self.invalid_ip)
        self.assertNotEqual(
            ret, 0, "Detach invalid host should "
            "fail :%s" % self.invalid_ip)

        # Detach non exist host
        g.log.info("Start detaching non exist host : %s", self.non_exist_host)
        ret, _, _ = peer_detach(self.mnode, self.non_exist_host)
        self.assertNotEqual(
            ret, 0, "Detach non existing host "
            "should fail :%s" % self.non_exist_host)

        # Creating Volume
        g.log.info("Started creating volume: %s", self.volname)
        ret = self.setup_volume()
        self.assertTrue(ret, "Volume creation failed: %s" % self.volname)

        # Peer detach one node which contains the bricks of the volume created
        g.log.info(
            "Start detaching server %s which is hosting "
            "bricks of a volume", self.servers[1])
        self.check_detach_error_message(use_force=False)

        #  Peer detach force a node which is hosting bricks of a volume
        g.log.info(
            "Start detaching server using force %s which is hosting "
            "bricks of a volume", self.servers[1])
        self.check_detach_error_message()

        # Peer detach one node which contains bricks of an offline volume
        g.log.info("stopping the volume")
        ret, _, err = volume_stop(self.mnode, self.volname)
        msg = ('volume stop: ' + self.volname + ': failed: Volume ' +
               self.volname + ' is not in the started state\n')
        if msg not in err:
            self.assertEqual(ret, 0,
                             "stopping volume %s failed" % self.volname)
        g.log.info(
            "Start to detach server %s which is hosting "
            "bricks of an offline volume", self.servers[1])
        self.check_detach_error_message(use_force=False)

        # Forceful Peer detach node which hosts bricks of offline volume
        g.log.info(
            "start detaching server %s with force option "
            "which is hosting bricks of a volume", self.servers[1])
        self.check_detach_error_message()

        # starting volume for proper cleanup
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "volume start failed")

        # Checking whether a core file was created in the "/", "/tmp" and
        # "/var/log/core" directories
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")
Example #13
    def test_profile_simultaneously_on_different_nodes(self):
        """
        Test Case:
        1) Create a volume and start it.
        2) Mount volume on client and start IO.
        3) Start profile on the volume.
        4) Create another volume.
        5) Start profile on the volume.
        6) Run volume status in a loop in one of the node.
        7) Run profile info for the new volume on one of the other node
        8) Run profile info for the new volume in loop for 100 times on
           the other node
        """
        # Timestamp of current test case of start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Start IO on mount points.
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--dirname-start-num %d "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("IO validation complete.")

        # Create and start a volume
        self.volume['name'] = "volume_2"
        self.volname = "volume_2"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Successfully created and started volume_2")

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Run volume status on one of the node in loop
        cmd = "for i in `seq 1 100`;do gluster v status;done"
        proc1 = g.run_async(self.servers[1], cmd)

        # Check profile on one of the other node
        cmd = "gluster v profile %s info" % self.volname
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(
            ret, 0, "Failed to run profile info on volume: %s"
            " on node %s" % (self.volname, self.mnode))
        g.log.info("Successfully run pofile info on volume: %s on node %s",
                   self.volname, self.mnode)

        # Run volume profile info on one of the other node in loop
        cmd = """for i in `seq 1 100`;do gluster v profile %s info;
              done""" % self.volname
        proc2 = g.run_async(self.servers[3], cmd)

        ret1, _, _ = proc1.async_communicate()
        ret2, _, _ = proc2.async_communicate()

        self.assertEqual(
            ret1, 0, "Failed to run volume status in a loop"
            " on node %s" % self.servers[1])
        g.log.info(
            "Successfully ran volume status in a loop on node"
            " %s", self.servers[1])

        self.assertEqual(
            ret2, 0, "Failed to run profile info in a loop"
            " on node %s" % self.servers[3])
        g.log.info(
            "Successfully ran profile info in a loop on node"
            " %s", self.servers[3])

        # Checking for core files.
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")

        # Checking whether glusterd is running or not
        ret = is_glusterd_running(self.servers)
        self.assertEqual(ret, 0, "Glusterd has crashed on nodes.")
        g.log.info("No glusterd crashes observed.")