示例#1
0
 def tearDown(self):
     """Clean up the pool, if one exists, and always stop the servers.

     A pool that is not None is disconnected and then destroyed with
     argument 1 (presumably the force flag -- confirm against DaosPool).
     The servers on self.hostlist are stopped from a finally clause, so
     they are shut down even when the pool cleanup raises.
     """
     try:
         pool = self.pool
         if pool is not None:
             pool.disconnect()
             pool.destroy(1)
     finally:
         server_utils.stop_server(hosts=self.hostlist)
示例#2
0
文件: test.py 项目: paf-49/daos
    def stop_servers(self):
        """Reset the NVMe storage when configured, then stop the servers.

        Nothing is done when self.hostfile_servers is falsy.  Failures
        reported as server_utils.ServerFailed are logged through
        self.multi_log and collected instead of being raised.

        Returns:
            list: one error message string per step that failed; empty when
                every step succeeded or there was nothing to stop

        """
        failures = []
        if not self.hostfile_servers:
            return failures

        # Reset the nvme storage while the servers are still known
        if self.nvme_parameter == "nvme":
            self.multi_log("Resetting NVMe storage on the servers")
            try:
                server_utils.storage_reset(self.hostlist_servers)
            except server_utils.ServerFailed as reset_error:
                self.multi_log("  {}".format(reset_error))
                failures.append(
                    "Error resetting nvme storage: {}".format(reset_error))

        # Stop the servers
        self.multi_log("Stopping servers")
        try:
            server_utils.stop_server(hosts=self.hostlist_servers)
        except server_utils.ServerFailed as stop_error:
            self.multi_log("  {}".format(stop_error))
            failures.append("Error stopping servers: {}".format(stop_error))
        return failures
示例#3
0
 def tearDown(self):
     """Stop the agents, if any were started, and then the servers.

     The server shutdown lives in a finally clause so the servers on
     self.hostlist_servers are stopped even when stopping the agents
     raises; the agent error still propagates afterwards.
     """
     try:
         if self.agent_sessions:
             agent_utils.stop_agent(self.agent_sessions)
     finally:
         server_utils.stop_server(hosts=self.hostlist_servers)
示例#4
0
 def tearDown(self):
     """Remove the server host file, then stop the agents and servers.

     Each shutdown step is isolated with its own finally clause: the
     agents and servers are stopped even if removing the host file raises,
     and the servers are stopped even if stopping the agents raises.
     """
     try:
         os.remove(self.hostfile_servers)
     finally:
         try:
             if self.agent_sessions:
                 agent_utils.stop_agent(self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist_servers)
示例#5
0
 def tearDown(self):
     """Release the pool when there is one, then stop the servers.

     The pool is disconnected and destroyed with argument 1 (presumably
     the force flag -- confirm against DaosPool) only when it is not None.
     Stopping the servers on self.hostlist happens in a finally clause and
     therefore also runs when the pool cleanup raises.
     """
     try:
         current_pool = self.pool
         if current_pool is not None:
             current_pool.disconnect()
             current_pool.destroy(1)
     finally:
         server_utils.stop_server(hosts=self.hostlist)
示例#6
0
 def tearDown(self):
     """Remove the host file, then stop the agents and the servers.

     The agents and servers are stopped even if removing the host file
     raises, and the servers are stopped even if stopping the agents
     raises.
     """
     try:
         os.remove(self.hostfile)
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
示例#7
0
    def tearDown(self):
        """Release the container, pool and host file, then stop everything.

        A DaosApiError raised during the cleanup is printed together with
        its traceback and turned into a test failure.  Whatever happens, the
        agents (if any) and the servers are stopped; the servers are stopped
        even when stopping the agents raises.
        """
        try:
            if self.container:
                self.container.close()

            # wait a few seconds and then destroy
            time.sleep(5)
            if self.container:
                self.container.destroy()

            # cleanup the pool
            if self.pool:
                self.pool.disconnect()
                self.pool.destroy(1)

            if self.hostfile is not None:
                os.remove(self.hostfile)

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Test failed during teardown.\n")

        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                # never leave servers running because agent shutdown failed
                server_utils.stop_server(hosts=self.hostlist)
示例#8
0
    def test_simple_delete(self):
        """
        Test destroying a pool created on a single server, nobody is using
        the pool, force is not needed.

        :avocado: tags=pool,pooldestroy,quick
        """
        self.hostlist = self.params.get("test_machines1", '/run/hosts/')
        hostfile = write_host_file.write_host_file(self.hostlist, self.tmp)

        self.agent_sessions = AgentUtils.run_agent(self.basepath, self.hostlist)
        server_utils.run_server(hostfile, self.server_group, self.basepath)

        setid = self.params.get("setname",
                                '/run/setnames/validsetname/')

        try:
            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            uid = os.geteuid()
            gid = os.getegid()

            # TODO make these params in the yaml
            daosctl = self.basepath + '/install/bin/daosctl'

            create_cmd = ('{0} create-pool -m {1} -u {2} -g {3} -s {4}'
                          .format(daosctl, 0x731, uid, gid, setid))

            # the create command prints the uuid of the new pool
            uuid_str = """{0}""".format(process.system_output(create_cmd))
            print("uuid is {0}\n".format(uuid_str))

            # check_for_pool returns 0 when the pool is found on the host
            host = self.hostlist[0]
            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists != 0:
                self.fail("Pool {0} not found on host {1}.\n"
                          .format(uuid_str, host))

            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'
                          .format(daosctl, uuid_str, setid))
            process.system(delete_cmd)

            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists == 0:
                self.fail("Pool {0} found on host {1} when not expected.\n"
                          .format(uuid_str, host))

        except Exception as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        # no matter what happens shutdown the server
        finally:
            try:
                os.remove(hostfile)
            finally:
                try:
                    if self.agent_sessions:
                        AgentUtils.stop_agent(self.hostlist,
                                              self.agent_sessions)
                finally:
                    # the servers must be stopped even if the agents
                    # could not be stopped
                    server_utils.stop_server(hosts=self.hostlist)
示例#9
0
    def test_destroy_connect(self):
        """
        Test destroying a pool that has a connected client with force == false.
        Should fail.

        :avocado: tags=pool,pooldestroy,x
        """
        # stays None until the host file has been written, so the cleanup
        # knows whether there is anything to remove
        hostfile_servers = None
        try:

            # write out a hostfile_servers and start the servers with it
            self.hostlist_servers = self.params.get("test_machines1",
                                                    '/run/hosts/')
            # pick the host to inspect only after the server list has been
            # read from the test parameters
            host = self.hostlist_servers[0]
            hostfile_servers = write_host_file.write_host_file(
                self.hostlist_servers, self.tmp)

            self.agent_sessions = agent_utils.run_agent(
                self.basepath, self.hostlist_servers)
            server_utils.run_server(hostfile_servers, self.server_group,
                                    self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)

            # need a connection to create container
            pool.connect(1 << 1)

            # destroy pool with connection open
            pool.destroy(0)

            # should throw an exception and not hit this
            self.fail("Shouldn't hit this line.\n")

        except DaosApiError as excep:
            print("got exception which is expected so long as it is BUSY")
            print(excep)
            print(traceback.format_exc())
            # pool should still be there; pass the uuid string itself, not
            # the bound method
            exists = check_for_pool.check_for_pool(host, pool.get_uuid_str())
            if exists != 0:
                self.fail("Pool gone, but destroy should have failed.\n")

        # no matter what happens cleanup; every step runs even if the
        # previous one raised
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist_servers)
                finally:
                    if hostfile_servers is not None:
                        os.remove(hostfile_servers)
示例#10
0
 def tearDown(self):
     """Close the container, if any, then stop the agents and servers.

     The agents and servers are stopped even if closing the container
     raises, and the servers are stopped even if stopping the agents
     raises.
     """
     try:
         if self.container:
             self.container.close()
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
 def tearDown(self):
     """Destroy an attached pool, then stop the agents and servers.

     The pool is destroyed only when it is not None and reports itself as
     attached.  The agents and servers are stopped even if the destroy
     raises, and the servers are stopped even if stopping the agents
     raises.
     """
     try:
         if self.pool is not None and self.pool.attached:
             self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
示例#12
0
 def tearDown(self):
     """Stop the agents and servers, then clean up any leftover pools.

     The servers are stopped even if stopping the agents raises, and the
     pool cleanup on self.hostlist runs regardless of either failure.
     """
     try:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
     finally:
         # really make sure everything is gone
         check_for_pool.cleanup_pools(self.hostlist)
示例#13
0
文件: seg_count.py 项目: morsiee/daos
 def tearDown(self):
     """Destroy an attached pool, then stop the agents and servers.

     The pool is destroyed only when it is not None and reports itself as
     attached.  The agents and servers are stopped even if the destroy
     raises, and the servers are stopped even if stopping the agents
     raises.
     """
     try:
         if self.pool is not None and self.pool.attached:
             self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 agent_utils.stop_agent(self.agent_sessions,
                                        self.hostlist_clients)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist_servers)
示例#14
0
 def tearDown(self):
     """Remove the host and URI files, stop agents/servers, chain up.

     Every later step runs even when an earlier one raises: the agents are
     stopped after the file removal, the servers after the agents, and the
     base-class tearDown last of all.
     """
     try:
         os.remove(self.hostfile_servers)
         os.remove(self.uri_file)
     finally:
         try:
             if self.agent_sessions:
                 agent_utils.stop_agent(self.agent_sessions)
         finally:
             try:
                 server_utils.stop_server(hosts=self.hostlist_servers)
             finally:
                 # the base class cleanup must not be skipped by a failed
                 # agent or server shutdown
                 super(CartSelfTest, self).tearDown()
示例#15
0
    def test_destroy_connect(self):
        """
        Test destroying a pool that has a connected client with force == false.
        Should fail.

        :avocado: tags=pool,pooldestroy,x
        """
        # stays None until the host file has been written, so the cleanup
        # knows whether there is anything to remove
        hostfile = None
        try:

            # write out a hostfile and start the servers with it
            self.hostlist = self.params.get("test_machines1", '/run/hosts/')
            # pick the host to inspect only after the host list has been
            # read from the test parameters
            host = self.hostlist[0]
            hostfile = write_host_file.write_host_file(self.hostlist, self.tmp)

            self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                       self.hostlist)
            server_utils.run_server(hostfile, self.server_group, self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)

            # need a connection to create container
            pool.connect(1 << 1)

            # destroy pool with connection open
            pool.destroy(0)

            # should throw an exception and not hit this
            self.fail("Shouldn't hit this line.\n")

        except DaosApiError as excep:
            print("got exception which is expected so long as it is BUSY")
            print(excep)
            print(traceback.format_exc())
            # pool should still be there; pass the uuid string itself, not
            # the bound method
            exists = check_for_pool.check_for_pool(host, pool.get_uuid_str())
            if exists != 0:
                self.fail("Pool gone, but destroy should have failed.\n")

        # no matter what happens cleanup; every step runs even if the
        # previous one raised
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist)
                finally:
                    if hostfile is not None:
                        os.remove(hostfile)
示例#16
0
 def tearDown(self):
     """Destroy the pool, remove the host file, stop agents and servers.

     The agents and servers are stopped even if the pool or file cleanup
     raises, and the servers are stopped even if stopping the agents
     raises.
     """
     # shut 'er down
     try:
         if self.pool:
             self.pool.destroy(1)
         os.remove(self.hostfile_servers)
     finally:
         try:
             if self.agent_sessions:
                 agent_utils.stop_agent(self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist_servers)
示例#17
0
 def tearDown(self):
     """Remove the host file, destroy the pool, stop agents and servers.

     The host file is removed only when self.hostfile is not None and the
     pool is destroyed only when it is truthy.  The agents and servers are
     stopped even if that cleanup raises, and the servers are stopped even
     if stopping the agents raises.
     """
     try:
         if self.hostfile is not None:
             os.remove(self.hostfile)
         if self.pool:
             self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
示例#18
0
 def tearDown(self):
     """Release the container and pool, then always stop the servers.

     A truthy container is closed and destroyed; a truthy pool is
     disconnected and destroyed with argument 1.  stop_server() is called
     without arguments from a finally clause, so it also runs when the
     cleanup above raises.
     """
     try:
         container = self.container
         if container:
             container.close()
             container.destroy()
         pool = self.pool
         if pool:
             pool.disconnect()
             pool.destroy(1)
     finally:
         server_utils.stop_server()
示例#19
0
 def tearDown(self):
     """Tear down the container and the pool before stopping the servers.

     The container (when truthy) is closed and destroyed first, then the
     pool (when truthy) is disconnected and destroyed with argument 1.
     The argument-less stop_server() call sits in a finally clause and is
     therefore executed whether or not the cleanup succeeds.
     """
     try:
         open_container = self.container
         if open_container:
             open_container.close()
             open_container.destroy()
         connected_pool = self.pool
         if connected_pool:
             connected_pool.disconnect()
             connected_pool.destroy(1)
     finally:
         server_utils.stop_server()
示例#20
0
 def tearDown(self):
     """Release the container and pool, then stop agents and servers.

     The container and pool are used unconditionally, so a missing one
     raises here.  The agents and servers are stopped even in that case,
     and the servers are stopped even if stopping the agents raises.
     """
     try:
         self.container.close()
         self.container.destroy()
         self.pool.disconnect()
         self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
示例#21
0
文件: test.py 项目: daos-stack/daos
    def tearDown(self):
        """Stop the agents and servers, then run the base-class tearDown.

        The servers are stopped even if stopping the agents raises, and the
        base-class tearDown runs even if stopping the servers raises.
        """
        try:
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist_servers,
                                      self.agent_sessions)
        finally:
            try:
                server_utils.stop_server(hosts=self.hostlist_servers)
            finally:
                # a failed shutdown must not skip the base class cleanup
                super(TestWithServers, self).tearDown()
示例#22
0
 def tearDown(self):
     """Disconnect and destroy the pool, then stop agents and servers.

     The pool is disconnected only when self.pool_connect is truthy and
     destroyed only when self.pool is truthy.  The agents and servers are
     stopped even if that cleanup raises, and the servers are stopped even
     if stopping the agents raises.
     """
     try:
         if self.pool_connect:
             self.pool.disconnect()
         if self.pool:
             self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist_clients,
                                       self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
示例#23
0
文件: metadata.py 项目: morsiee/daos
 def tearDown(self):
     """Disconnect and destroy the pool, then stop agents and servers.

     The pool is disconnected only when self.pool_connect is truthy and
     destroyed only when self.pool is truthy.  The agents and servers are
     stopped even if that cleanup raises, and the servers are stopped even
     if stopping the agents raises.
     """
     try:
         if self.pool_connect:
             self.pool.disconnect()
         if self.pool:
             self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 agent_utils.stop_agent(self.agent_sessions,
                                        self.hostlist_clients)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist_servers)
示例#24
0
    def tearDown(self):
        """Clean up after the test.

        Removes the host file and destroys the pool (when truthy).  The
        agents and servers are stopped even if that cleanup raises, and the
        servers are stopped even if stopping the agents raises.
        """

        try:
            os.remove(self.hostfile)
            if self.pool:
                self.pool.destroy(1)
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                # never leave servers running because agent shutdown failed
                server_utils.stop_server(hosts=self.hostlist)
示例#25
0
    def tearDown(self):
        """Clean up after the test.

        Removes the server host file and destroys the pool (when truthy).
        The agents and servers are stopped even if that cleanup raises, and
        the servers are stopped even if stopping the agents raises.
        """

        try:
            os.remove(self.hostfile_servers)
            if self.pool:
                self.pool.destroy(1)
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                # never leave servers running because agent shutdown failed
                server_utils.stop_server(hosts=self.hostlist_servers)
示例#26
0
 def tearDown(self):
     """Release the container and pool, then stop agents and servers.

     A truthy container is closed and destroyed; a truthy pool is
     disconnected and destroyed with argument 1.  The agents and servers
     are stopped even if that cleanup raises, and the servers are stopped
     even if stopping the agents raises.
     """
     try:
         if self.container:
             self.container.close()
             self.container.destroy()
         if self.pool:
             self.pool.disconnect()
             self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 agent_utils.stop_agent(self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
 def tearDown(self):
     """Destroy the pool, then stop the agents and the servers.

     The pool destroy is wrapped in a try because pool create may not
     have succeeded; the servers still have to be shut down in that case.
     The servers are also stopped when stopping the agents raises.
     """
     # shut 'er down
     try:
         self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
示例#28
0
 def tearDown(self):
     """Remove the host files, destroy the pool, stop agents and servers.

     Host files are removed only when their attribute is not None and the
     pool is destroyed only when it is not None and reports itself as
     attached.  The agents and servers are stopped even if that cleanup
     raises, and the servers are stopped even if stopping the agents
     raises.
     """
     try:
         if self.hostfile_clients is not None:
             os.remove(self.hostfile_clients)
         if self.hostfile_servers is not None:
             os.remove(self.hostfile_servers)
         if self.pool is not None and self.pool.attached:
             self.pool.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist_clients,
                                       self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist_servers)
示例#29
0
 def tearDown(self):
     """Tear down after each test case.

     Stops the agents (only when self.agent_sessions is truthy), then the
     servers on self.hostlist_servers, then calls the base-class tearDown.
     The nested finally clauses make each later step run even when the
     step before it raises.
     """
     try:
         if self.agent_sessions:
             self.d_log.info("Stopping agents")
             agent_utils.stop_agent(self.agent_sessions,
                                    self.hostlist_clients)
     finally:
         # reached whether or not the agents stopped cleanly
         self.d_log.info("Stopping servers")
         try:
             server_utils.stop_server(hosts=self.hostlist_servers)
         finally:
             # base class cleanup runs even if the server shutdown raised
             super(TestWithServers, self).tearDown()
示例#30
0
    def test_many_servers(self):
        """
        Test destroy on a large (relative) number of servers.

        :avocado: tags=pool,pooldestroy,destroybig
        """
        # stays None until the host file has been written, so the cleanup
        # does not fail with a NameError that would hide the real error
        hostfile_servers = None
        try:
            # write out a hostfile_servers and start the servers with it
            self.hostlist_servers = self.params.get("test_machines6",
                                                    '/run/hosts/')
            hostfile_servers = write_host_file.write_host_file(
                self.hostlist_servers, self.tmp)

            self.agent_sessions = agent_utils.run_agent(
                self.basepath, self.hostlist_servers)
            server_utils.run_server(hostfile_servers, self.server_group,
                                    self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)

            time.sleep(1)

            # okay, get rid of it
            pool.destroy(1)

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("6 server test failed.\n")

        except Exception as excep:
            self.fail("Daos code segfaulted most likely.  Error: %s" % excep)

        # no matter what happens cleanup; every step runs even if the
        # previous one raised
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist_servers)
                finally:
                    if hostfile_servers is not None:
                        os.remove(hostfile_servers)
示例#31
0
文件: open.py 项目: daos-stack/daos
 def tearDown(self):
     """Destroy both containers and pools, then stop agents and servers.

     Containers are destroyed when not None; pools are destroyed when not
     None and attached.  The agents and servers are stopped even if that
     cleanup raises, and the servers are stopped even if stopping the
     agents raises.
     """
     try:
         if self.container1 is not None:
             self.container1.destroy()
         if self.container2 is not None:
             self.container2.destroy()
         if self.pool1 is not None and self.pool1.attached:
             self.pool1.destroy(1)
         if self.pool2 is not None and self.pool2.attached:
             self.pool2.destroy(1)
     finally:
         try:
             if self.agent_sessions:
                 AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
         finally:
             # never leave servers running because agent shutdown failed
             server_utils.stop_server(hosts=self.hostlist)
示例#32
0
    def test_many_servers(self):
        """
        Test destroy on a large (relative) number of servers.

        :avocado: tags=pool,pooldestroy,destroybig
        """
        # stays None until the host file has been written, so the cleanup
        # does not fail with a NameError that would hide the real error
        hostfile = None
        try:
            # write out a hostfile and start the servers with it
            self.hostlist = self.params.get("test_machines6", '/run/hosts/')
            hostfile = write_host_file.write_host_file(self.hostlist, self.tmp)

            self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                       self.hostlist)
            server_utils.run_server(hostfile, self.server_group, self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)

            time.sleep(1)

            # okay, get rid of it
            pool.destroy(1)

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("6 server test failed.\n")

        except Exception as excep:
            self.fail("Daos code segfaulted most likely.  Error: %s" % excep)

        # no matter what happens cleanup; every step runs even if the
        # previous one raised
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist)
                finally:
                    if hostfile is not None:
                        os.remove(hostfile)
示例#33
0
    def test_delete_doesnt_exist(self):
        """
        Test destroying a pool uuid that doesn't exist.

        :avocado: tags=pool,pooldestroy
        """
        self.hostlist_servers = self.params.get("test_machines1",
                                                '/run/hosts/')
        hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.tmp)

        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers)
        server_utils.run_server(hostfile_servers, self.server_group,
                                self.basepath)

        setid = self.params.get("setname", '/run/setnames/validsetname/')
        host = self.hostlist_servers[0]
        try:
            # randomly selected uuid, that is exceptionally unlikely to exist
            bogus_uuid = '81ef94d7-a59d-4a5e-935b-abfbd12f2105'

            # TODO make these params in the yaml
            daosctl = self.basepath + '/install/bin/daosctl'

            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'.format(
                daosctl, bogus_uuid, setid))

            process.system(delete_cmd)

        except process.CmdError:
            # the destroy command is expected to fail, which passes the test
            pass

        else:
            # the command succeeded, so the test has failed; this lives in
            # the else clause so the failure is not swallowed by the
            # exception handler above
            self.fail("Pool {0} found on host {1} when not expected.\n".format(
                bogus_uuid, host))

        # no matter what happens shutdown the server; every step runs even
        # if the previous one raised
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist_servers)
                finally:
                    os.remove(hostfile_servers)
示例#34
0
    def test_delete_doesnt_exist(self):
        """
        Test destroying a pool uuid that doesn't exist.

        :avocado: tags=pool,pooldestroy
        """
        self.hostlist = self.params.get("test_machines1", '/run/hosts/')
        hostfile = write_host_file.write_host_file(self.hostlist, self.tmp)

        self.agent_sessions = AgentUtils.run_agent(self.basepath, self.hostlist)
        server_utils.run_server(hostfile, self.server_group, self.basepath)

        setid = self.params.get("setname",
                                '/run/setnames/validsetname/')
        host = self.hostlist[0]
        try:
            # randomly selected uuid, that is exceptionally unlikely to exist
            bogus_uuid = '81ef94d7-a59d-4a5e-935b-abfbd12f2105'

            # TODO make these params in the yaml
            daosctl = self.basepath + '/install/bin/daosctl'

            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'.format(daosctl,
                                                                  bogus_uuid,
                                                                  setid))

            process.system(delete_cmd)

        except process.CmdError:
            # the destroy command is expected to fail, which passes the test
            pass

        else:
            # the command succeeded, so the test has failed; this lives in
            # the else clause so the failure is not swallowed by the
            # exception handler above
            self.fail("Pool {0} found on host {1} when not expected.\n"
                      .format(bogus_uuid, host))

        # no matter what happens shutdown the server; every step runs even
        # if the previous one raised
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist)
                finally:
                    os.remove(hostfile)
示例#35
0
    def tearDown(self):
        """Stop the agents and servers, then save a per-subtest server log.

        The servers are stopped even if stopping the agents raises.  When
        self.subtest_name is set, /tmp/server.log on every host in
        self.hostlist is moved (over ssh) to a file in self.log_dir whose
        name is the subtest name, an underscore and self.server_log.
        """
        try:
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
        finally:
            # never leave servers running because agent shutdown failed
            server_utils.stop_server(hosts=self.hostlist)

        # collect up a debug log so that we have a separate one for each
        # subtest
        if self.subtest_name:
            # NOTE(review): nothing in this try visibly raises KeyError, and
            # the CalledProcessError that check_call raises on a non-zero
            # exit (e.g. log file missing) is not caught -- confirm intent
            try:
                new_logfile = os.path.join(self.log_dir,
                                           self.subtest_name + "_" +
                                           self.server_log)
                # the command is the same for every host, so build it once
                rename_cmd = ('[ -f \"{0}\" ] && '
                              ' mv \"{0}\" '
                              ' \"{1}\"'.format("/tmp/server.log",
                                                new_logfile))
                # rename on each of the servers
                for host in self.hostlist:
                    subprocess.check_call(['ssh', host, rename_cmd])
            except KeyError:
                pass
示例#36
0
文件: soak.py 项目: morsiee/daos
 def tearDown(self):
     """Stop the servers running on the hosts in self.hostlist_servers."""
     server_hosts = self.hostlist_servers
     server_utils.stop_server(hosts=server_hosts)
示例#37
0
    def test_rebuild_with_io(self):
        """
        Test ID: Rebuild-003

        Test Description: Trigger a rebuild while I/O is ongoing.

        Use Cases:
          -- single pool, single client performing continuous
             read/write/verify sequence while failure/rebuild is triggered
             in another process

        :avocado: tags=pool,rebuild,rebuildwithio
        """

        # the rebuild tests need to redo this stuff each time so not in setup
        # as it usually would be
        server_group = self.params.get("name", '/server_config/',
                                       'daos_server')

        basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        hostfile = write_host_file.write_host_file(self.hostlist, self.workdir)

        try:
            self.agent_sessions = AgentUtils.run_agent(basepath, self.hostlist)
            server_utils.run_server(hostfile, server_group, basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)
            pool.connect(1 << 1)
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            # get pool status and make sure it all looks good before we start
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            # value is captured but not used afterwards (hence the dummy_
            # prefix)
            dummy_pool_version = pool.pool_info.pi_rebuild_st.rs_version

            # do I/O for 30 seconds
            dummy_bw = io_utilities.continuous_io(container, 30)

            # trigger the rebuild: kill the server rank taken from the test
            # parameters and exclude it from the pool
            rank = self.params.get("rank", '/run/testparams/ranks/*')
            server = DaosServer(self.context, server_group, rank)
            server.kill(1)
            pool.exclude([rank])

            # do another 30 seconds of I/O,
            # waiting for some improvements in server bootstrap
            # at which point we can move the I/O to a separate client and
            # really pound it with I/O
            dummy_bw = io_utilities.continuous_io(container, 30)

            # wait for the rebuild to finish, polling every 2 seconds
            # NOTE(review): there is no upper bound on this loop; it only
            # ends once rs_done is reported as 1
            while True:
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check rebuild statistics
            if pool.pool_info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr <= 0:
                self.fail("No objects have been rebuilt.")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr <= 0:
                self.fail("No records have been rebuilt.")

        except (ValueError, DaosApiError) as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # stop the servers, remove the host file and clean up the pools;
            # the agents are stopped and the servers killed afterwards even
            # if one of these steps raises
            try:
                server_utils.stop_server(hosts=self.hostlist)
                os.remove(hostfile)
                # really make sure everything is gone
                check_for_pool.cleanup_pools(self.hostlist)
            finally:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
                server_utils.kill_server(self.hostlist)
示例#38
0
    def test_destroy_recreate(self):
        """
        Create and destroy the same pool several times back to back.

        Every create/destroy cycle is expected to succeed: a DaosApiError or
        any other exception raised along the way fails the test.

        :avocado: tags=pool,pooldestroy,destroyredo
        """
        # Bound before the try so the cleanup can tell whether the host file
        # was ever written; otherwise an early setup failure would surface as
        # a NameError raised from the finally block.
        hostfile_servers = None

        try:
            # write out a hostfile_servers and start the servers with it
            self.hostlist_servers = self.params.get("test_machines1",
                                                    '/run/hosts/')
            hostfile_servers = write_host_file.write_host_file(
                self.hostlist_servers, self.tmp)

            self.agent_sessions = agent_utils.run_agent(
                self.basepath, self.hostlist_servers)
            server_utils.run_server(hostfile_servers, self.server_group,
                                    self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object; the underlying daos storage
            # is created and blown away immediately, three times in a row
            pool = DaosPool(self.context)
            for _ in range(3):
                pool.create(createmode, createuid, creategid, createsize,
                            createsetid, None)
                pool.destroy(1)

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("create/destroy/create/destroy test failed.\n")

        except Exception as excep:
            self.fail("Daos code segfaulted most likely.  Error: %s" % excep)

        # no matter what happens cleanup
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                # still attempted when stopping the agents raised
                server_utils.stop_server(hosts=self.hostlist_servers)
                if hostfile_servers is not None:
                    os.remove(hostfile_servers)
示例#39
0
文件: metadata.py 项目: morsiee/daos
    def test_metadata_server_restart(self):
        """
        Test ID: DAOS-1512
        Test Description: This test will verify 2000 IOR small size container
                          after server restart. Test will write IOR in 5
                          different threads for faster execution time. Each
                          thread will create 400 (8bytes) containers to the
                          same pool. Restart the servers, read IOR container
                          file written previously and validate data integrity
                          by using IOR option "-R -G 1".
        :avocado: tags=metadata,metadata_ior,nvme,small
        """
        self.pool_connect = False
        files_per_thread = 400
        total_ior_threads = 5
        ior_args = {}

        # colon separated list built from the first `createsvc` entries of
        # self.pool.svc.rl_ranks
        createsvc = self.params.get("svcn", '/run/pool/createsvc/')
        svc_list = ":".join(
            str(int(self.pool.svc.rl_ranks[i])) for i in range(createsvc))

        # keyword arguments handed to every ior_runner_thread
        ior_args['client_hostfile_servers'] = self.hostfile_clients
        ior_args['pool_uuid'] = self.pool.get_uuid_str()
        ior_args['svc_list'] = svc_list
        ior_args['basepath'] = self.basepath
        ior_args['server_group'] = self.server_group
        ior_args['tmp_dir'] = self.workdir
        ior_args['iorwriteflags'] = self.params.get("F",
                                                    '/run/ior/iorwriteflags/')
        ior_args['iorreadflags'] = self.params.get("F",
                                                   '/run/ior/iorreadflags/')
        ior_args['iteration'] = self.params.get("iter", '/run/ior/iteration/')
        ior_args['stripe_size'] = self.params.get("s", '/run/ior/stripesize/*')
        ior_args['stripe_count'] = self.params.get("c",
                                                   '/run/ior/stripecount/')
        ior_args['async_io'] = self.params.get("a", '/run/ior/asyncio/')
        ior_args['object_class'] = self.params.get("o",
                                                   '/run/ior/objectclass/')
        ior_args['slots'] = self.params.get("slots", '/run/ior/clientslots/*')

        ior_args['files_per_thread'] = files_per_thread
        self.out_queue = Queue.Queue()

        # IOR write threads
        threads = [
            threading.Thread(target=ior_runner_thread,
                             args=(self.out_queue, "Thread-{}".format(i),
                                   "write"),
                             kwargs=ior_args)
            for i in range(total_ior_threads)]
        if self.thread_control(threads, "write") == "FAIL":
            self.d_log.error(" IOR write Thread FAIL")
            self.fail(" IOR write Thread FAIL")

        # Server Restart
        if self.agent_sessions:
            agent_utils.stop_agent(self.agent_sessions, self.hostlist_clients)
        server_utils.stop_server(hosts=self.hostlist_servers)
        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_clients,
                                                    self.hostlist_servers)
        server_utils.run_server(self.hostfile_servers, self.server_group,
                                self.basepath)

        # Read IOR with verification with same number of threads
        threads = [
            threading.Thread(target=ior_runner_thread,
                             args=(self.out_queue, "Thread-{}".format(i),
                                   "read"),
                             kwargs=ior_args)
            for i in range(total_ior_threads)]
        if self.thread_control(threads, "read") == "FAIL":
            # log the phase that actually failed (was a copy of the write
            # message)
            self.d_log.error(" IOR read Thread FAIL")
            self.fail(" IOR read Thread FAIL")
示例#40
0
    def test_destroy_recreate(self):
        """
        Create and destroy the same pool several times back to back.

        Every create/destroy cycle is expected to succeed: a DaosApiError or
        any other exception raised along the way fails the test.

        :avocado: tags=pool,pooldestroy,destroyredo
        """
        # Bound before the try so the cleanup can tell whether the host file
        # was ever written; otherwise an early setup failure would surface as
        # a NameError raised from the finally block.
        hostfile = None

        try:
            # write out a hostfile and start the servers with it
            self.hostlist = self.params.get("test_machines1", '/run/hosts/')
            hostfile = write_host_file.write_host_file(self.hostlist, self.tmp)

            self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                       self.hostlist)
            server_utils.run_server(hostfile, self.server_group, self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object; the underlying daos storage
            # is created and blown away immediately, three times in a row
            pool = DaosPool(self.context)
            for _ in range(3):
                pool.create(createmode, createuid, creategid,
                            createsize, createsetid, None)
                pool.destroy(1)

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("create/destroy/create/destroy test failed.\n")

        except Exception as excep:
            self.fail("Daos code segfaulted most likely.  Error: %s" % excep)

        # no matter what happens cleanup
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                # still attempted when stopping the agents raised
                server_utils.stop_server(hosts=self.hostlist)
                if hostfile is not None:
                    os.remove(hostfile)
示例#41
0
 def tearDown(self):
     """Destroy the pool when one is still attached, then stop the servers.

     The server shutdown runs even if destroying the pool raises.
     """
     try:
         pool_attached = self.pool is not None and self.pool.attached
         if pool_attached:
             self.pool.destroy(1)
     finally:
         server_utils.stop_server(hosts=self.hostlist_servers)
示例#42
0
 def tearDown(self):
     """Stop the agents (when any were started) and then the servers.

     The server shutdown is attempted even if stopping the agents raises.
     """
     try:
         if self.agent_sessions:
             agent_utils.stop_agent(self.agent_sessions, self.hostlist_clients)
     finally:
         server_utils.stop_server(hosts=self.hostlist_servers)
示例#43
0
    def test_delete_wrong_servers(self):
        """
        Test destroying a valid pool but use the wrong server group.

        The destroy command issued with the bad set name is expected to
        raise; the pool is then destroyed with the good set name to clean up.

        :avocado: tags=pool,pooldestroy
        """

        self.hostlist = self.params.get("test_machines1", '/run/hosts/')
        hostfile = write_host_file.write_host_file(self.hostlist, self.tmp)

        self.agent_sessions = AgentUtils.run_agent(self.basepath, self.hostlist)
        server_utils.run_server(hostfile, self.server_group, self.basepath)

        # need both a good and bad set
        goodsetid = self.params.get("setname",
                                    '/run/setnames/validsetname/')

        badsetid = self.params.get("setname",
                                   '/run/setnames/badsetname/')

        host = self.hostlist[0]
        # TODO make these params in the yaml
        daosctl = self.basepath + '/install/bin/daosctl'

        try:
            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            uid = os.geteuid()
            gid = os.getegid()

            create_cmd = ('{0} create-pool -m {1} -u {2} -g {3} -s {4}'
                          .format(daosctl, 0x731, uid, gid, goodsetid))
            uuid_str = """{0}""".format(process.system_output(create_cmd))
            print("uuid is {0}\n".format(uuid_str))

            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists != 0:
                self.fail("Pool {0} not found on host {1}.\n"
                          .format(uuid_str, host))

            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'.format(daosctl,
                                                                  uuid_str,
                                                                  badsetid))

            # Only this command is expected to raise.  Keeping the handler
            # this narrow stops it from swallowing the self.fail() calls,
            # which a try-wide "except Exception" would also catch.
            try:
                process.system(delete_cmd)
            except Exception as excep:
                # expected: destroying through the wrong group must not work
                print(excep)
            else:
                # the above command should fail resulting in an exception so
                # if we get here the test has failed
                self.fail("Pool {0} found on host {1} when not expected.\n"
                          .format(uuid_str, host))

            # now clean up the pool for real
            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'
                          .format(daosctl, uuid_str, goodsetid))
            process.system(delete_cmd)

        # no matter what happens shutdown the server
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                # still attempted when stopping the agents raised
                server_utils.stop_server(hosts=self.hostlist)
                os.remove(hostfile)
示例#44
0
    def test_simple_delete(self):
        """
        Create a pool on a single server and destroy it again while nobody
        is using it, so no force is required.

        :avocado: tags=pool,pooldestroy,quick
        """
        self.hostlist_servers = self.params.get(
            "test_machines1", '/run/hosts/')
        host_file = write_host_file.write_host_file(self.hostlist_servers,
                                                    self.tmp)

        self.agent_sessions = agent_utils.run_agent(
            self.basepath, self.hostlist_servers)
        server_utils.run_server(
            host_file, self.server_group, self.basepath)

        group_name = self.params.get("setname", '/run/setnames/validsetname/')

        try:
            # the pool is created for the user running the test, whose
            # uid/gid should be perfectly valid
            owner_uid = os.geteuid()
            owner_gid = os.getegid()

            # TODO make these params in the yaml
            daosctl = self.basepath + '/install/bin/daosctl'

            create_cmd = '{0} create-pool -m {1} -u {2} -g {3} -s {4}'.format(
                daosctl, 0x731, owner_uid, owner_gid, group_name)
            create_output = process.system_output(create_cmd)
            uuid_str = "{0}".format(create_output)
            print("uuid is {0}\n".format(uuid_str))

            # check_for_pool returns 0 when the pool is present on the host
            host = self.hostlist_servers[0]
            if check_for_pool.check_for_pool(host, uuid_str) != 0:
                self.fail(
                    "Pool {0} not found on host {1}.\n".format(uuid_str, host))

            destroy_cmd = '{0} destroy-pool -i {1} -s {2}'.format(
                daosctl, uuid_str, group_name)
            process.system(destroy_cmd)

            if check_for_pool.check_for_pool(host, uuid_str) == 0:
                self.fail("Pool {0} found on host {1} when not expected.\n"
                          .format(uuid_str, host))

        except Exception as error:
            print(error)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        # no matter what happens shutdown the server
        finally:
            try:
                os.remove(host_file)
            finally:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
                server_utils.stop_server(hosts=self.hostlist_servers)
示例#45
0
 def tearDown(self):
     """Stop the agents (when any were started) and then the servers.

     The server shutdown is attempted even if stopping the agents raises.
     """
     try:
         if self.agent_sessions:
             AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
     finally:
         server_utils.stop_server(hosts=self.hostlist)
示例#46
0
    def test_destroy_async(self):
        """
        Perform destroy asynchronously, successful and failed.

        The pool is first destroyed through the callback interface while the
        servers are running (GLOB_RC must end up 0), then recreated and
        destroyed again after the servers were stopped (GLOB_RC must end up
        -1011).  GLOB_SIGNAL / GLOB_RC are presumably updated by cb_func,
        which is defined outside this method.

        :avocado: tags=pool,pooldestroy,destroyasync
        """

        global GLOB_SIGNAL
        global GLOB_RC

        # Bound before the try so the cleanup can tell whether the host file
        # was ever written; otherwise an early setup failure would surface as
        # a NameError raised from the finally block.
        hostfile = None

        try:
            # write out a hostfile and start the servers with it
            self.hostlist = self.params.get("test_machines1", '/run/hosts/')
            hostfile = write_host_file.write_host_file(self.hostlist, self.tmp)

            self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                       self.hostlist)
            server_utils.run_server(hostfile, self.server_group, self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)

            # allow the callback to tell us when its been called
            GLOB_SIGNAL = threading.Event()

            # blow it away but this time get return code via callback function
            pool.destroy(1, cb_func)

            # wait for callback (no timeout: blocks until the event is set)
            GLOB_SIGNAL.wait()
            if GLOB_RC != 0:
                self.fail("RC not as expected in async test")

            # recreate the pool, reset the signal, shutdown the
            # servers so call will fail and then check rc in the callback
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)
            GLOB_SIGNAL = threading.Event()
            GLOB_RC = -9900000
            server_utils.stop_server(hosts=self.hostlist)
            pool.destroy(1, cb_func)

            # wait for callback, expecting a timeout since servers are down
            GLOB_SIGNAL.wait()
            if GLOB_RC != -1011:
                self.fail("RC not as expected in async test")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("destroy async test failed.\n")

        except Exception as excep:
            self.fail("Daos code segfaulted most likely. Error: %s" % excep)

        # no matter what happens cleanup
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                # still attempted when stopping the agents raised; on the
                # normal path the servers were already stopped once above
                server_utils.stop_server(hosts=self.hostlist)
                if hostfile is not None:
                    os.remove(hostfile)
示例#47
0
    def test_destroy_withdata(self):
        """
        Test destroying a pool that holds a container with data in it.

        A pool and a container are created, daosctl write-pattern is run
        against the container, and the pool is then destroyed.  Any
        DaosApiError or other exception fails the test.

        :avocado: tags=pool,pooldestroy,destroydata
        """
        # Bound before the try so the cleanup can tell whether the host file
        # was ever written; otherwise an early setup failure would surface as
        # a NameError raised from the finally block.
        hostfile = None

        try:
            # write out a hostfile and start the servers with it
            self.hostlist = self.params.get("test_machines1", '/run/hosts/')
            hostfile = write_host_file.write_host_file(self.hostlist, self.tmp)

            self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                       self.hostlist)
            server_utils.run_server(hostfile, self.server_group, self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)

            # need a connection to create container
            pool.connect(1 << 1)

            # create a container
            container = DaosContainer(self.context)
            container.create(pool.handle)

            pool.disconnect()

            daosctl = self.basepath + '/install/bin/daosctl'

            # put some data into the container through the daosctl tool
            write_cmd = ('{0} write-pattern -i {1} -l 0 -c {2} -p sequential'.
                         format(daosctl, c_uuid_to_str(pool.uuid),
                                c_uuid_to_str(container.uuid)))

            process.system_output(write_cmd)

            # blow it away
            pool.destroy(1)

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("create/destroy/create/destroy test failed.\n")

        except Exception as excep:
            self.fail("Daos code segfaulted most likely.  Error: %s" % excep)

        # no matter what happens cleanup
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                # still attempted when stopping the agents raised
                server_utils.stop_server(hosts=self.hostlist)
                if hostfile is not None:
                    os.remove(hostfile)
示例#48
0
    def test_bad_server_group(self):
        """
        Test destroying a pool created on server group A by passing
        in server group B, should fail.

        After the destroy attempt through the wrong group the pool must still
        exist; it is then destroyed through the right group and must be gone.

        :avocado: tags=pool,pooldestroy
        """
        setid2 = self.basepath + self.params.get("setname",
                                                 '/run/setnames/othersetname/')

        self.hostlist1 = self.params.get("test_machines1", '/run/hosts/')
        hostfile1 = write_host_file.write_host_file(self.hostlist1, self.tmp)

        self.hostlist2 = self.params.get("test_machines2a", '/run/hosts/')
        hostfile2 = write_host_file.write_host_file(self.hostlist2, self.tmp)

        # TODO make these params in the yaml
        daosctl = self.basepath + '/install/bin/daosctl'

        # start 2 different sets of servers,
        self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                   self.hostlist1)
        self.agent_sessions2 = AgentUtils.run_agent(self.basepath,
                                                    self.hostlist2)
        server_utils.run_server(hostfile1, self.server_group, self.basepath)
        server_utils.run_server(hostfile2, setid2, self.basepath)

        host = self.hostlist1[0]

        try:
            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            uid = os.geteuid()
            gid = os.getegid()

            create_cmd = ('{0} create-pool -m {1} -u {2} -g {3} -s {4}'
                          .format(daosctl, 0x731, uid, gid,
                                  self.server_group))
            uuid_str = """{0}""".format(process.system_output(create_cmd))
            print("uuid is {0}\n".format(uuid_str))

            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists != 0:
                self.fail("Pool {0} not found on host {1}.\n"
                          .format(uuid_str, host))

            # try and delete it using the wrong group
            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'
                          .format(daosctl, uuid_str, setid2))

            # Only this command may raise without failing the test.  Keeping
            # the handler this narrow stops it from swallowing the
            # self.fail() calls, which a try-wide "except Exception" would
            # also catch.
            try:
                process.system(delete_cmd)
            except Exception as excep:
                print(excep)

            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists != 0:
                self.fail("Pool {0} not found on host {1} but delete "
                          "should have failed.\n".format(uuid_str, host))

            # now issue a good delete command so we clean-up after this test
            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'
                          .format(daosctl, uuid_str, self.server_group))

            process.system(delete_cmd)

            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists == 0:
                self.fail("Pool {0} found on host {1} but delete "
                          "should have removed it.\n"
                          .format(uuid_str, host))

        # no matter what happens shutdown the server
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist1, self.agent_sessions)
                if self.agent_sessions2:
                    AgentUtils.stop_agent(self.hostlist2, self.agent_sessions2)
            finally:
                # still attempted when stopping the agents raised
                # NOTE(review): self.hostlist is not assigned in this method
                # (the servers were started from hostlist1 and hostlist2) --
                # confirm it is set elsewhere and covers both groups.
                server_utils.stop_server(hosts=self.hostlist)
                os.remove(hostfile1)
                os.remove(hostfile2)
示例#49
0
    def test_bad_server_group(self):
        """
        Test destroying a pool created on server group A by passing
        in server group B, should fail.

        After the destroy attempt through the wrong group the pool must still
        exist; it is then destroyed through the right group and must be gone.

        :avocado: tags=pool,pooldestroy
        """
        setid2 = self.basepath + self.params.get(
            "setname", '/run/setnames/othersetname/')

        self.hostlist_servers1 = self.params.get("test_machines1",
                                                 '/run/hosts/')
        hostfile_servers1 = write_host_file.write_host_file(
            self.hostlist_servers1, self.tmp)

        self.hostlist_servers2 = self.params.get("test_machines2a",
                                                 '/run/hosts/')
        hostfile_servers2 = write_host_file.write_host_file(
            self.hostlist_servers2, self.tmp)

        # TODO make these params in the yaml
        daosctl = self.basepath + '/install/bin/daosctl'

        # start 2 different sets of servers,
        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers1)
        self.agent_sessions2 = agent_utils.run_agent(self.basepath,
                                                     self.hostlist_servers2)
        server_utils.run_server(hostfile_servers1, self.server_group,
                                self.basepath)
        server_utils.run_server(hostfile_servers2, setid2, self.basepath)

        host = self.hostlist_servers1[0]

        try:
            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            uid = os.geteuid()
            gid = os.getegid()

            create_cmd = ('{0} create-pool -m {1} -u {2} -g {3} -s {4}'.format(
                daosctl, 0x731, uid, gid, self.server_group))
            uuid_str = """{0}""".format(process.system_output(create_cmd))
            print("uuid is {0}\n".format(uuid_str))

            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists != 0:
                self.fail("Pool {0} not found on host {1}.\n".format(
                    uuid_str, host))

            # try and delete it using the wrong group
            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'.format(
                daosctl, uuid_str, setid2))

            # Only this command may raise without failing the test.  Keeping
            # the handler this narrow stops it from swallowing the
            # self.fail() calls, which a try-wide "except Exception" would
            # also catch.
            try:
                process.system(delete_cmd)
            except Exception as excep:
                print(excep)

            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists != 0:
                self.fail("Pool {0} not found on host {1} but delete "
                          "should have failed.\n".format(uuid_str, host))

            # now issue a good delete command so we clean-up after this test
            delete_cmd = ('{0} destroy-pool -i {1} -s {2}'.format(
                daosctl, uuid_str, self.server_group))

            process.system(delete_cmd)

            exists = check_for_pool.check_for_pool(host, uuid_str)
            if exists == 0:
                self.fail("Pool {0} found on host {1} but delete "
                          "should have removed it.\n".format(uuid_str, host))

        # no matter what happens shutdown the server
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
                if self.agent_sessions2:
                    agent_utils.stop_agent(self.agent_sessions2)
            finally:
                # still attempted when stopping the agents raised
                # NOTE(review): self.hostlist_servers is not assigned in this
                # method (the servers were started from hostlist_servers1 and
                # hostlist_servers2) -- confirm it is set elsewhere and
                # covers both groups.
                server_utils.stop_server(hosts=self.hostlist_servers)
                os.remove(hostfile_servers1)
                os.remove(hostfile_servers2)
示例#50
0
 def tearDown(self):
     """Tear down the test: drop an attached pool, then stop the servers.

     Stopping the servers happens in a finally clause, so it is attempted
     even when the pool destroy raises.
     """
     try:
         if self.pool is not None:
             if self.pool.attached:
                 self.pool.destroy(1)
     finally:
         server_utils.stop_server(hosts=self.hostlist_servers)
示例#51
0
    def test_rebuild_with_io(self):
        """
        Test ID: Rebuild-003

        Test Description: Trigger a rebuild while I/O is ongoing.

        Use Cases:
          -- single pool, single client performing continuous
             read/write/verify sequence while failure/rebuild is triggered
             in another process

        :avocado: tags=pool,rebuild,rebuildwithio
        """

        # every rebuild test brings up its own servers, which is why this
        # is done here instead of in setup
        group_name = self.params.get(
            "name", '/server_config/', 'daos_server')

        self.hostlist_servers = self.params.get("test_machines", '/run/hosts/')
        host_file = write_host_file.write_host_file(self.hostlist_servers,
                                                    self.workdir)

        try:
            self.agent_sessions = agent_utils.run_agent(
                self.basepath, self.hostlist_servers)
            server_utils.run_server(host_file, group_name, self.basepath)

            # the pool is owned by the user running the test, whose uid/gid
            # should be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # the remaining pool create parameters come from the yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get(
                "setname", '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # python pool object first, then the underlying daos storage,
            # a connection and an open container to do I/O against
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)
            pool.connect(1 << 1)
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            # before anything has failed the pool status must look clean;
            # the first violated expectation fails the test
            pool.pool_query()
            info = pool.pool_info
            rebuild = info.pi_rebuild_st
            initial_checks = (
                (info.pi_ndisabled != 0,
                 "Number of disabled targets reporting incorrectly.\n"),
                (rebuild.rs_errno != 0,
                 "Rebuild error but rebuild hasn't run.\n"),
                (rebuild.rs_done != 1,
                 "Rebuild is running but device hasn't failed yet.\n"),
                (rebuild.rs_obj_nr != 0,
                 "Rebuilt objs not zero.\n"),
                (rebuild.rs_rec_nr != 0,
                 "Rebuilt recs not zero.\n"),
            )
            for violated, message in initial_checks:
                if violated:
                    self.fail(message)
            dummy_pool_version = rebuild.rs_version

            # do I/O for 30 seconds
            dummy_bw = io_utilities.continuous_io(container, 30)

            # trigger the rebuild by killing one rank and excluding it
            rank = self.params.get("rank", '/run/testparams/ranks/*')
            server = DaosServer(self.context, group_name, rank)
            server.kill(1)
            pool.exclude([rank])

            # do another 30 seconds of I/O,
            # waiting for some improvements in server bootstrap
            # at which point we can move the I/O to a separate client and
            # really pound it with I/O
            dummy_bw = io_utilities.continuous_io(container, 30)

            # poll every 2 seconds until the rebuild reports done
            pool.pool_query()
            while pool.pool_info.pi_rebuild_st.rs_done != 1:
                time.sleep(2)
                pool.pool_query()

            # check rebuild statistics from the final query
            info = pool.pool_info
            rebuild = info.pi_rebuild_st
            if info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(info.pi_ndisabled))
            if rebuild.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    rebuild.rs_errno))
            if rebuild.rs_obj_nr <= 0:
                self.fail("No objects have been rebuilt.")
            if rebuild.rs_rec_nr <= 0:
                self.fail("No records have been rebuilt.")

        except (ValueError, DaosApiError) as error:
            print(error)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # wait for the I/O process to finish
            try:
                server_utils.stop_server(hosts=self.hostlist_servers)
                os.remove(host_file)
                # really make sure everything is gone
                check_for_pool.cleanup_pools(self.hostlist_servers)
            finally:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
                server_utils.kill_server(self.hostlist_servers)
示例#52
0
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Expand on the basic test by rebuilding 2
        pools at once.

        Two pools are created and connected, a container is created and
        opened in each, and the same randomly generated records are written
        to both and read back.  The server at self.rank is then killed and
        excluded from both pools; for each pool in turn the test polls until
        the rebuild reports done, checks the rebuild statistics and re-reads
        every saved record.

        Use Cases:
          -- multipool rebuild, single client, various object and record counts

        :avocado: tags=pool,rebuild,rebuildmulti
        """
        def random_text(length):
            """Return a string of `length` random uppercase letters/digits."""
            return ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(length))

        try:
            # initialize python pool object then create the underlying
            # daos storage, the way the code is now the pools should be
            # on the same storage and have the same service leader
            pool1 = DaosPool(self.context)
            pool2 = DaosPool(self.context)
            pool1.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)
            pool2.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool1.connect(1 << 1)
            pool2.connect(1 << 1)

            # create containers
            container1 = DaosContainer(self.context)
            container1.create(pool1.handle)
            container2 = DaosContainer(self.context)
            container2.create(pool2.handle)

            # now open them
            container1.open()
            container2.open()

            # Putting the same data in both pools, at least for now to simplify
            # checking its correct
            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):

                    # make some stuff up and write
                    dkey = random_text(5)
                    akey = random_text(5)
                    data = random_text(self.size)

                    # Used DAOS_OC_R1S_SPEC_RANK
                    # 1 replica with specified rank
                    obj, txn = container1.write_an_obj(data,
                                                       len(data),
                                                       dkey,
                                                       akey,
                                                       obj,
                                                       self.rank,
                                                       obj_cls=15)
                    obj, txn = container2.write_an_obj(data,
                                                       len(data),
                                                       dkey,
                                                       akey,
                                                       obj,
                                                       self.rank,
                                                       obj_cls=15)
                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure its correct containers
                    data2 = container1.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P1, read it back, didn't match\n")
                    data2 = container2.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P2, read it back, didn't match\n")

            # kill a server
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool1.exclude([self.rank])
            pool2.exclude([self.rank])

            # check that rebuild finishes, no errors, progress data as
            # know it to be.  Check pool 1 first then we'll check 2 below.
            # NOTE(review): this poll has no upper bound; it spins forever
            # if rs_done never becomes 1.
            while True:
                pool1.pool_query()
                if pool1.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true,
            if pool1.pool_info.pi_ndisabled != 1:
                self.fail(
                    "P1 number disabled targets reporting incorrectly: {}".
                    format(pool1.pool_info.pi_ndisabled))
            if pool1.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}".format(
                    pool1.pool_info.pi_rebuild_st.rs_errno))
            if pool1.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_obj_nr, self.objcount))
            if (pool1.pool_info.pi_rebuild_st.rs_rec_nr !=
                    (self.reccount * self.objcount)):
                self.fail("P1 rebuilt recs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_rec_nr,
                    self.reccount * self.objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container1.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

            # now check the other pool
            while True:
                pool2.pool_query()
                if pool2.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true
            if pool2.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number disabled targets reporting incorrectly: {}".format(
                        pool2.pool_info.pi_ndisabled))
            if pool2.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool2.pool_info.pi_rebuild_st.rs_errno))
            if pool2.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_obj_nr, self.objcount))
            if (pool2.pool_info.pi_rebuild_st.rs_rec_nr !=
                    (self.reccount * self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_rec_nr,
                    (self.reccount * self.objcount)))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container2.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print(excp)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # kill_server must run even when the graceful stop or the pool
            # cleanup raises, otherwise server processes are left behind
            try:
                server_utils.stop_server(hosts=self.hostlist_servers)
                check_for_pool.cleanup_pools(self.hostlist_servers)
            finally:
                server_utils.kill_server(self.hostlist_servers)
示例#53
0
    def test_delete_wrong_servers(self):
        """
        Test destroying a valid pool but use the wrong server group.

        A pool is created with the valid set name, then a destroy is
        attempted with the bad set name.  The destroy command is expected to
        raise; the test fails if it completes without an exception.  The
        pool is afterwards destroyed with the valid set name, and the agents
        and servers are stopped and the host file removed.

        :avocado: tags=pool,pooldestroy
        """

        self.hostlist_servers = self.params.get("test_machines1",
                                                '/run/hosts/')
        hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.tmp)

        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers)
        server_utils.run_server(hostfile_servers, self.server_group,
                                self.basepath)

        # need both a good and bad set
        goodsetid = self.params.get("setname", '/run/setnames/validsetname/')

        badsetid = self.params.get("setname", '/run/setnames/badsetname/')

        host = self.hostlist_servers[0]
        # TODO make these params in the yaml
        daosctl = self.basepath + '/install/bin/daosctl'

        # True while a pool created by this test still has to be destroyed
        pool_needs_cleanup = False

        try:
            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            uid = os.geteuid()
            gid = os.getegid()

            create_cmd = ('{0} create-pool -m {1} -u {2} -g {3} -s {4}'.format(
                daosctl, 0x731, uid, gid, goodsetid))
            uuid_str = """{0}""".format(process.system_output(create_cmd))
            pool_needs_cleanup = True
            print("uuid is {0}\n".format(uuid_str))

            try:
                exists = check_for_pool.check_for_pool(host, uuid_str)
                if exists != 0:
                    self.fail("Pool {0} not found on host {1}.\n".format(
                        uuid_str, host))

                delete_cmd = ('{0} destroy-pool -i {1} -s {2}'.format(
                    daosctl, uuid_str, badsetid))

                # Only the destroy command itself is expected to raise.  The
                # handler is kept this narrow so that it cannot swallow the
                # self.fail() calls, which also raise Exception subclasses.
                try:
                    process.system(delete_cmd)
                except Exception as excep:  # pylint: disable=broad-except
                    print("destroy with wrong server group failed as "
                          "expected: {0}".format(excep))
                else:
                    # the destroy reported success, so there is nothing
                    # left to clean up and the test has failed
                    pool_needs_cleanup = False
                    self.fail(
                        "Pool {0} found on host {1} when not expected.\n"
                        .format(uuid_str, host))
            finally:
                # clean up the pool for real
                if pool_needs_cleanup:
                    delete_cmd = ('{0} destroy-pool -i {1} -s {2}'.format(
                        daosctl, uuid_str, goodsetid))
                    process.system(delete_cmd)

        # no matter what happens shutdown the server; each step is
        # protected so a failure in one does not skip the ones after it
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist_servers)
                finally:
                    os.remove(hostfile_servers)
    def test_container_basics(self):
        """
        Test basic container create/destroy/open/close/query.  Nothing fancy
        just making sure they work at a rudimentary level

        Starts the agents and servers, creates and connects to a pool,
        then creates, opens, queries, closes and destroys a container.  The
        UUID reported by the query must match the one held by the container
        object.  The pool, agents and servers are torn down at the end.

        :avocado: tags=container,containercreate,containerdestroy,basecont
        """

        pool = None
        hostlist = None

        try:
            hostlist = self.params.get("test_machines", '/run/hosts/*')
            hostfile = write_host_file.write_host_file(hostlist,
                                                       self.workdir)

            self.agent_sessions = AgentUtils.run_agent(self.basepath, hostlist)
            server_utils.run_server(hostfile, self.server_group, self.basepath)

            # give it time to start
            time.sleep(2)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/conttests/createmode/')
            createuid = self.params.get("uid", '/run/conttests/createuid/')
            creategid = self.params.get("gid", '/run/conttests/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/conttests/createset/')
            createsize = self.params.get("size", '/run/conttests/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)

            # need a connection to create container
            pool.connect(1 << 1)

            # create a container
            container = DaosContainer(self.context)
            container.create(pool.handle)

            # now open it
            container.open()

            # do a query and compare the UUID returned from create with
            # that returned by query
            container.query()

            if container.get_uuid_str() != c_uuid_to_str(
                    container.info.ci_uuid):
                self.fail("Container UUID did not match the one in info\n")

            container.close()

            # wait a few seconds and then destroy
            time.sleep(5)
            container.destroy()

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Test was expected to pass but it failed.\n")
        except Exception as excep:
            # NOTE(review): this also catches the exception raised by the
            # self.fail() above, so a UUID mismatch is reported with this
            # message as well
            self.fail("Daos code segfaulted most likely, error: %s" % excep)
        finally:
            # cleanup the pool; the agents and servers are stopped even when
            # the pool cleanup raises (e.g. the pool was never connected)
            try:
                if pool is not None:
                    pool.disconnect()
                    pool.destroy(1)
            finally:
                try:
                    if self.agent_sessions:
                        AgentUtils.stop_agent(hostlist, self.agent_sessions)
                finally:
                    server_utils.stop_server(hosts=hostlist)
示例#55
0
    def test_metadata_server_restart(self):
        """JIRA ID: DAOS-1512.

        Test Description:
            Verify 2000 small IOR containers survive a server restart.  Five
            threads each run IOR against 400 container UUIDs in the same
            pool to write the data; the agents and servers are then
            restarted (servers started with clean=False) and the same
            threads/UUIDs are used again with the IOR read flags to
            validate the data.

        Use Cases:
            ?

        :avocado: tags=metadata,metadata_ior,nvme,small
        """
        files_per_thread = 400
        total_ior_threads = 5
        self.out_queue = Queue.Queue()

        processes = self.params.get("slots", "/run/ior/clientslots/*")

        # One list of container uuids per IOR thread; the same lists are
        # used for the write pass and for the read pass
        list_of_uuid_lists = []
        for _ in range(total_ior_threads):
            list_of_uuid_lists.append(
                [str(uuid.uuid4()) for _ in range(files_per_thread)])

        # First pass writes the data, second pass reads it back; the agents
        # and servers are restarted in between
        for operation in ("write", "read"):
            flags_path = "/run/ior/ior{}flags/".format(operation)

            # Build one IOR thread per uuid list
            threads = []
            for index, uuid_list in enumerate(list_of_uuid_lists):
                # IOR command used by this thread
                ior_cmd = IorCommand()
                ior_cmd.get_params(self)
                ior_cmd.set_daos_params(self.server_group, self.pool)
                ior_cmd.flags.value = self.params.get("F", flags_path)

                # Keyword arguments for the ior_runner_thread method
                thread_kwargs = {
                    "ior_cmd": ior_cmd,
                    "uuids": uuid_list,
                    "mgr": self.orterun,
                    "attach": self.tmp,
                    "hostfile": self.hostfile_clients,
                    "procs": processes,
                    "results": self.out_queue
                }
                ior_thread = threading.Thread(
                    target=ior_runner_thread, kwargs=thread_kwargs)
                threads.append(ior_thread)

                self.log.info("Creatied %s thread %s with container uuids %s",
                              operation, index, uuid_list)

            # Run the IOR threads and bail out on any failure
            if self.thread_control(threads, operation) == "FAIL":
                failure = "IOR {} Thread FAIL".format(operation)
                self.d_log.error(failure)
                self.fail(failure)

            # Only the write pass is followed by a restart
            if operation != "write":
                continue

            # Stop the agents and servers
            if self.agent_sessions:
                stop_agent(self.agent_sessions, self.hostlist_clients)
            stop_server(hosts=self.hostlist_servers)

            # Start the agents
            self.agent_sessions = run_agent(
                self.basepath, self.hostlist_clients, self.hostlist_servers)

            # Start the servers
            run_server(
                self.hostfile_servers, self.server_group, self.basepath,
                clean=False)
示例#56
0
    def test_destroy_async(self):
        """
        Perform destroy asynchronously, successful and failed.

        A pool is created and destroyed with a callback; the return code
        delivered through GLOB_RC must be 0.  The pool is then recreated,
        the servers are stopped and the destroy is repeated, this time
        expecting GLOB_RC to be -1011.

        :avocado: tags=pool,pooldestroy,destroyasync
        """

        global GLOB_SIGNAL
        global GLOB_RC

        # Bound before the try so the cleanup below can tell whether the
        # host file was ever written
        hostfile_servers = None

        try:
            # write out a hostfile_servers and start the servers with it
            self.hostlist_servers = self.params.get("test_machines1",
                                                    '/run/hosts/')
            hostfile_servers = write_host_file.write_host_file(
                self.hostlist_servers, self.tmp)

            self.agent_sessions = agent_utils.run_agent(
                self.basepath, self.hostlist_servers)
            server_utils.run_server(hostfile_servers, self.server_group,
                                    self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)

            # allow the callback to tell us when its been called
            GLOB_SIGNAL = threading.Event()

            # blow it away but this time get return code via callback function
            pool.destroy(1, cb_func)

            # wait for callback
            # NOTE(review): wait() has no timeout, so the test hangs if the
            # callback is never invoked
            GLOB_SIGNAL.wait()
            if GLOB_RC != 0:
                self.fail("RC not as expected in async test")

            # recreate the pool, reset the signal, shutdown the
            # servers so call will fail and then check rc in the callback
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)
            GLOB_SIGNAL = threading.Event()
            GLOB_RC = -9900000
            server_utils.stop_server(hosts=self.hostlist_servers)
            pool.destroy(1, cb_func)

            # wait for callback, expecting a timeout since servers are down
            GLOB_SIGNAL.wait()
            if GLOB_RC != -1011:
                self.fail("RC not as expected in async test")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("destroy async test failed.\n")

        except Exception as excep:
            # NOTE(review): this also catches the exceptions raised by the
            # self.fail() calls above and re-reports them with this message
            self.fail("Daos code segfaulted most likely. Error: %s" % excep)

        # no matter what happens cleanup; each step is protected so a
        # failure in one does not skip the ones after it
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist_servers)
                finally:
                    if hostfile_servers is not None:
                        os.remove(hostfile_servers)
示例#57
0
    def test_container_basics(self):
        """
        Test basic container create/destroy/open/close/query.  Nothing fancy
        just making sure they work at a rudimentary level

        Starts the agents and servers, creates and connects to a pool,
        then creates, opens, queries, closes and destroys a container.  The
        UUID reported by the query must match the one held by the container
        object.  The pool, agents and servers are torn down at the end.

        :avocado: tags=container,containercreate,containerdestroy,basecont
        """

        pool = None
        hostlist = None

        try:
            hostlist = self.params.get("test_machines", '/run/hosts/*')
            hostfile = write_host_file.write_host_file(hostlist,
                                                       self.workdir)

            self.agent_sessions = agent_utils.run_agent(self.basepath, hostlist)
            server_utils.run_server(hostfile, self.server_group, self.basepath)

            # give it time to start
            time.sleep(2)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/conttests/createmode/')
            createuid = self.params.get("uid", '/run/conttests/createuid/')
            creategid = self.params.get("gid", '/run/conttests/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/conttests/createset/')
            createsize = self.params.get("size", '/run/conttests/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)

            # need a connection to create container
            pool.connect(1 << 1)

            # create a container
            container = DaosContainer(self.context)
            container.create(pool.handle)

            # now open it
            container.open()

            # do a query and compare the UUID returned from create with
            # that returned by query
            container.query()

            if container.get_uuid_str() != c_uuid_to_str(
                    container.info.ci_uuid):
                self.fail("Container UUID did not match the one in info\n")

            container.close()

            # wait a few seconds and then destroy
            time.sleep(5)
            container.destroy()

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Test was expected to pass but it failed.\n")
        except Exception as excep:
            # NOTE(review): this also catches the exception raised by the
            # self.fail() above, so a UUID mismatch is reported with this
            # message as well
            self.fail("Daos code segfaulted most likely, error: %s" % excep)
        finally:
            # cleanup the pool; the agents and servers are stopped even when
            # the pool cleanup raises (e.g. the pool was never connected)
            try:
                if pool is not None:
                    pool.disconnect()
                    pool.destroy(1)
            finally:
                try:
                    if self.agent_sessions:
                        agent_utils.stop_agent(self.agent_sessions)
                finally:
                    server_utils.stop_server(hosts=hostlist)
示例#58
0
    def test_metadata_server_restart(self):
        """
        Test ID: DAOS-1512
        Test Description: This test will verify 2000 IOR small size container
                          after server restart. Test will write IOR in 5
                          different threads for faster execution time. Each
                          thread will create 400 (8bytes) containers to the
                          same pool. Restart the servers, read IOR container
                          file written previously and validate data integrity
                          by using IOR option "-R -G 1".
        :avocado: tags=metadata,metadata_ior,nvme,small
        """
        self.pool_connect = False
        files_per_thread = 400
        total_ior_threads = 5
        threads = []
        ior_args = {}

        # colon separated list of the first `createsvc` pool service ranks
        createsvc = self.params.get("svcn", '/run/pool/createsvc/')
        svc_list = ":".join(
            str(int(self.pool.svc.rl_ranks[i])) for i in range(createsvc))

        # keyword arguments shared by every ior_runner_thread
        ior_args['client_hostfile'] = self.hostfile_clients
        ior_args['pool_uuid'] = self.pool.get_uuid_str()
        ior_args['svc_list'] = svc_list
        ior_args['basepath'] = self.basepath
        ior_args['server_group'] = self.server_group
        ior_args['tmp_dir'] = self.workdir
        ior_args['iorwriteflags'] = self.params.get("F",
                                                    '/run/ior/iorwriteflags/')
        ior_args['iorreadflags'] = self.params.get("F",
                                                   '/run/ior/iorreadflags/')
        ior_args['iteration'] = self.params.get("iter", '/run/ior/iteration/')
        ior_args['stripe_size'] = self.params.get("s", '/run/ior/stripesize/*')
        ior_args['stripe_count'] = self.params.get("c", '/run/ior/stripecount/')
        ior_args['async_io'] = self.params.get("a", '/run/ior/asyncio/')
        ior_args['object_class'] = self.params.get("o", '/run/ior/objectclass/')
        ior_args['slots'] = self.params.get("slots", '/run/ior/clientslots/*')

        ior_args['files_per_thread'] = files_per_thread
        self.out_queue = Queue.Queue()

        # IOR write threads
        for i in range(total_ior_threads):
            threads.append(threading.Thread(target=ior_runner_thread,
                                            args=(self.out_queue,
                                                  "Thread-{}".format(i),
                                                  "write"),
                                            kwargs=ior_args))
        if self.thread_control(threads, "write") == "FAIL":
            self.d_log.error(" IOR write Thread FAIL")
            self.fail(" IOR write Thread FAIL")

        # Server Restart
        if self.agent_sessions:
            AgentUtils.stop_agent(self.hostlist_clients, self.agent_sessions)
        server_utils.stop_server(hosts=self.hostlist)
        self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                   self.hostlist_clients,
                                                   self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group, self.basepath)

        # Read IOR with verification with same number of threads
        threads = []
        for i in range(total_ior_threads):
            threads.append(threading.Thread(target=ior_runner_thread,
                                            args=(self.out_queue,
                                                  "Thread-{}".format(i),
                                                  "read"),
                                            kwargs=ior_args))
        if self.thread_control(threads, "read") == "FAIL":
            # log the phase that actually failed (was a copy of the write
            # message)
            self.d_log.error(" IOR read Thread FAIL")
            self.fail(" IOR read Thread FAIL")
示例#59
0
    def test_destroy_async(self):
        """Destroy a pool asynchronously through a callback.

        Two server groups are started, one host each.  A pool is created on
        the first group and destroyed with a callback handler; the return
        code must be 0 and no pool data may remain on the first group's
        hosts.  A second pool is then created on the first group, the
        second group's servers are stopped and the destroy is repeated; the
        return code must again be 0 and no pool data may be present on the
        second group's hosts.
        :avocado: tags=pool,pooldestroy,destroyasync
        """
        def create_pool_on_first_group():
            """Create self.pool on the first group and verify its files."""
            self.pool = TestPool(self.context, self.log)
            self.pool.get_params(self)
            self.pool.name.value = first_group
            self.pool.create()
            self.log.info("Pool UUID is %s on server_group %s",
                          self.pool.uuid, first_group)

            # The pool data must exist on the first group only
            self.assertTrue(
                self.pool.check_files(group_hosts[first_group]),
                "Pool data not detected on servers before destroy")
            self.assertFalse(
                self.pool.check_files(group_hosts[second_group]),
                "Pool data detected on servers before destroy")

        def destroy_pool_with_callback():
            """Destroy self.pool via the callback and require rc == 0."""
            self.log.info("Attempting to destroy pool")
            self.pool.pool.destroy(0, cb_handler.callback)
            cb_handler.wait()
            if cb_handler.ret_code != 0:
                self.fail("destroy-pool was expected to PASS")

        # Start two server groups, each on a single host
        first_group = self.server_group + "_a"
        second_group = self.server_group + "_b"
        group_hosts = {
            first_group: self.hostlist_servers[:1],
            second_group: self.hostlist_servers[1:2]
        }
        self.start_servers(group_hosts)

        # Round one: destroy with every server running
        create_pool_on_first_group()
        cb_handler = CallbackHandler()
        destroy_pool_with_callback()

        pool_exists = self.pool.check_files(group_hosts[first_group])
        self.assertFalse(
            pool_exists,
            "Pool data detected on {} after destroy".format(first_group))

        # Round two: destroy a new pool after stopping the other group
        create_pool_on_first_group()

        self.log.info("Stopping one server")
        server_utils.stop_server(hosts=group_hosts[second_group])

        destroy_pool_with_callback()

        pool_exists = self.pool.check_files(group_hosts[second_group])
        self.assertFalse(
            pool_exists, "Pool data detected on servers after destroy")
示例#60
0
    def test_destroy_withdata(self):
        """
        Test destroying a pool that has data written to it.

        Starts the agents and servers, creates a pool and a container,
        writes a sequential pattern into the container with the daosctl
        write-pattern command and then destroys the pool.

        :avocado: tags=pool,pooldestroy,destroydata
        """
        # Bound before the try so the cleanup below can tell whether the
        # host file was ever written
        hostfile_servers = None

        try:
            # write out a hostfile_servers and start the servers with it
            self.hostlist_servers = self.params.get("test_machines1",
                                                    '/run/hosts/')
            hostfile_servers = write_host_file.write_host_file(
                self.hostlist_servers, self.tmp)

            self.agent_sessions = agent_utils.run_agent(
                self.basepath, self.hostlist_servers)
            server_utils.run_server(hostfile_servers, self.server_group,
                                    self.basepath)

            # parameters used in pool create
            createmode = self.params.get("mode", '/run/poolparams/createmode/')
            createuid = self.params.get("uid", '/run/poolparams/createuid/')
            creategid = self.params.get("gid", '/run/poolparams/creategid/')
            createsetid = self.params.get("setname",
                                          '/run/poolparams/createset/')
            createsize = self.params.get("size", '/run/poolparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)

            # need a connection to create container
            pool.connect(1 << 1)

            # create a container
            container = DaosContainer(self.context)
            container.create(pool.handle)

            pool.disconnect()

            daosctl = self.basepath + '/install/bin/daosctl'

            write_cmd = (
                '{0} write-pattern -i {1} -l 0 -c {2} -p sequential'.format(
                    daosctl, c_uuid_to_str(pool.uuid),
                    c_uuid_to_str(container.uuid)))

            process.system_output(write_cmd)

            # blow it away
            pool.destroy(1)

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("create/destroy/create/destroy test failed.\n")

        except Exception as excep:
            self.fail("Daos code segfaulted most likely.  Error: %s" % excep)

        # no matter what happens cleanup; each step is protected so a
        # failure in one does not skip the ones after it
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                try:
                    server_utils.stop_server(hosts=self.hostlist_servers)
                finally:
                    if hostfile_servers is not None:
                        os.remove(hostfile_servers)