Пример #1
0
class PoolSvc(TestWithServers):
    """
    Tests svc argument while pool create.
    :avocado: recursive
    """
    def setUp(self):
        """Write the server host file, then start the agents and servers."""
        super(PoolSvc, self).setUp()
        # tearDown reads self.pool, so define it before anything can fail
        self.pool = None

        self.hostfile_servers = None
        self.hostlist_servers = self.params.get("test_machines",
                                                '/run/hosts/*')
        self.hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.workdir)
        print("Host file is: {}".format(self.hostfile_servers))

        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers)
        server_utils.run_server(self.hostfile_servers, self.server_group,
                                self.basepath)

    def tearDown(self):
        """Destroy the pool if it is still attached, then run base cleanup."""
        try:
            if self.pool is not None and self.pool.attached:
                self.pool.destroy(1)
        finally:
            # base class cleanup must run even when the destroy raises
            super(PoolSvc, self).tearDown()

    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        The "svc" yaml value is read as a pair: item 0 is passed to pool
        create as the svc argument and is compared against the returned rank
        list, item 1 is the expected outcome ('PASS' or 'FAIL').

        :avocado: tags=pool,svc
        """

        # parameters used in pool create
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

        svc_count = createsvc[0]
        expected_result = createsvc[1]

        try:
            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid, createsize,
                             createsetid, None, None, svc_count)
            self.pool.connect(1 << 1)

            # checking returned rank list for server more than 1:
            # count the leading entries that look like valid ranks
            iterator = 0
            while (int(self.pool.svc.rl_ranks[iterator]) > 0
                   and int(self.pool.svc.rl_ranks[iterator]) <= svc_count
                   and int(self.pool.svc.rl_ranks[iterator]) != 999999):
                iterator += 1
            # the length can only be judged once the whole list has been
            # walked; checking inside the loop failed after the first rank
            # whenever more than one service replica was requested
            if iterator != svc_count:
                self.fail("Length of Returned Rank list is not equal to "
                          "the number of Pool Service members.\n")

            rank_list = [int(self.pool.svc.rl_ranks[index])
                         for index in range(svc_count)]
            if len(rank_list) != len(set(rank_list)):
                self.fail("Duplicate values in returned rank list")

            if svc_count == 3:
                # kill the service leader and make sure the pool can still
                # be connected to afterwards
                self.pool.disconnect()
                cmd = ('{0} kill-leader  --uuid={1}'.format(
                    self.daosctl, self.pool.get_uuid_str()))
                process.system(cmd)
                self.pool.connect(1 << 1)
                self.pool.disconnect()
                # then kill and exclude rank 2 and connect once more
                server = DaosServer(self.context, self.server_group, 2)
                server.kill(1)
                self.pool.exclude([2])
                self.pool.connect(1 << 1)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            # an API error is only a test failure when success was expected
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
Пример #2
0
    def test_rebuild_with_io(self):
        """
        Test ID: Rebuild-003

        Test Description: Trigger a rebuild while I/O is ongoing.

        Use Cases:
          -- single pool, single client performing continuous read/write/verify
             sequence while failure/rebuild is triggered in another process

        :avocado: tags=pool,rebuild,rebuildwithio
        """

        # every rebuild test brings up its own agents/servers, so the
        # environment is prepared here rather than in setUp
        group_name = self.params.get("name", '/server_config/',
                                     'daos_server')

        install_root = os.path.normpath(self.build_paths['PREFIX'] + "/../")

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        host_file = write_host_file.write_host_file(self.hostlist,
                                                    self.workdir)

        try:
            self.agent_sessions = AgentUtils.run_agent(install_root,
                                                       self.hostlist)
            server_utils.run_server(host_file, group_name, install_root)

            # the pool is owned by whoever runs the test
            uid = os.geteuid()
            gid = os.getegid()

            # remaining pool create arguments come from the yaml
            mode = self.params.get("mode", '/run/testparams/createmode/')
            set_id = self.params.get("setname",
                                     '/run/testparams/createset/')
            size = self.params.get("size", '/run/testparams/createsize/')

            # create and connect to the pool, then create and open a
            # container to do I/O against
            test_pool = DaosPool(self.context)
            test_pool.create(mode, uid, gid, size, set_id, None)
            test_pool.connect(1 << 1)
            cont = DaosContainer(self.context)
            cont.create(test_pool.handle)
            cont.open()

            # baseline: nothing disabled and no rebuild activity reported
            test_pool.pool_query()
            info = test_pool.pool_info
            if info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            dummy_pool_version = info.pi_rebuild_st.rs_version

            # first round of I/O (the 30 is passed straight to the helper)
            dummy_bw = io_utilities.continuous_io(cont, 30)

            # kill one server and exclude its rank to start the rebuild
            victim = self.params.get("rank", '/run/testparams/ranks/*')
            server = DaosServer(self.context, group_name, victim)
            server.kill(1)
            test_pool.exclude([victim])

            # second round of I/O, issued right after the exclude; for now it
            # runs in this process rather than on a separate client
            dummy_bw = io_utilities.continuous_io(cont, 30)

            # poll every two seconds until the rebuild reports done
            test_pool.pool_query()
            while test_pool.pool_info.pi_rebuild_st.rs_done != 1:
                time.sleep(2)
                test_pool.pool_query()

            # final statistics: one target disabled, no error, and something
            # was actually rebuilt
            info = test_pool.pool_info
            if info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(info.pi_ndisabled))
            if info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    info.pi_rebuild_st.rs_errno))
            if info.pi_rebuild_st.rs_obj_nr <= 0:
                self.fail("No objects have been rebuilt.")
            if info.pi_rebuild_st.rs_rec_nr <= 0:
                self.fail("No records have been rebuilt.")

        except (ValueError, DaosApiError) as error:
            print(error)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # orderly shutdown first; agents are stopped and servers killed
            # no matter how that goes
            try:
                server_utils.stop_server(hosts=self.hostlist)
                os.remove(host_file)
                # really make sure everything is gone
                check_for_pool.cleanup_pools(self.hostlist)
            finally:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
                server_utils.kill_server(self.hostlist)
Пример #3
0
class DestroyRebuild(Test):

    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: tags=pool,pooldestroy,rebuild,desreb
    """

    build_paths = []
    server_group = ""
    context = None
    pool = None
    hostfile = ""

    def setUp(self):
        """Start agents and servers, then create the pool used by the test."""
        self.agent_sessions = None
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = AgentUtils.run_agent(self.basepath, self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        self.pool.get_uuid_str()

        time.sleep(2)

    def tearDown(self):
        """Remove the host file, destroy any remaining pool, stop services.

        Each step is attempted even when an earlier one raises, so a failed
        file removal or agent stop cannot leave the pool or servers behind.
        """
        try:
            try:
                os.remove(self.hostfile)
            finally:
                # self.pool is cleared by the test once it destroyed the pool
                if self.pool:
                    self.pool.destroy(1)
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                server_utils.stop_server(hosts=self.hostlist)


    def test_destroy_while_rebuilding(self):
        """
        Kill one server, exclude its rank and destroy the pool straight away.

        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            server = DaosServer(self.context, bytes(self.server_group),
                                svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            server.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            print("exclude target ")

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.pool.destroy(1)
            # the pool is gone; clear the reference so tearDown does not
            # issue a second destroy against it
            self.pool = None
            print("destroy ")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #4
0
class DestroyRebuild(Test):

    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: recursive
    """

    build_paths = []
    server_group = ""
    context = None
    pool = None
    hostfile_servers = ""

    def setUp(self):
        """Start agents and servers, then create the pool used by the test."""
        self.agent_sessions = None
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist_servers = self.params.get("test_machines", '/run/hosts/')
        self.hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers)
        server_utils.run_server(self.hostfile_servers, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        self.pool.get_uuid_str()

        time.sleep(2)

    def tearDown(self):
        """Remove the host file, destroy any remaining pool, stop services.

        Each step is attempted even when an earlier one raises, so a failed
        file removal or agent stop cannot leave the pool or servers behind.
        """
        try:
            try:
                os.remove(self.hostfile_servers)
            finally:
                # self.pool is cleared by the test once it destroyed the pool
                if self.pool:
                    self.pool.destroy(1)
        finally:
            try:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
            finally:
                server_utils.stop_server(hosts=self.hostlist_servers)


    def test_destroy_while_rebuilding(self):
        """
        Kill one server, exclude its rank and destroy the pool straight away.

        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            server = DaosServer(self.context, bytes(self.server_group),
                                svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist_servers):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            server.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            print("exclude target ")

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.pool.destroy(1)
            # the pool is gone; clear the reference so tearDown does not
            # issue a second destroy against it
            self.pool = None
            print("destroy ")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #5
0
class RebuildNoCap(Test):
    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: tags=pool,rebuild,nocap
    """

    build_paths = []
    server_group = ""
    CONTEXT = None
    POOL = None
    hostfile = ""

    def setUp(self):
        """Start the servers, create a pool and write data into it."""

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.CONTEXT = DaosContext(build_paths['PREFIX'] + '/lib/')

        # generate a hostfile
        self.host_list = self.params.get("test_machines", '/run/hosts/')
        tmp = build_paths['PREFIX'] + '/tmp'
        self.hostfile = WriteHostFile.WriteHostFile(self.host_list, tmp)

        # fire up the DAOS servers
        self.server_group = self.params.get("server_group", '/run/server/',
                                            'daos_server')
        ServerUtils.runServer(self.hostfile, self.server_group,
                              build_paths['PREFIX'] + '/../')
        time.sleep(3)

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.POOL = DaosPool(self.CONTEXT)
        self.POOL.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.POOL.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool
        # int() instead of the Python-2-only long(): the value is only
        # formatted into the command line, where both render identically
        how_many_bytes = int(
            self.params.get("datasize", '/run/testparams/datatowrite/'))
        exepath = build_paths['PREFIX'] +\
                  "/../src/tests/ftest/util/WriteSomeData.py"
        cmd = "export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"\
              " --np 1 --host {1} {2} {3} testfile".format(
                  uuid, self.host_list[0], exepath, how_many_bytes)
        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """Remove the host file, destroy the pool and stop the servers.

        Each step is attempted even when an earlier one raises, and the pool
        destroy is skipped when setUp never got as far as creating a pool.
        """
        try:
            try:
                os.remove(self.hostfile)
            finally:
                if self.POOL:
                    self.POOL.destroy(1)
        finally:
            ServerUtils.stopServer()

    def test_rebuild_no_capacity(self):
        """
        Kill and exclude one server, then expect the rebuild to report an
        error.

        :avocado: tags=pool,rebuild,nocap
        """
        try:
            # call form of print, matching the other prints in this method;
            # with a single string argument it prints the same text under
            # the Python 2 print statement as well
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(
                self.params.get("rank_to_kill", '/run/testparams/ranks/'))
            sh = DaosServer(self.CONTEXT, bytes(self.server_group),
                            svr_to_kill)

            time.sleep(1)
            sh.kill(1)

            # exclude the target from the dead server
            self.POOL.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.POOL.connect(1 << 1)
            status = self.POOL.pool_query()
            if not status.pi_ntargets == len(self.host_list):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            # NOTE(review): index 2 of pi_rebuild_st is presumably the
            # rebuild error code -- confirm against the binding
            if status.pi_rebuild_st[2] == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except ValueError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #6
0
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test.

        Use Cases:
          -- single pool rebuild, single client, various record/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """

        # every rebuild test brings up its own servers, so the environment is
        # prepared here rather than in setUp
        setid = self.params.get("setname", '/run/testparams/setnames/')
        group_name = self.params.get("server_group", '/server/',
                                     'daos_server')

        install_root = os.path.normpath(self.build_paths['PREFIX'] + "/../")
        tmp_dir = self.build_paths['PREFIX'] + '/tmp'

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        host_file = WriteHostFile.WriteHostFile(self.hostlist, tmp_dir)

        try:
            ServerUtils.runServer(host_file, group_name, install_root)

            # the pool is owned by whoever runs the test
            uid = os.geteuid()
            gid = os.getegid()

            # remaining pool create arguments come from the yaml
            mode = self.params.get("mode", '/run/testparams/createmode/')
            set_id = self.params.get("setname",
                                     '/run/testparams/createset/')
            pool_size = self.params.get("size", '/run/testparams/createsize/')

            # create the pool and keep a connection open for the rebuild
            test_pool = DaosPool(self.Context)
            test_pool.create(mode, uid, gid, pool_size, set_id, None)
            test_pool.connect(1 << 1)

            # baseline: nothing disabled and no rebuild activity reported
            test_pool.pool_query()
            info = test_pool.pool_info
            if info.pi_ndisabled != 0:
                self.fail(
                    "Number of disabled targets reporting incorrectly.\n")
            if info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            pool_version = info.pi_rebuild_st.rs_version

            # container to write into
            cont = DaosContainer(self.Context)
            cont.create(test_pool.handle)
            cont.open()

            # workload shape from the yaml; without objects there are no
            # records either
            objcount = self.params.get("objcount",
                                       '/run/testparams/numobjects/*')
            reccount = self.params.get("reccount",
                                       '/run/testparams/numrecords/*')
            if objcount == 0:
                reccount = 0

            # rank that receives the data and is killed afterwards
            rank = self.params.get("rank", '/run/testparams/ranks/*')

            # length of the value written under each key
            size = self.params.get("size", '/run/testparams/datasize/')

            alphabet = string.ascii_uppercase + string.digits

            def random_text(length):
                """Return `length` random upper-case letters/digits."""
                return ''.join(random.choice(alphabet)
                               for _ in range(length))

            written = []
            for _ in range(objcount):
                # the first write of each object passes None as the object
                obj = None
                for _ in range(reccount):
                    dkey = random_text(5)
                    akey = random_text(5)
                    data = random_text(size)

                    obj, tx = cont.write_an_obj(data, len(data), dkey,
                                                akey, obj, rank)
                    written.append((obj, dkey, akey, data, tx))

                    # immediate read-back must return what was written
                    readback = cont.read_an_obj(size, dkey, akey, obj, tx)
                    if data != readback.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # kill the server the data was written to
            server = DaosServer(self.Context, group_name, rank)
            server.kill(1)

            # the failed target still has to be excluded by hand
            test_pool.exclude([rank])

            # poll every two seconds until the rebuild reports done
            test_pool.pool_query()
            while test_pool.pool_info.pi_rebuild_st.rs_done != 1:
                time.sleep(2)
                test_pool.pool_query()

            # rebuild statistics must match what was written
            info = test_pool.pool_info
            expected_recs = reccount * objcount
            if info.pi_ndisabled != 1:
                self.fail(
                    "Number of disabled targets reporting incorrectly: {}".
                    format(info.pi_ndisabled))
            if info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    info.pi_rebuild_st.rs_errno))
            if info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    info.pi_rebuild_st.rs_obj_nr, objcount))
            if info.pi_rebuild_st.rs_rec_nr != expected_recs:
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    info.pi_rebuild_st.rs_rec_nr, expected_recs))

            # every record written earlier must still read back unchanged
            for obj, dkey, akey, data, tx in written:
                readback = cont.read_an_obj(len(data), dkey, akey, obj, tx)
                if data != readback.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as error:
            print(error)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # orderly shutdown first; the servers are killed no matter how
            # that goes
            try:
                ServerUtils.stopServer(hosts=self.hostlist)
                os.remove(host_file)
                # really make sure everything is gone
                CheckForPool.CleanupPools(self.hostlist)
            finally:
                ServerUtils.killServer(self.hostlist)
Пример #7
0
class RebuildNoCap(TestWithServers):

    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: recursive
    """

    def setUp(self):
        """Run the base setup, create a pool and write data into it."""
        super(RebuildNoCap, self).setUp()
        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.pool.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool
        # int() instead of the Python-2-only long(): the value is only
        # formatted into the command line, where both render identically
        how_many_bytes = int(self.params.get("datasize",
                                             '/run/testparams/datatowrite/'))
        exepath = self.prefix +\
                 "/../src/tests/ftest/util/write_some_data.py"
        cmd = "export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"\
              " --np 1 --host {1} {2} {3} testfile".format(
                  uuid, self.hostlist_servers[0], exepath, how_many_bytes)
        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """ cleanup after the test """

        try:
            if self.pool:
                self.pool.destroy(1)
        finally:
            # base class cleanup must run even when the destroy raises
            super(RebuildNoCap, self).tearDown()


    def test_rebuild_no_capacity(self):
        """
        Kill and exclude one server, then expect the rebuild to report an
        error.

        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            d_server = DaosServer(self.context, bytes(self.server_group),
                                  svr_to_kill)

            time.sleep(1)
            d_server.kill(1)

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist_servers):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st.rs_errno == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #8
0
class PoolSvc(TestWithServers):
    """
    Checks pool creation with different values of the svc argument.
    :avocado: recursive
    """
    def tearDown(self):
        """Destroy a still-attached pool, then run the base class cleanup."""
        try:
            pool = self.pool
            if pool is not None and pool.attached:
                pool.destroy(1)
        finally:
            super(PoolSvc, self).tearDown()

    def test_poolsvc(self):
        """
        Create a pool with the svc value under test and validate the
        service rank list that comes back.

        The rank list is checked for its length and for duplicates.  When
        the svc value is 3, the pool service is stopped, the leader rank is
        excluded, and after a reconnect cycle the rank just below the
        leader is killed and excluded as well.

        :avocado: tags=pool,svc
        """

        # collect the arguments for pool create
        params = self.params
        mode = params.get("mode", '/run/createtests/createmode/*/')
        uid = os.geteuid()
        gid = os.getegid()
        setid = params.get("setname", '/run/createtests/createset/')
        size = params.get("size", '/run/createtests/createsize/')
        svc_param = params.get("svc", '/run/createtests/createsvc/*/')

        # second entry of the svc parameter is the expected outcome
        expected_result = svc_param[1]

        try:
            # first entry of the svc parameter is the value handed to create
            svc_count = svc_param[0]

            # set up the python pool object, create the daos storage
            # behind it and connect
            self.pool = DaosPool(self.context)
            self.pool.create(mode, uid, gid, size, setid, None, None,
                             svc_count)
            self.pool.connect(1 << 1)

            def rank_at(index):
                """Return entry `index` of the pool's svc rank list."""
                return int(self.pool.svc.rl_ranks[index])

            # count the leading rank list entries that lie in
            # (0, svc_count]; the value 999999 also ends the scan
            valid_ranks = 0
            while True:
                rank = rank_at(valid_ranks)
                if not (0 < rank <= svc_count) or rank == 999999:
                    break
                valid_ranks += 1
            if valid_ranks != svc_count:
                self.fail("Length of Returned Rank list is not equal to "
                          "the number of Pool Service members.\n")

            # no rank may show up twice among the first svc_count entries
            seen_ranks = []
            for index in range(svc_count):
                seen_ranks.append(rank_at(index))
                if len(set(seen_ranks)) != len(seen_ranks):
                    self.fail("Duplicate values in returned rank list")

            self.pool.pool_query()
            leader = self.pool.pool_info.pi_leader
            if svc_count == 3:
                # stop the pool service and exclude the leader rank
                self.pool.pool_svc_stop()
                self.pool.exclude([leader])
                # disconnect, make sure connecting still works, and
                # disconnect once more
                self.pool.disconnect()
                self.pool.connect(1 << 1)
                self.pool.disconnect()
                # take down and exclude one more server, the rank just
                # below the leader
                other_rank = leader - 1
                server = DaosServer(self.context, self.server_group,
                                    other_rank)
                server.kill(1)
                self.pool.exclude([other_rank])
                # connecting is attempted one final time
                self.pool.connect(1 << 1)

            if expected_result == 'FAIL':
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as error:
            print(error)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
Пример #9
0
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Extends the basic rebuild test by having two
        pools rebuild at the same time.

        Use Cases:
          -- multipool rebuild, single client, various object and record
             counts

        :avocado: tags=pool,rebuild,rebuildmulti
        """
        alphabet = string.ascii_uppercase + string.digits

        def random_text(length):
            """Return a random string of upper case letters and digits."""
            return ''.join(random.choice(alphabet) for _ in range(length))

        try:
            # set up two python pool objects, then create the daos storage
            # behind each; as things stand both pools should end up on the
            # same storage with the same service leader
            first_pool = DaosPool(self.context)
            second_pool = DaosPool(self.context)
            for new_pool in (first_pool, second_pool):
                new_pool.create(self.createmode, self.createuid,
                                self.creategid, self.createsize,
                                self.createsetid)

            # both pools stay connected while the rebuild runs
            for new_pool in (first_pool, second_pool):
                new_pool.connect(1 << 1)

            # one container per pool
            first_cont = DaosContainer(self.context)
            first_cont.create(first_pool.handle)
            second_cont = DaosContainer(self.context)
            second_cont.create(second_pool.handle)

            # open both of them
            first_cont.open()
            second_cont.open()

            # For now identical data goes into both pools, which keeps the
            # verification further down simple
            written = []
            for _obj_index in range(self.objcount):
                obj = None
                for _rec_index in range(self.reccount):

                    # invent two keys and a payload
                    dkey = random_text(5)
                    akey = random_text(5)
                    data = random_text(self.size)

                    # object class 15 is DAOS_OC_R1S_SPEC_RANK:
                    # one replica on the specified rank
                    obj, txn = first_cont.write_an_obj(
                        data, len(data), dkey, akey, obj, self.rank,
                        obj_cls=15)
                    obj, txn = second_cont.write_an_obj(
                        data, len(data), dkey, akey, obj, self.rank,
                        obj_cls=15)
                    written.append((obj, dkey, akey, data, txn))

                    # read the record straight back from each container
                    readback = first_cont.read_an_obj(
                        self.size, dkey, akey, obj, txn)
                    if data != readback.value:
                        self.fail("Wrote data P1, read it back, didn't match\n")
                    readback = second_cont.read_an_obj(
                        self.size, dkey, akey, obj, txn)
                    if data != readback.value:
                        self.fail("Wrote data P2, read it back, didn't match\n")

            # take a server down
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # for the time being the failed target has to be excluded
            # by hand
            first_pool.exclude([self.rank])
            second_pool.exclude([self.rank])

            # poll the first pool until its rebuild reports done; the
            # second pool is dealt with further down
            first_pool.pool_query()
            while first_pool.pool_info.pi_rebuild_st.rs_done != 1:
                time.sleep(2)
                first_pool.pool_query()

            # no errors may be reported and the counters have to match
            # what was written above
            info = first_pool.pool_info
            rebuild = info.pi_rebuild_st
            if info.pi_ndisabled != 1:
                self.fail("P1 number disabled targets reporting incorrectly: {}"
                          .format(info.pi_ndisabled))
            if rebuild.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}"
                          .format(rebuild.rs_errno))
            if rebuild.rs_obj_nr != self.objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}"
                          .format(rebuild.rs_obj_nr, self.objcount))
            expected_recs = self.reccount * self.objcount
            if rebuild.rs_rec_nr != expected_recs:
                self.fail("P1 rebuilt recs not as expected: {0} {1}"
                          .format(rebuild.rs_rec_nr, expected_recs))

            # with the rebuild finished every saved record must still read
            # back unchanged
            for s_obj, s_dkey, s_akey, s_data, s_txn in written:
                readback = first_cont.read_an_obj(
                    len(s_data), s_dkey, s_akey, s_obj, s_txn)
                if s_data != readback.value:
                    self.fail("after rebuild data didn't check out")

            # same procedure for the second pool
            second_pool.pool_query()
            while second_pool.pool_info.pi_rebuild_st.rs_done != 1:
                time.sleep(2)
                second_pool.pool_query()

            # no errors may be reported and the counters have to match
            # what was written above
            info = second_pool.pool_info
            rebuild = info.pi_rebuild_st
            if info.pi_ndisabled != 1:
                self.fail("Number disabled targets reporting incorrectly: {}"
                          .format(info.pi_ndisabled))
            if rebuild.rs_errno != 0:
                self.fail("Rebuild error reported: {}"
                          .format(rebuild.rs_errno))
            if rebuild.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}"
                          .format(rebuild.rs_obj_nr, self.objcount))
            expected_recs = self.reccount * self.objcount
            if rebuild.rs_rec_nr != expected_recs:
                self.fail("Rebuilt recs not as expected: {0} {1}"
                          .format(rebuild.rs_rec_nr, expected_recs))

            # with the rebuild finished every saved record must still read
            # back unchanged
            for s_obj, s_dkey, s_akey, s_data, s_txn in written:
                readback = second_cont.read_an_obj(
                    len(s_data), s_dkey, s_akey, s_obj, s_txn)
                if s_data != readback.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as error:
            print(error)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #10
0
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test: write records to a
        single pool, kill one server, and check the rebuild result.

        Use Cases:
          -- single pool rebuild, single client, various record/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """
        alphabet = string.ascii_uppercase + string.digits

        def random_text(length):
            """Return a random string of upper case letters and digits."""
            return ''.join(random.choice(alphabet) for _ in range(length))

        try:

            # set up the python pool object and create the daos storage
            # behind it
            pool = DaosPool(self.context)
            pool.create(self.createmode, self.createuid, self.creategid,
                        self.createsize, self.createsetid)

            # the connection stays open while the rebuild runs
            pool.connect(1 << 1)

            # baseline: nothing disabled, no rebuild activity or errors
            pool.pool_query()
            info = pool.pool_info
            rebuild = info.pi_rebuild_st
            if info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if rebuild.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if rebuild.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if rebuild.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if rebuild.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")

            # create the container and open it
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            written = []
            for _obj_index in range(self.objcount):
                obj = None
                for _rec_index in range(self.reccount):
                    # invent two keys and a payload, then write them
                    dkey = random_text(5)
                    akey = random_text(5)
                    data = random_text(self.size)

                    obj, txn = container.write_an_obj(
                        data, len(data), dkey, akey, obj, self.rank,
                        obj_cls=16)

                    written.append((obj, dkey, akey, data, txn))

                    # the record must read back unchanged right away
                    readback = container.read_an_obj(
                        self.size, dkey, akey, obj, txn)
                    if data != readback.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # take down the server at the rank that was passed to the writes
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # for the time being the failed target has to be excluded by hand
            pool.exclude([self.rank])

            # poll the pool until the rebuild reports done
            pool.pool_query()
            while pool.pool_info.pi_rebuild_st.rs_done != 1:
                time.sleep(2)
                pool.pool_query()

            # one disabled target, no error, and counters matching the
            # amount of data written above
            info = pool.pool_info
            rebuild = info.pi_rebuild_st
            if info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(info.pi_ndisabled))
            if rebuild.rs_errno != 0:
                self.fail("Rebuild error reported: {}"
                          .format(rebuild.rs_errno))
            if rebuild.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}"
                          .format(rebuild.rs_obj_nr, self.objcount))
            expected_recs = self.reccount * self.objcount
            if rebuild.rs_rec_nr != expected_recs:
                self.fail("Rebuilt recs not as expected: {0} {1}"
                          .format(rebuild.rs_rec_nr, expected_recs))

            # with the rebuild finished every saved record must still read
            # back unchanged
            for s_obj, s_dkey, s_akey, s_data, s_txn in written:
                readback = container.read_an_obj(
                    len(s_data), s_dkey, s_akey, s_obj, s_txn)
                if s_data != readback.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as error:
            print(error)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #11
0
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test.  Records are written
        to one pool, a server is killed and excluded, and the rebuild
        status and the stored data are checked afterwards.

        Use Cases:
          -- single pool rebuild, single client, various record/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """
        charset = string.ascii_uppercase + string.digits

        def make_string(count):
            """Return `count` random characters taken from charset."""
            return ''.join(random.choice(charset) for _ in range(count))

        try:

            # python pool object first, daos storage second
            pool = DaosPool(self.context)
            pool.create(self.createmode, self.createuid, self.creategid,
                        self.createsize, self.createsetid)

            # keep a connection open for the duration of the rebuild
            pool.connect(1 << 1)

            # before anything fails the pool must look untouched; each row
            # is (value reported, value required, failure message)
            pool.pool_query()
            info = pool.pool_info
            rebuild = info.pi_rebuild_st
            baseline = (
                (info.pi_ndisabled, 0,
                 "Number of disabled targets reporting incorrectly.\n"),
                (rebuild.rs_errno, 0,
                 "Rebuild error but rebuild hasn't run.\n"),
                (rebuild.rs_done, 1,
                 "Rebuild is running but device hasn't failed yet.\n"),
                (rebuild.rs_obj_nr, 0, "Rebuilt objs not zero.\n"),
                (rebuild.rs_rec_nr, 0, "Rebuilt recs not zero.\n"),
            )
            for reported, required, message in baseline:
                if reported != required:
                    self.fail(message)

            # a container to hold the test data, opened right away
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            records = []
            for _obj_no in range(self.objcount):
                obj = None
                for _rec_no in range(self.reccount):
                    # random keys and payload for this record
                    dkey = make_string(5)
                    akey = make_string(5)
                    data = make_string(self.size)

                    obj, txn = container.write_an_obj(
                        data, len(data), dkey, akey, obj, self.rank,
                        obj_cls=16)

                    records.append((obj, dkey, akey, data, txn))

                    # confirm the write by reading the record back
                    check = container.read_an_obj(self.size, dkey, akey,
                                                  obj, txn)
                    if data != check.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # kill the server at the rank used for the writes
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # the failed target is not excluded automatically yet
            pool.exclude([self.rank])

            # re-query every two seconds until the rebuild is flagged done
            pool.pool_query()
            while pool.pool_info.pi_rebuild_st.rs_done != 1:
                time.sleep(2)
                pool.pool_query()

            # exactly one disabled target, no rebuild error, and rebuilt
            # object/record counts equal to what was written
            info = pool.pool_info
            rebuild = info.pi_rebuild_st
            total_records = self.reccount * self.objcount
            if info.pi_ndisabled != 1:
                self.fail(
                    "Number of disabled targets reporting incorrectly: {}"
                    .format(info.pi_ndisabled))
            if rebuild.rs_errno != 0:
                self.fail(
                    "Rebuild error reported: {}".format(rebuild.rs_errno))
            if rebuild.rs_obj_nr != self.objcount:
                self.fail(
                    "Rebuilt objs not as expected: {0} {1}"
                    .format(rebuild.rs_obj_nr, self.objcount))
            if rebuild.rs_rec_nr != total_records:
                self.fail(
                    "Rebuilt recs not as expected: {0} {1}"
                    .format(rebuild.rs_rec_nr, total_records))

            # every record written earlier must survive the rebuild
            for r_obj, r_dkey, r_akey, r_data, r_txn in records:
                check = container.read_an_obj(len(r_data), r_dkey, r_akey,
                                              r_obj, r_txn)
                if r_data != check.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as error:
            print(error)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #12
0
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Builds on the basic rebuild test by rebuilding
        two pools at once.  The servers are stopped and the pools cleaned
        up at the end, whatever the outcome.

        Use Cases:
          -- multipool rebuild, single client, various object and record
             counts

        :avocado: tags=pool,rebuild,rebuildmulti
        """
        charset = string.ascii_uppercase + string.digits

        def make_string(count):
            """Return `count` random characters taken from charset."""
            return ''.join(random.choice(charset) for _ in range(count))

        def wait_for_rebuild(pool):
            """Query `pool` every two seconds until rs_done reads 1."""
            pool.pool_query()
            while pool.pool_info.pi_rebuild_st.rs_done != 1:
                time.sleep(2)
                pool.pool_query()

        def verify_records(container, records):
            """Fail the test if a saved record reads back differently."""
            for r_obj, r_dkey, r_akey, r_data, r_txn in records:
                check = container.read_an_obj(len(r_data), r_dkey, r_akey,
                                              r_obj, r_txn)
                if r_data != check.value:
                    self.fail("after rebuild data didn't check out")

        try:
            # python pool objects first, daos storage second; with the
            # code as it is both pools should share the same storage and
            # the same service leader
            pool_a = DaosPool(self.context)
            pool_b = DaosPool(self.context)
            for each_pool in (pool_a, pool_b):
                each_pool.create(self.createmode, self.createuid,
                                 self.creategid, self.createsize,
                                 self.createsetid)

            # keep connections open for the duration of the rebuild
            for each_pool in (pool_a, pool_b):
                each_pool.connect(1 << 1)

            # a container in each pool
            cont_a = DaosContainer(self.context)
            cont_a.create(pool_a.handle)
            cont_b = DaosContainer(self.context)
            cont_b.create(pool_b.handle)

            # and both opened
            cont_a.open()
            cont_b.open()

            # The two pools receive the same data for now, so checking it
            # later stays simple
            records = []
            for _obj_no in range(self.objcount):
                obj = None
                for _rec_no in range(self.reccount):

                    # random keys and payload for this record
                    dkey = make_string(5)
                    akey = make_string(5)
                    data = make_string(self.size)

                    # obj_cls 15 selects DAOS_OC_R1S_SPEC_RANK, a single
                    # replica on the specified rank
                    obj, txn = cont_a.write_an_obj(data, len(data), dkey,
                                                   akey, obj, self.rank,
                                                   obj_cls=15)
                    obj, txn = cont_b.write_an_obj(data, len(data), dkey,
                                                   akey, obj, self.rank,
                                                   obj_cls=15)
                    records.append((obj, dkey, akey, data, txn))

                    # confirm the write in both containers
                    check = cont_a.read_an_obj(self.size, dkey, akey, obj,
                                               txn)
                    if data != check.value:
                        self.fail(
                            "Wrote data P1, read it back, didn't match\n")
                    check = cont_b.read_an_obj(self.size, dkey, akey, obj,
                                               txn)
                    if data != check.value:
                        self.fail(
                            "Wrote data P2, read it back, didn't match\n")

            # kill one server
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # the failed target is not excluded automatically yet
            pool_a.exclude([self.rank])
            pool_b.exclude([self.rank])

            # first pool: wait for the rebuild to finish, then require one
            # disabled target, no error and counters matching the writes
            wait_for_rebuild(pool_a)
            info = pool_a.pool_info
            rebuild = info.pi_rebuild_st
            total_records = self.reccount * self.objcount
            if info.pi_ndisabled != 1:
                self.fail(
                    "P1 number disabled targets reporting incorrectly: {}"
                    .format(info.pi_ndisabled))
            if rebuild.rs_errno != 0:
                self.fail(
                    "P1 rebuild error reported: {}".format(rebuild.rs_errno))
            if rebuild.rs_obj_nr != self.objcount:
                self.fail(
                    "P1 rebuilt objs not as expected: {0} {1}"
                    .format(rebuild.rs_obj_nr, self.objcount))
            if rebuild.rs_rec_nr != total_records:
                self.fail(
                    "P1 rebuilt recs not as expected: {0} {1}"
                    .format(rebuild.rs_rec_nr, total_records))

            # the saved records must have survived the rebuild
            verify_records(cont_a, records)

            # second pool: same waiting and the same requirements
            wait_for_rebuild(pool_b)
            info = pool_b.pool_info
            rebuild = info.pi_rebuild_st
            total_records = self.reccount * self.objcount
            if info.pi_ndisabled != 1:
                self.fail(
                    "Number disabled targets reporting incorrectly: {}"
                    .format(info.pi_ndisabled))
            if rebuild.rs_errno != 0:
                self.fail(
                    "Rebuild error reported: {}".format(rebuild.rs_errno))
            if rebuild.rs_obj_nr != self.objcount:
                self.fail(
                    "Rebuilt objs not as expected: {0} {1}"
                    .format(rebuild.rs_obj_nr, self.objcount))
            if rebuild.rs_rec_nr != total_records:
                self.fail(
                    "Rebuilt recs not as expected: {0} {1}"
                    .format(rebuild.rs_rec_nr, total_records))

            # the saved records must have survived the rebuild
            verify_records(cont_b, records)

        except DaosApiError as error:
            print(error)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # always stop the servers, clean up the pools and kill
            # whatever server processes remain
            hosts = self.hostlist_servers
            server_utils.stop_server(hosts=hosts)
            check_for_pool.cleanup_pools(hosts)
            server_utils.kill_server(hosts)
Пример #13
0
class TestPool(TestDaosApiBase):
    """A class for functional testing of DaosPools objects."""

    def __init__(self, context, log, cb_handler=None):
        """Initialize a TestPool object.

        Args:
            context (DaosContext): context used when creating the DaosPool and
                DaosServer objects
            log (logging): logging object used to report the pool status
            cb_handler (CallbackHandler, optional): callback object to use with
                the API methods. Defaults to None.
        """
        super(TestPool, self).__init__(cb_handler)
        self.context = context
        self.log = log
        self.uid = os.geteuid()
        self.gid = os.getegid()

        # Pool creation parameters; every one starts out holding None
        self.mode = TestParameter(None)
        self.name = TestParameter(None)
        self.group = TestParameter(None)
        self.svcn = TestParameter(None)
        self.target_list = TestParameter(None)
        self.scm_size = TestParameter(None)
        self.nvme_size = TestParameter(None)

        # State of the pool currently managed by this object
        self.pool = None
        self.uuid = None
        self.info = None
        self.connected = False

    def get_params(self, test, path="/run/pool/*"):
        """Get the pool parameters from the yaml file.

        Args:
            test (Test): avocado Test object
            path (str, optional): yaml namespace. Defaults to "/run/pool/*".
        """
        super(TestPool, self).get_params(test, path)

    @fail_on(DaosApiError)
    def create(self):
        """Create a pool.

        Destroys an existing pool if defined and assigns self.pool and
        self.uuid.
        """
        self.destroy()
        self.log.info("Creating a pool")
        self.pool = DaosPool(self.context)
        kwargs = {
            "mode": self.mode.value, "uid": self.uid, "gid": self.gid,
            "scm_size": self.scm_size.value, "group": self.name.value}
        # Optional arguments are only passed on when they have a truthy value
        for key in ("target_list", "svcn", "nvme_size"):
            value = getattr(self, key).value
            if value:
                kwargs[key] = value
        self._call_method(self.pool.create, kwargs)
        self.uuid = self.pool.get_uuid_str()

    @fail_on(DaosApiError)
    def connect(self, permission=1):
        """Connect to the pool.

        Args:
            permission (int, optional): connect permission; the flag passed to
                the pool connect call is 1 << permission. Defaults to 1.

        Returns:
            bool: True if the pool has been connected; False if the pool was
                already connected or the pool is not defined.

        """
        if self.pool and not self.connected:
            kwargs = {"flags": 1 << permission}
            self.log.info(
                "Connecting to pool %s with permission %s (flag: %s)",
                self.uuid, permission, kwargs["flags"])
            self._call_method(self.pool.connect, kwargs)
            self.connected = True
            return True
        return False

    @fail_on(DaosApiError)
    def disconnect(self):
        """Disconnect from connected pool.

        Returns:
            bool: True if the pool has been disconnected; False if the pool was
                already disconnected or the pool is not defined.

        """
        if self.pool and self.connected:
            self.log.info("Disonnecting from pool %s", self.uuid)
            self._call_method(self.pool.disconnect, {})
            self.connected = False
            return True
        return False

    @fail_on(DaosApiError)
    def destroy(self, force=1):
        """Destroy the pool.

        Disconnects from the pool first and clears self.pool, self.uuid and
        self.info afterwards.

        Args:
            force (int, optional): force flag. Defaults to 1.

        Returns:
            bool: True if the pool has been destroyed; False if the pool is not
                defined.

        """
        if self.pool:
            self.disconnect()
            self.log.info("Destroying pool %s", self.uuid)
            self._call_method(self.pool.destroy, {"force": force})
            self.pool = None
            self.uuid = None
            self.info = None
            return True
        return False

    @fail_on(DaosApiError)
    def get_info(self):
        """Query the pool for information.

        Connects to the pool if needed and sets the self.info attribute.
        Nothing is done if the pool is not defined.
        """
        if self.pool:
            self.connect()
            self._call_method(self.pool.pool_query, {})
            self.info = self.pool.pool_info

    def check_pool_info(self, pi_uuid=None, pi_ntargets=None, pi_nnodes=None,
                        pi_ndisabled=None, pi_map_ver=None, pi_leader=None,
                        pi_bits=None):
        """Check the pool info attributes.

        Args:
            pi_uuid (str, optional): pool uuid. Defaults to None.
            pi_ntargets (int, optional): number of targets. Defaults to None.
            pi_nnodes (int, optional): number of nodes. Defaults to None.
            pi_ndisabled (int, optional): number of disabled. Defaults to None.
            pi_map_ver (int, optional): pool map version. Defaults to None.
            pi_leader (int, optional): pool leader. Defaults to None.
            pi_bits (int, optional): pool bits. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        # The expected values are listed explicitly instead of being pulled
        # from locals() so that adding a local variable can never turn into
        # an unintended check.
        expected = (
            ("pi_uuid", pi_uuid),
            ("pi_ntargets", pi_ntargets),
            ("pi_nnodes", pi_nnodes),
            ("pi_ndisabled", pi_ndisabled),
            ("pi_map_ver", pi_map_ver),
            ("pi_leader", pi_leader),
            ("pi_bits", pi_bits),
        )
        checks = [
            (key,
             # The uuid is converted to a string before it is compared
             c_uuid_to_str(getattr(self.info, key))
             if key == "pi_uuid" else getattr(self.info, key),
             val)
            for key, val in expected
            if val is not None]
        return self._check_info(checks)

    def check_pool_space(self, ps_free_min=None, ps_free_max=None,
                         ps_free_mean=None, ps_ntargets=None, ps_padding=None):
        """Check the pool info space attributes.

        Args:
            ps_free_min (list, optional): minimum free space per device.
                Defaults to None.
            ps_free_max (list, optional): maximum free space per device.
                Defaults to None.
            ps_free_mean (list, optional): mean free space per device.
                Defaults to None.
            ps_ntargets (int, optional): number of targets. Defaults to None.
            ps_padding (int, optional): space padding. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        per_device = (
            ("ps_free_min", ps_free_min),
            ("ps_free_max", ps_free_max),
            ("ps_free_mean", ps_free_mean),
        )
        scalars = (
            ("ps_ntargets", ps_ntargets),
            ("ps_padding", ps_padding),
        )
        checks = []
        for key, val in per_device:
            # Only list arguments are checked; each entry is compared with
            # the pool value stored at the same index
            if isinstance(val, list):
                for index, item in enumerate(val):
                    checks.append((
                        "{}[{}]".format(key, index),
                        getattr(self.info.pi_space, key)[index],
                        item))
        for key, val in scalars:
            if val is not None:
                # list.append() accepts one argument: the check tuple
                checks.append((key, getattr(self.info.pi_space, key), val))
        return self._check_info(checks)

    def check_pool_daos_space(self, s_total=None, s_free=None):
        """Check the pool info daos space attributes.

        Args:
            s_total (list, optional): total space per device. Defaults to None.
            s_free (list, optional): free space per device. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        expected = (
            ("s_total", s_total),
            ("s_free", s_free),
        )
        checks = [
            ("{}_{}".format(key, index),
             getattr(self.info.pi_space.ps_space, key)[index],
             item)
            for key, val in expected
            if val is not None
            for index, item in enumerate(val)]
        return self._check_info(checks)

    def check_rebuild_status(self, rs_version=None, rs_pad_32=None,
                             rs_errno=None, rs_done=None,
                             rs_toberb_obj_nr=None, rs_obj_nr=None,
                             rs_rec_nr=None):
        """Check the pool info rebuild attributes.

        Args:
            rs_version (int, optional): rebuild version. Defaults to None.
            rs_pad_32 (int, optional): rebuild pad. Defaults to None.
            rs_errno (int, optional): rebuild error number. Defaults to None.
            rs_done (int, optional): rebuild done flag. Defaults to None.
            rs_toberb_obj_nr (int, optional): number of objects to be rebuilt.
                Defaults to None.
            rs_obj_nr (int, optional): number of rebuilt objects.
                Defaults to None.
            rs_rec_nr (int, optional): number of rebuilt records.
                Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        expected = (
            ("rs_version", rs_version),
            ("rs_pad_32", rs_pad_32),
            ("rs_errno", rs_errno),
            ("rs_done", rs_done),
            ("rs_toberb_obj_nr", rs_toberb_obj_nr),
            ("rs_obj_nr", rs_obj_nr),
            ("rs_rec_nr", rs_rec_nr),
        )
        checks = [
            (key, getattr(self.info.pi_rebuild_st, key), val)
            for key, val in expected
            if val is not None]
        return self._check_info(checks)

    def _check_info(self, check_list):
        """Verify each pool info attribute value matches an expected value.

        Every check is logged and every mismatch is reported as an error; the
        remaining checks are still evaluated after a mismatch.

        Args:
            check_list (list): a list of tuples containing the name of the pool
                information attribute to check, the current value of the
                attribute, and the expected value of the attribute.

        Returns:
            bool: True if at least one check has been specified and all the
            actual and expected values match; False otherwise.

        """
        # An empty list of checks is reported as a failure
        check_status = bool(check_list)
        for check, actual, expect in check_list:
            self.log.info(
                "Verifying the pool %s: %s ?= %s", check, actual, expect)
            if actual != expect:
                msg = "The {} does not match: actual: {}, expected: {}".format(
                    check, actual, expect)
                self.log.error(msg)
                check_status = False
        return check_status

    def rebuild_complete(self):
        """Determine if the pool rebuild is complete.

        Returns:
            bool: True if pool rebuild is complete; False otherwise

        """
        self.get_info()
        return self.info.pi_rebuild_st.rs_done == 1

    def wait_for_rebuild(self, to_start, interval=1):
        """Wait for the rebuild to start or end.

        Polls rebuild_complete() until its result differs from to_start.

        Args:
            to_start (bool): whether to wait for rebuild to start or end
            interval (int): number of seconds to wait in between rebuild
                completion checks
        """
        self.log.info(
            "Waiting for rebuild to %s ...",
            "start" if to_start else "complete")
        while self.rebuild_complete() == to_start:
            self.log.info(
                "  Rebuild %s ...",
                "has not yet started" if to_start else "in progress")
            sleep(interval)
        self.log.info(
            "Rebuild %s detected", "start" if to_start else "completion")

    @fail_on(DaosApiError)
    def start_rebuild(self, server_group, rank, daos_log):
        """Kill a specific server rank using this pool.

        The rank is excluded from the pool once the server has been killed.

        Args:
            server_group (str): daos server group name
            rank (int): daos server rank to kill
            daos_log (DaosLog): object for logging messages
        """
        msg = "Killing DAOS server {} (rank {})".format(server_group, rank)
        self.log.info(msg)
        daos_log.info(msg)
        server = DaosServer(self.context, server_group, rank)
        server.kill(1)
        msg = "Excluding server rank {} from pool {}".format(rank, self.uuid)
        self.log.info(msg)
        daos_log.info(msg)
        self.pool.exclude([rank])

    def check_files(self, hosts):
        """Check if pool files exist on the specified list of hosts.

        Args:
            hosts (list): list of hosts

        Returns:
            bool: True if the files for this pool exist on each host; False
                otherwise

        """
        return check_pool_files(self.log, hosts, self.uuid.lower())

    def write_file(self, orterun, processes, hostfile, size, timeout=60):
        """Write a file to the pool.

        Runs the write_some_data.py script found next to this module through
        orterun.

        Args:
            orterun (str): full path to the orterun command
            processes (int): number of processes to launch
            hostfile (str): hostfile passed to orterun via --hostfile
            size (int): size of the file to create in bytes
            timeout (int, optional): number of seconds before timing out the
                command. Defaults to 60 seconds.

        Returns:
            process.CmdResult: command execution result

        """
        self.log.info("Writing %s bytes to pool %s", size, self.uuid)
        env = {
            "DAOS_POOL": self.uuid,
            "DAOS_SVCL": "1",
            "DAOS_SINGLETON_CLI": "1",
        }
        current_path = os.path.dirname(os.path.abspath(__file__))
        command = "{} --np {} --hostfile {} {} {} testfile".format(
            orterun, processes, hostfile,
            os.path.join(current_path, "write_some_data.py"), size)
        # NOTE(review): the positional arguments presumably map to verbose,
        # ignore_status, allow_output_check, shell and env of avocado's
        # process.run - confirm against the installed avocado version.
        return process.run(command, timeout, True, False, "both", True, env)
Пример #14
0
    def test_exclude(self):
        """
        Pass bad parameters to pool exclude.

        Each yaml parameter is a (value, expected result) pair. The test is
        expected to fail as soon as one of the parameters is marked 'FAIL'.

        :avocado: tags=pool,poolexclude,badparam,badexclude
        """
        # parameters used in pool create
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')

        createuid = os.geteuid()
        creategid = os.getegid()

        # Accumulate a list of pass/fail indicators representing what is
        # expected for each parameter then "and" them to determine the
        # expected result of the test
        expected_for_param = []

        tgtlist = self.params.get("ranklist", '/run/testparams/tgtlist/*/')
        targets = []

        if tgtlist[0] == "NULLPTR":
            targets = None
            self.cancel("skipping null pointer test until DAOS-1929 is fixed")
        else:
            targets.append(tgtlist[0])
        expected_for_param.append(tgtlist[1])

        svclist = self.params.get("ranklist", '/run/testparams/svrlist/*/')
        svc = svclist[0]
        expected_for_param.append(svclist[1])

        setlist = self.params.get("setname",
                                  '/run/testparams/connectsetnames/*/')
        connectset = setlist[0]
        expected_for_param.append(setlist[1])

        uuidlist = self.params.get("uuid", '/run/testparams/UUID/*/')
        excludeuuid = uuidlist[0]
        expected_for_param.append(uuidlist[1])

        # if any parameter is FAIL then the test should FAIL, in this test
        # virtually everyone should FAIL since we are testing bad parameters
        expected_result = 'FAIL' if 'FAIL' in expected_for_param else 'PASS'

        saved_svc = None
        saved_grp = None
        saved_uuid = None
        pool = None
        try:
            # setup the DAOS python API
            with open('../../../.build_vars.json') as build_file:
                data = json.load(build_file)
            context = DaosContext(data['PREFIX'] + '/lib/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)

            # trash the the pool service rank list
            # NOTE(review): the statements following each self.cancel() call
            # below presumably never run while the cancel is in place (cancel
            # is expected to raise); they are kept for when the referenced
            # tickets are fixed.
            if svc != 'VALID':
                self.cancel("skipping this test until DAOS-1931 is fixed")
                saved_svc = RankList(pool.svc.rl_ranks, pool.svc.rl_nr)
                pool.svc = None

            # trash the pool group value
            if connectset == 'NULLPTR':
                saved_grp = pool.group
                pool.group = None

            # trash the UUID value in various ways
            if excludeuuid == 'NULLPTR':
                self.cancel("skipping this test until DAOS-1932 is fixed")
                ctypes.memmove(saved_uuid, pool.uuid, 16)
                pool.uuid = 0
            if excludeuuid == 'CRAP':
                self.cancel("skipping this test until DAOS-1932 is fixed")
                ctypes.memmove(saved_uuid, pool.uuid, 16)
                pool.uuid[4] = 244

            pool.exclude(targets)

            if expected_result == 'FAIL':
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
        finally:
            if pool is not None:
                # put back whatever was trashed so the pool can be destroyed
                if saved_svc is not None:
                    pool.svc = saved_svc
                if saved_grp is not None:
                    pool.group = saved_grp
                if saved_uuid is not None:
                    ctypes.memmove(pool.uuid, saved_uuid, 16)

                # Only destroy a pool that was actually created; a destroy
                # error raised from here would otherwise hide the real test
                # result when pool.create() itself failed.
                if pool.attached:
                    pool.destroy(1)
Пример #15
0
class TestPool(TestDaosApiBase):
    """A class for functional testing of DaosPools objects."""

    def __init__(self, context, log, cb_handler=None):
        """[summary].

        Args:
            context (DaosContext): [description]
            log (logging): logging object used to report the pool status
            cb_handler (CallbackHandler, optional): callback object to use with
                the API methods. Defaults to None.
        """
        super(TestPool, self).__init__("/run/pool/*", cb_handler)
        self.context = context
        self.log = log
        self.uid = os.geteuid()
        self.gid = os.getegid()

        self.mode = BasicParameter(None)
        self.name = BasicParameter(None)
        self.group = BasicParameter(None)
        self.svcn = BasicParameter(None)
        self.target_list = BasicParameter(None)
        self.scm_size = BasicParameter(None)
        self.nvme_size = BasicParameter(None)

        self.pool = None
        self.uuid = None
        self.info = None
        self.svc_ranks = None
        self.connected = False

    @fail_on(DaosApiError)
    def create(self):
        """Create a pool.

        Destroys an existing pool if defined and assigns self.pool and
        self.uuid.
        """
        self.destroy()
        self.log.info(
            "Creating a pool{}".format(
                " on targets {}".format(self.target_list.value)
                if self.target_list.value else ""))
        self.pool = DaosPool(self.context)
        kwargs = {
            "mode": self.mode.value, "uid": self.uid, "gid": self.gid,
            "scm_size": self.scm_size.value, "group": self.name.value}
        for key in ("target_list", "svcn", "nvme_size"):
            value = getattr(self, key).value
            if value:
                kwargs[key] = value
        self._call_method(self.pool.create, kwargs)
        self.uuid = self.pool.get_uuid_str()
        self.svc_ranks = [
            int(self.pool.svc.rl_ranks[index])
            for index in range(self.pool.svc.rl_nr)]
        self.log.info("  Pool created with uuid {} and svc ranks {}".format(
            self.uuid, self.svc_ranks))

    @fail_on(DaosApiError)
    def connect(self, permission=1):
        """Connect to the pool.

        Args:
            permission (int, optional): connect permission. Defaults to 1.

        Returns:
            bool: True if the pool has been connected; False if the pool was
                already connected or the pool is not defined.

        """
        if self.pool and not self.connected:
            kwargs = {"flags": 1 << permission}
            self.log.info(
                "Connecting to pool %s with permission %s (flag: %s)",
                self.uuid, permission, kwargs["flags"])
            self._call_method(self.pool.connect, kwargs)
            self.connected = True
            return True
        return False

    @fail_on(DaosApiError)
    def disconnect(self):
        """Disconnect from connected pool.

        Returns:
            bool: True if the pool has been disconnected; False if the pool was
                already disconnected or the pool is not defined.

        """
        if self.pool and self.connected:
            self.log.info("Disonnecting from pool %s", self.uuid)
            self._call_method(self.pool.disconnect, {})
            self.connected = False
            return True
        return False

    @fail_on(DaosApiError)
    def destroy(self, force=1):
        """Destroy the pool.

        Args:
            force (int, optional): force flag. Defaults to 1.

        Returns:
            bool: True if the pool has been destroyed; False if the pool is not
                defined.

        """
        if self.pool:
            self.disconnect()
            self.log.info("Destroying pool %s", self.uuid)
            if self.pool.attached:
                self._call_method(self.pool.destroy, {"force": force})
            self.pool = None
            self.uuid = None
            self.info = None
            self.svc_ranks = None
            return True
        return False

    @fail_on(DaosApiError)
    def get_info(self):
        """Query the pool for information.

        Sets the self.info attribute.
        """
        if self.pool:
            self.connect()
            self._call_method(self.pool.pool_query, {})
            self.info = self.pool.pool_info

    def check_pool_info(self, pi_uuid=None, pi_ntargets=None, pi_nnodes=None,
                        pi_ndisabled=None, pi_map_ver=None, pi_leader=None,
                        pi_bits=None):
        # pylint: disable=unused-argument
        """Check the pool info attributes.

        Args:
            pi_uuid (str, optional): pool uuid. Defaults to None.
            pi_ntargets (int, optional): number of targets. Defaults to None.
            pi_nnodes (int, optional): number of nodes. Defaults to None.
            pi_ndisabled (int, optional): number of disabled. Defaults to None.
            pi_map_ver (int, optional): pool map version. Defaults to None.
            pi_leader (int, optional): pool leader. Defaults to None.
            pi_bits (int, optional): pool bits. Defaults to None.

        Note:
            Arguments may also be provided as a string with a number preceeded
            by '<', '<=', '>', or '>=' for other comparisions besides the
            default '=='.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        checks = [
            (key,
             c_uuid_to_str(getattr(self.info, key))
             if key == "pi_uuid" else getattr(self.info, key),
             val)
            for key, val in locals().items()
            if key != "self" and val is not None]
        return self._check_info(checks)

    def check_pool_space(self, ps_free_min=None, ps_free_max=None,
                         ps_free_mean=None, ps_ntargets=None, ps_padding=None):
        # pylint: disable=unused-argument
        """Check the pool info space attributes.

        Args:
            ps_free_min (list, optional): minimum free space per device.
                Defaults to None.
            ps_free_max (list, optional): maximum free space per device.
                Defaults to None.
            ps_free_mean (list, optional): mean free space per device.
                Defaults to None.
            ps_ntargets (int, optional): number of targets. Defaults to None.
            ps_padding (int, optional): space padding. Defaults to None.

        Note:
            Arguments may also be provided as a string with a number preceeded
            by '<', '<=', '>', or '>=' for other comparisions besides the
            default '=='.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        checks = []
        for key in ("ps_free_min", "ps_free_max", "ps_free_mean"):
            val = locals()[key]
            if isinstance(val, list):
                for index, item in val:
                    checks.append((
                        "{}[{}]".format(key, index),
                        getattr(self.info.pi_space, key)[index],
                        item))
        for key in ("ps_ntargets", "ps_padding"):
            val = locals()[key]
            if val is not None:
                checks.append(key, getattr(self.info.pi_space, key), val)
        return self._check_info(checks)

    def check_pool_daos_space(self, s_total=None, s_free=None):
        # pylint: disable=unused-argument
        """Check the pool info daos space attributes.

        Args:
            s_total (list, optional): total space per device. Defaults to None.
            s_free (list, optional): free space per device. Defaults to None.

        Note:
            Arguments may also be provided as a string with a number preceeded
            by '<', '<=', '>', or '>=' for other comparisions besides the
            default '=='.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        checks = [
            ("{}_{}".format(key, index),
             getattr(self.info.pi_space.ps_space, key)[index],
             item)
            for key, val in locals().items()
            if key != "self" and val is not None
            for index, item in enumerate(val)]
        return self._check_info(checks)

    def check_rebuild_status(self, rs_version=None, rs_pad_32=None,
                             rs_errno=None, rs_done=None,
                             rs_toberb_obj_nr=None, rs_obj_nr=None,
                             rs_rec_nr=None):
        # pylint: disable=unused-argument
        """Check the pool info rebuild attributes.

        Args:
            rs_version (int, optional): rebuild version. Defaults to None.
            rs_pad_32 (int, optional): rebuild pad. Defaults to None.
            rs_errno (int, optional): rebuild error number. Defaults to None.
            rs_done (int, optional): rebuild done flag. Defaults to None.
            rs_toberb_obj_nr (int, optional): number of objects to be rebuilt.
                Defaults to None.
            rs_obj_nr (int, optional): number of rebuilt objects.
                Defaults to None.
            rs_rec_nr (int, optional): number of rebuilt records.
                Defaults to None.

        Note:
            Arguments may also be provided as a string with a number preceeded
            by '<', '<=', '>', or '>=' for other comparisions besides the
            default '=='.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        checks = [
            (key, getattr(self.info.pi_rebuild_st, key), val)
            for key, val in locals().items()
            if key != "self" and val is not None]
        return self._check_info(checks)

    def _check_info(self, check_list):
        """Verify each pool info attribute value matches an expected value.

        Args:
            check_list (list): a list of tuples containing the name of the pool
                information attribute to check, the current value of the
                attribute, and the expected value of the attribute. If the
                expected value is specified as a string with a number preceeded
                by '<', '<=', '>', or '>=' then this comparision will be used
                instead of the defult '=='.

        Returns:
            bool: True if at least one check has been specified and all the
            actual and expected values match; False otherwise.

        """
        check_status = len(check_list) > 0
        for check, actual, expect in check_list:
            # Determine which comparision to utilize for this check
            compare = ("==", lambda x, y: x == y, "does not match")
            if isinstance(expect, str):
                comparisions = {
                    "<": (lambda x, y: x < y, "is too large"),
                    ">": (lambda x, y: x > y, "is too small"),
                    "<=": (
                        lambda x, y: x <= y, "is too large or does not match"),
                    ">=": (
                        lambda x, y: x >= y, "is too small or does not match"),
                }
                for key, val in comparisions.items():
                    # If the expected value is preceeded by one of the known
                    # comparision keys, use the comparision and remove the key
                    # from the expected value
                    if expect[:len(key)] == key:
                        compare = (key, val[0], val[1])
                        expect = expect[len(key):]
                        try:
                            expect = int(expect)
                        except ValueError:
                            # Allow strings to be strings
                            pass
                        break
            self.log.info(
                "Verifying the pool %s: %s %s %s",
                check, actual, compare[0], expect)
            if not compare[1](actual, expect):
                msg = "  The {} {}: actual={}, expected={}".format(
                    check, compare[2], actual, expect)
                self.log.error(msg)
                check_status = False
        return check_status

    def rebuild_complete(self):
        """Determine if the pool rebuild is complete.

        Returns:
            bool: True if pool rebuild is complete; False otherwise

        """
        self.get_info()
        return self.info.pi_rebuild_st.rs_done == 1

    def wait_for_rebuild(self, to_start, interval=1):
        """Wait for the rebuild to start or end.

        Args:
            to_start (bool): whether to wait for rebuild to start or end
            interval (int): number of seconds to wait in between rebuild
                completion checks
        """
        self.log.info(
            "Waiting for rebuild to %s ...",
            "start" if to_start else "complete")
        while self.rebuild_complete() == to_start:
            self.log.info(
                "  Rebuild %s ...",
                "has not yet started" if to_start else "in progress")
            sleep(interval)
        self.log.info(
            "Rebuild %s detected", "start" if to_start else "completion")

    @fail_on(DaosApiError)
    def start_rebuild(self, server_group, rank, daos_log):
        """Kill a specific server rank using this pool.

        Args:
            server_group (str): daos server group name
            rank (int): daos server rank to kill
            daos_log (DaosLog): object for logging messages

        Returns:
            bool: True if the server has been killed and the rank has been
            excluded from the pool; False if the pool is undefined

        """
        msg = "Killing DAOS server {} (rank {})".format(server_group, rank)
        self.log.info(msg)
        daos_log.info(msg)
        server = DaosServer(self.context, server_group, rank)
        server.kill(1)
        return self.exclude(rank, daos_log)

    @fail_on(DaosApiError)
    def exclude(self, rank, daos_log):
        """Exclude a single rank from this pool by hand.

        Args:
            rank (int): daos server rank to exclude
            daos_log (DaosLog): object for logging messages

        Returns:
            bool: True if rank has been excluded from the pool; False if the
                pool is undefined

        """
        # nothing to exclude from when no pool object is set
        if not self.pool:
            return False

        exclude_msg = "Excluding server rank {} from pool {}".format(
            rank, self.uuid)
        self.log.info(exclude_msg)
        daos_log.info(exclude_msg)
        self.pool.exclude([rank])
        return True

    def check_files(self, hosts):
        """Look for this pool's files on every host in the list.

        Args:
            hosts (list): list of hosts

        Returns:
            bool: True if the files for this pool exist on each host; False
                otherwise

        """
        # the lookup is done with the lower-case form of the pool uuid
        pool_uuid = self.uuid.lower()
        return check_pool_files(self.log, hosts, pool_uuid)

    def write_file(self, orterun, processes, hostfile, size, timeout=60):
        """Write a file to the pool.

        Runs the write_some_data.py script that sits next to this module
        through orterun, with the pool uuid passed in the environment.

        Args:
            orterun (str): full path to the orterun command
            processes (int): number of processes to launch
            hostfile: value passed to orterun's --hostfile option, i.e. the
                clients from which to write the file
            size (int): size of the file to create in bytes
            timeout (int, optional): number of seconds before timing out the
                command. Defaults to 60 seconds.

        Returns:
            process.CmdResult: command execution result

        """
        self.log.info("Writing %s bytes to pool %s", size, self.uuid)
        env = {
            "DAOS_POOL": self.uuid,
            "DAOS_SVCL": "1",
            "DAOS_SINGLETON_CLI": "1",
            "PYTHONPATH": os.getenv("PYTHONPATH", ""),
        }
        current_path = os.path.dirname(os.path.abspath(__file__))
        command = "{} --np {} --hostfile {} {} {} testfile".format(
            orterun, processes, hostfile,
            os.path.join(current_path, "write_some_data.py"), size)
        # Keyword arguments spell out what used to be six positional values.
        return process.run(
            command, timeout=timeout, verbose=True, ignore_status=False,
            allow_output_check="both", shell=True, env=env)

    def get_pool_daos_space(self):
        """Collect the daos space attributes of the pool info.

        The pool info is refreshed with get_info() first.

        Returns:
            dict: a dictionary of lists of the daos space attributes, keyed by
                "s_total" and "s_free"

        """
        self.get_info()
        daos_space = {}
        for name in ("s_total", "s_free"):
            daos_space[name] = getattr(self.info.pi_space.ps_space, name)
        return daos_space

    def display_pool_daos_space(self, msg=None):
        """Log the daos space attributes of the pool info.

        Args:
            msg (str, optional): optional text to include in the output.
                Defaults to None.
        """
        daos_space = self.get_pool_daos_space()

        # one "<attribute>[<index>]=<value>" entry per item, attributes sorted
        entries = []
        for name in sorted(daos_space.keys()):
            for position, value in enumerate(daos_space[name]):
                entries.append("{}[{}]={}".format(name, position, value))

        # only string messages are appended to the heading
        if isinstance(msg, str):
            suffix = " " + msg
        else:
            suffix = ""

        self.log.info(
            "Pool %s space%s:\n  %s", self.uuid, suffix, "\n  ".join(entries))
Пример #16
0
    def test_rebuild_with_io(self):
        """
        Test ID: Rebuild-003

        Test Description: Trigger a rebuild while I/O is ongoing.

        Use Cases:
          -- single pool, single client performing continuous read/write/verify
             sequence while failure/rebuild is triggered in another process

        :avocado: tags=pool,rebuild,rebuildwithio
        """

        # the rebuild tests need to redo this stuff each time so not in setup
        # as it usually would be
        server_group = self.params.get("name", '/server_config/',
                                       'daos_server')

        self.hostlist_servers = self.params.get("test_machines", '/run/hosts/')
        hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.workdir)

        try:
            # start the agent and the servers for this test only; both are
            # shut down again in the finally block below
            self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                        self.hostlist_servers)
            server_utils.run_server(hostfile_servers, server_group,
                                    self.basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)
            # NOTE(review): 1 << 1 is presumably the read-write connect flag
            # -- confirm against the DaosPool API
            pool.connect(1 << 1)
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            # get pool status and make sure it all looks good before we start
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            # the rebuild version is read but not used by any later check
            dummy_pool_version = pool.pool_info.pi_rebuild_st.rs_version

            # do I/O for 30 seconds
            dummy_bw = io_utilities.continuous_io(container, 30)

            # trigger the rebuild: kill the server rank named in the yaml and
            # exclude it from the pool
            rank = self.params.get("rank", '/run/testparams/ranks/*')
            server = DaosServer(self.context, server_group, rank)
            server.kill(1)
            pool.exclude([rank])

            # do another 30 seconds of I/O,
            # waiting for some improvements in server bootstrap
            # at which point we can move the I/O to a separate client and
            # really pound it with I/O
            dummy_bw = io_utilities.continuous_io(container, 30)

            # wait for the rebuild to finish, polling every 2 seconds
            # NOTE(review): there is no upper bound on this wait; the loop
            # only ends when rs_done becomes 1 or pool_query() raises
            while True:
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check rebuild statistics: exactly one disabled target, no
            # rebuild error, and at least one rebuilt object and record
            if pool.pool_info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr <= 0:
                self.fail("No objects have been rebuilt.")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr <= 0:
                self.fail("No records have been rebuilt.")

        except (ValueError, DaosApiError) as excep:
            # any API or value error means the scenario itself broke
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # stop the servers, remove the hostfile and clean up any pools
            # left behind; the inner finally stops the agent and kills the
            # servers even if one of those cleanup steps raises
            try:
                server_utils.stop_server(hosts=self.hostlist_servers)
                os.remove(hostfile_servers)
                # really make sure everything is gone
                check_for_pool.cleanup_pools(self.hostlist_servers)
            finally:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
                server_utils.kill_server(self.hostlist_servers)
Пример #17
0
class RebuildNoCap(Test):

    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: tags=pool,rebuild,nocap
    """
    def setUp(self):
        """Start the agent and servers, create a pool and write data to it."""
        # Define everything tearDown() inspects before any step below can
        # raise, so cleanup after a partial setup does not itself blow up on
        # a missing attribute.
        self.agent_sessions = None
        self.pool = None
        self.hostfile = None

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = AgentUtils.run_agent(self.basepath, self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.pool.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool
        # int() handles arbitrarily large values on Python 2 and 3 alike
        how_many_bytes = int(self.params.get("datasize",
                                             '/run/testparams/datatowrite/'))
        # Build the script path from the source root (PREFIX/..).  Joining
        # PREFIX with a component that starts with "/" would discard PREFIX,
        # because os.path.join restarts at any absolute component.
        exepath = os.path.join(self.basepath, "src", "tests", "ftest", "util",
                               "write_some_data.py")
        cmd = ("export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"
               " --np 1 --host {1} {2} {3} testfile").format(
                   uuid, self.hostlist[0], exepath, how_many_bytes)
        # NOTE(review): the exit status of the writer is ignored here;
        # presumably a failed write is tolerated on purpose -- confirm
        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """Remove the hostfile, destroy the pool, stop agent and servers."""

        try:
            if self.hostfile is not None:
                os.remove(self.hostfile)
            if self.pool:
                self.pool.destroy(1)
        finally:
            # the agent and the servers are stopped even if the cleanup above
            # raised
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            server_utils.stop_server(hosts=self.hostlist)

    def test_rebuild_no_capacity(self):
        """
        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            d_server = DaosServer(self.context, bytes(self.server_group),
                                  svr_to_kill)

            time.sleep(1)
            d_server.kill(1)

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if status.pi_ntargets != len(self.hostlist):
                self.fail("target count wrong.\n")
            if status.pi_ndisabled != 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st.rs_errno == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #18
0
class PoolSvc(Test):
    """
    Tests svc argument while pool create.

    """
    def setUp(self):
        """Read the build paths and start the DAOS servers for the test."""
        # Define what tearDown() inspects before any step below can raise.
        self.POOL = None
        self.hostfile = None

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.tmp = build_paths['PREFIX'] + '/tmp'

        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_server')
        self.daosctl = self.basepath + '/install/bin/daosctl'

        # setup the DAOS python API
        self.Context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.hostlist = self.params.get("test_machines", '/run/hosts/*')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, self.tmp)
        print("Host file is: {}".format(self.hostfile))

        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)
        time.sleep(5)

    def tearDown(self):
        """Remove the hostfile, destroy an attached pool, stop the servers."""
        try:
            if self.hostfile is not None:
                os.remove(self.hostfile)
            if self.POOL is not None and self.POOL.attached:
                self.POOL.destroy(1)
        finally:
            ServerUtils.stopServer(hosts=self.hostlist)

    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        :avocado: tags=pool,svc
        """

        # parameters used in pool create
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

        # createsvc is a (number of pool service members, expected result)
        # pair taken from the yaml
        svc_count = createsvc[0]
        expected_result = createsvc[1]

        try:
            # initialize a python pool object then create the underlying
            # daos storage
            self.POOL = DaosPool(self.Context)
            self.POOL.create(createmode, createuid, creategid,
                             createsize, createsetid, None, None, svc_count)
            self.POOL.connect(1 << 1)

            # checking returned rank list value for single server: the first
            # (and only) entry must be rank 0.  The index is written out
            # explicitly; the loop counter used further down is not defined
            # yet at this point.
            if (len(self.hostlist) == 1 and
                    int(self.POOL.svc.rl_ranks[0]) != 0):
                self.fail("Incorrect returned rank list value for single server")

            # checking returned rank list for server more than 1: count the
            # leading entries that are in (0, svc_count] and are not 999999
            # NOTE(review): the loop relies on an out-of-range entry ending
            # the list; it does not consult the list length -- confirm
            i = 0
            while (int(self.POOL.svc.rl_ranks[i]) > 0 and
                   int(self.POOL.svc.rl_ranks[i]) <= svc_count and
                   int(self.POOL.svc.rl_ranks[i]) != 999999):
                i += 1
            if i != svc_count:
                self.fail("Length of Returned Rank list is not equal to"
                          " the number of Pool Service members.\n")

            # every returned rank has to be unique
            ranks = [int(self.POOL.svc.rl_ranks[j]) for j in range(svc_count)]
            if len(ranks) != len(set(ranks)):
                self.fail("Duplicate values in returned rank list")

            if svc_count == 3:
                # kill the service leader and check the pool still connects
                self.POOL.disconnect()
                cmd = ('{0} kill-leader  --uuid={1}'
                       .format(self.daosctl, self.POOL.get_uuid_str()))
                process.system(cmd)
                self.POOL.connect(1 << 1)
                self.POOL.disconnect()
                # then kill and exclude rank 2 and connect once more
                server = DaosServer(self.Context, self.server_group, 2)
                server.kill(1)
                self.POOL.exclude([2])
                self.POOL.connect(1 << 1)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
Пример #19
0
    def test_exclude(self):
        """
        Pass bad parameters to pool exclude

        :avocado: tags=pool,poolexclude,badparam,badexclude
        """
        # parameters used in pool create
        createmode = self.params.get("mode", '/run/excludetests/createmode/')
        createuid = self.params.get("uid", '/run/excludetests/createuid/')
        creategid = self.params.get("gid", '/run/excludetests/creategid/')
        createsetid = self.params.get("setname",
                                      '/run/excludetests/createset/')
        createsize = self.params.get("size", '/run/excludetests/createsize/')

        # Accumulate a list of pass/fail indicators representing what is
        # expected for each parameter then "and" them to determine the
        # expected result of the test.  Each yaml entry is a
        # (value, expected result) pair.
        expected_for_param = []

        tgtlist = self.params.get("ranklist", '/run/excludetests/tgtlist/*/')
        targets = [tgtlist[0]]
        expected_for_param.append(tgtlist[1])

        # svc, connectset and excludeuuid are only consumed by the disabled
        # "trash" steps further down
        svclist = self.params.get("ranklist", '/run/excludetests/svrlist/*/')
        svc = svclist[0]
        expected_for_param.append(svclist[1])

        setlist = self.params.get("setname",
                                  '/run/excludetests/connectsetnames/*/')
        connectset = setlist[0]
        expected_for_param.append(setlist[1])

        uuidlist = self.params.get("uuid", '/run/excludetests/UUID/*/')
        excludeuuid = uuidlist[0]
        expected_for_param.append(uuidlist[1])

        # if any parameter is FAIL then the test should FAIL, in this test
        # virtually everyone should FAIL since we are testing bad parameters
        expected_result = 'FAIL' if 'FAIL' in expected_for_param else 'PASS'

        pool = None
        try:
            # setup the DAOS python API
            with open('../../../.build_vars.json') as build_file:
                data = json.load(build_file)
            context = DaosContext(data['PREFIX'] + '/lib/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(context)
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)

            # trash the the pool service rank list
            #if not svc == 'VALID':
            #    rl_ranks = ctypes.POINTER(ctypes.c_uint)()
            #    pool.svc = RankList(rl_ranks, 1);

            # trash the pool group value
            #if connectset == None:
            #    pool.group = None

            # trash the UUID value in various ways
            #if excludeuuid == None:
            #    pool.uuid = None
            #if excludeuuid == 'CRAP':
            #    pool.uuid[4] = 244

            pool.exclude(targets)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except ValueError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result in ['PASS']:
                self.fail("Test was expected to pass but it failed.\n")
        finally:
            # do not leave the pool behind for the next test; only a pool
            # that was actually created is destroyed
            if pool is not None and pool.attached:
                pool.destroy(1)
Пример #20
0
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Expand on the basic test by rebuilding 2
        pools at once.

        Use Cases:
          -- multipool rebuild, single client, various object and record counts

        :avocado: tags=pool,rebuild,rebuildmulti
        """

        # the rebuild tests need to redo this stuff each time so not in setup
        # as it usually would be
        # NOTE(review): setid is read here but not used anywhere below
        setid = self.params.get("setname", '/run/testparams/setnames/')
        server_group = self.params.get("server_group", '/server/',
                                       'daos_server')

        basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")
        tmp = self.build_paths['PREFIX'] + '/tmp'

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp)

        try:
            # the servers are started per test and stopped in the finally
            # block below
            ServerUtils.runServer(hostfile, server_group, basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize python pool object then create the underlying
            # daos storage, the way the code is now the pools should be
            # on the same storage and have the same service leader
            pool1 = DaosPool(self.Context)
            pool2 = DaosPool(self.Context)
            pool1.create(createmode, createuid, creategid, createsize,
                         createsetid, None)
            pool2.create(createmode, createuid, creategid, createsize,
                         createsetid, None)

            # want an open connection during rebuild
            pool1.connect(1 << 1)
            pool2.connect(1 << 1)

            # create containers
            container1 = DaosContainer(self.Context)
            container1.create(pool1.handle)
            container2 = DaosContainer(self.Context)
            container2.create(pool2.handle)

            # now open them
            container1.open()
            container2.open()

            # how many objects and records are we creating
            objcount = self.params.get("objcount",
                                       '/run/testparams/numobjects/*')
            reccount = self.params.get("reccount",
                                       '/run/testparams/numrecords/*')
            # with no objects there are no records either, which keeps the
            # expected rebuilt record count (reccount * objcount) at zero
            if objcount == 0:
                reccount = 0

            # which rank to write to and kill
            rank = self.params.get("rank", '/run/testparams/ranks/*')

            # how much data to write with each key
            size = self.params.get("size", '/run/testparams/datasize/')

            # Putting the same data in both pools, at least for now to simplify
            # checking its correct
            saved_data = []
            for i in range(0, objcount):
                obj = None
                for j in range(0, reccount):

                    # make some stuff up and write: random 5 character dkey
                    # and akey, and `size` random characters of data
                    dkey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    akey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    data = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(size))

                    # NOTE(review): the obj returned by the container1 write
                    # is handed to the container2 write, and only the obj/tx
                    # returned by container2 are kept and later used to read
                    # from both containers -- confirm this is intended
                    obj, tx = container1.write_an_obj(data, len(data), dkey,
                                                      akey, obj, rank)
                    obj, tx = container2.write_an_obj(data, len(data), dkey,
                                                      akey, obj, rank)

                    saved_data.append((obj, dkey, akey, data, tx))

                    # read the data back and make sure its correct
                    # first from the container in pool 1
                    data2 = container1.read_an_obj(size, dkey, akey, obj, tx)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P1, read it back, didn't match\n")

                    # then from the container in pool 2
                    data2 = container2.read_an_obj(size, dkey, akey, obj, tx)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P2, read it back, didn't match\n")

            # kill a server
            server = DaosServer(self.Context, server_group, rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool1.exclude([rank])
            pool2.exclude([rank])

            # check that rebuild finishes, no errors, progress data as
            # know it to be.  Check pool 1 first then we'll check 2 below.
            # NOTE(review): this wait has no timeout; it polls every 2
            # seconds until rs_done becomes 1
            while True:
                pool1.pool_query()
                if pool1.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true,
            if pool1.pool_info.pi_ndisabled != 1:
                self.fail(
                    "P1 number disabled targets reporting incorrectly: {}".
                    format(pool1.pool_info.pi_ndisabled))
            if pool1.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}".format(
                    pool1.pool_info.pi_rebuild_st.rs_errno))
            if pool1.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
            if pool1.pool_info.pi_rebuild_st.rs_rec_nr != (reccount *
                                                           objcount):
                self.fail("P1 rebuilt recs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_rec_nr,
                    reccount * objcount))

            # now that the rebuild finished verify the records are correct
            # each saved tuple is (obj, dkey, akey, data, tx)
            for tup in saved_data:
                data2 = container1.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

            # now check the other pool, again waiting without a timeout
            while True:
                pool2.pool_query()
                if pool2.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true
            if pool2.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number disabled targets reporting incorrectly: {}".format(
                        pool2.pool_info.pi_ndisabled))
            if pool2.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool2.pool_info.pi_rebuild_st.rs_errno))
            if pool2.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
            if pool2.pool_info.pi_rebuild_st.rs_rec_nr != (reccount *
                                                           objcount):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_rec_nr,
                    (reccount * objcount)))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container2.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as e:
            # an API error anywhere above means the scenario itself broke
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # stop the servers, remove the hostfile, clean up leftover pools
            # and finally kill any remaining servers
            # NOTE(review): these steps are not individually protected; if
            # one raises, the ones after it do not run
            ServerUtils.stopServer(hosts=self.hostlist)
            os.remove(hostfile)
            CheckForPool.CleanupPools(self.hostlist)
            ServerUtils.killServer(self.hostlist)
Пример #21
0
    def test_exclude(self):
        """
        Pass bad parameters to pool exclude

        :avocado: tags=pool,poolexclude,badparam,badexclude
        """
        # parameters used in pool create
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')

        createuid = os.geteuid()
        creategid = os.getegid()

        # Accumulate a list of pass/fail indicators representing what is
        # expected for each parameter then "and" them to determine the
        # expected result of the test.  Each yaml entry is a
        # (value, expected result) pair.
        expected_for_param = []

        tgtlist = self.params.get("ranklist", '/run/testparams/tgtlist/*/')
        targets = []

        if tgtlist[0] == "NULLPTR":
            targets = None
            self.cancel("skipping null pointer test until DAOS-1929 is fixed")
        else:
            targets.append(tgtlist[0])
        expected_for_param.append(tgtlist[1])

        svclist = self.params.get("ranklist", '/run/testparams/svrlist/*/')
        svc = svclist[0]
        expected_for_param.append(svclist[1])

        setlist = self.params.get("setname",
                                  '/run/testparams/connectsetnames/*/')
        connectset = setlist[0]
        expected_for_param.append(setlist[1])

        uuidlist = self.params.get("uuid", '/run/testparams/UUID/*/')
        excludeuuid = uuidlist[0]
        expected_for_param.append(uuidlist[1])

        # if any parameter is FAIL then the test should FAIL, in this test
        # virtually everyone should FAIL since we are testing bad parameters
        expected_result = 'FAIL' if 'FAIL' in expected_for_param else 'PASS'

        # original values of whatever gets trashed below, restored in the
        # finally block so the pool can be destroyed
        saved_svc = None
        saved_grp = None
        saved_uuid = None       # byte copy of the original UUID contents
        saved_uuid_buf = None   # the pool's original uuid object
        pool = None
        try:
            # setup the DAOS python API
            with open('../../../.build_vars.json') as build_file:
                data = json.load(build_file)
            context = DaosContext(data['PREFIX'] + '/lib/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(context)
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)

            # trash the the pool service rank list
            if not svc == 'VALID':
                self.cancel("skipping this test until DAOS-1931 is fixed")
                # not reached until the cancel above is removed
                saved_svc = RankList(pool.svc.rl_ranks, pool.svc.rl_nr)
                pool.svc = None

            # trash the pool group value
            if connectset == 'NULLPTR':
                saved_grp = pool.group
                pool.group = None

            # trash the UUID value in various ways
            if excludeuuid in ('NULLPTR', 'CRAP'):
                self.cancel("skipping this test until DAOS-1932 is fixed")
                # Not reached until the cancel above is removed.  Keep both
                # the uuid object and a copy of its 16 bytes: memmove needs a
                # real destination buffer, and the object itself has to be
                # put back after it is replaced by 0.
                # assumes pool.uuid is a 16 byte ctypes buffer -- TODO confirm
                saved_uuid_buf = pool.uuid
                saved_uuid = (ctypes.c_ubyte * 16)()
                ctypes.memmove(saved_uuid, pool.uuid, 16)
                if excludeuuid == 'NULLPTR':
                    pool.uuid = 0
                else:
                    pool.uuid[4] = 244

            pool.exclude(targets)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result in ['PASS']:
                self.fail("Test was expected to pass but it failed.\n")
        finally:
            if pool is not None:
                # put back everything that was trashed before destroying
                if saved_svc is not None:
                    pool.svc = saved_svc
                if saved_grp is not None:
                    pool.group = saved_grp
                if saved_uuid is not None:
                    pool.uuid = saved_uuid_buf
                    ctypes.memmove(pool.uuid, saved_uuid, 16)

                # only a pool that was actually created can be destroyed
                if pool.attached:
                    pool.destroy(1)
Пример #22
0
class DestroyRebuild(Test):

    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: tags=pool,pooldestroy,rebuild,desreb
    """

    build_paths = []
    server_group = ""
    CONTEXT = None
    POOL = None
    hostfile = ""

    def setUp(self):
        """Start the DAOS servers and create the pool used by the test.

        Reads the build paths from ``.build_vars.json``, writes a hostfile
        for the machines listed under ``/run/hosts/``, starts the servers
        and then creates a pool from the ``/run/pool/`` parameters.
        """
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as f:
            build_paths = json.load(f)
        prefix = build_paths['PREFIX']
        self.CONTEXT = DaosContext(prefix + '/lib/')

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist,
                                                    prefix + '/tmp')

        # fire up the DAOS servers
        self.server_group = self.params.get("server_group", '/run/server/',
                                            'daos_server')
        ServerUtils.runServer(self.hostfile, self.server_group,
                              prefix + '/../')
        time.sleep(3)

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.POOL = DaosPool(self.CONTEXT)
        self.POOL.create(createmode, createuid, creategid, createsize,
                         createsetid)

        time.sleep(2)

    def tearDown(self):
        """Remove the hostfile, destroy any remaining pool, stop the servers.

        Each cleanup step runs even if the previous one raised; the first
        exception still propagates once the servers have been stopped.
        """
        try:
            try:
                # hostfile is still "" when setUp failed before writing it
                if self.hostfile:
                    os.remove(self.hostfile)
            finally:
                # the test resets POOL to None once it has destroyed the
                # pool itself, so the same pool is never destroyed twice
                if self.POOL is not None:
                    self.POOL.destroy(1)
        finally:
            ServerUtils.stopServer(hosts=self.hostlist)

    def test_destroy_while_rebuilding(self):
        """
        Kill a server, exclude its target and destroy the pool right away.

        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            # NOTE(review): bytes(str) without an encoding only works on
            # Python 2 -- confirm what DaosServer expects before porting
            sh = DaosServer(self.CONTEXT, bytes(self.server_group),
                            svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.POOL.connect(1 << 1)
            status = self.POOL.pool_query()
            if status.pi_ntargets != len(self.hostlist):
                self.fail("target count wrong.\n")
            if status.pi_ndisabled != 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            sh.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.POOL.exclude([svr_to_kill])

            print("exclude target ")

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.POOL.destroy(1)
            # the pool is gone; keep tearDown from destroying it again
            self.POOL = None
            print("destroy ")

        except DaosApiError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Пример #23
0
class PoolSvc(Test):
    """
    Tests svc argument while pool create.
    """
    def setUp(self):
        """Prepare the DAOS python API and start the agent and servers."""
        # Give every attribute tearDown reads a value before anything can
        # fail, so a partial setUp does not turn into an AttributeError
        # during cleanup.
        self.pool = None
        self.hostfile = None
        self.hostlist = None
        self.agent_sessions = None

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_server')
        self.daosctl = self.basepath + '/install/bin/daosctl'

        # setup the DAOS python API
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.hostlist = self.params.get("test_machines", '/run/hosts/*')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)
        print("Host file is: {}".format(self.hostfile))

        self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                   self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group,
                                self.basepath)

    def tearDown(self):
        """Destroy the pool if still attached, then stop agent and servers.

        The steps are nested in try/finally so a failure in one of them
        does not prevent the later ones from running.
        """
        try:
            if self.pool is not None and self.pool.attached:
                self.pool.destroy(1)
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            finally:
                server_utils.stop_server(hosts=self.hostlist)

    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        :avocado: tags=pool,svc
        """

        # parameters used in pool create
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

        # createsvc is indexed as [service member count, expected result]
        svc_count = createsvc[0]
        expected_result = createsvc[1]

        try:
            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid,
                             createsize, createsetid, None, None, svc_count)
            self.pool.connect(1 << 1)

            # checking returned rank list for server more than 1
            ranks = self.pool.svc.rl_ranks
            i = 0
            while (0 < int(ranks[i]) <= svc_count and
                   int(ranks[i]) != 999999):
                i += 1
            if i != svc_count:
                self.fail("Length of Returned Rank list is not equal to "
                          "the number of Pool Service members.\n")

            # every returned rank must be unique
            rank_list = [int(ranks[j]) for j in range(svc_count)]
            if len(rank_list) != len(set(rank_list)):
                self.fail("Duplicate values in returned rank list")

            if svc_count == 3:
                # kill the service leader and check the pool still connects
                self.pool.disconnect()
                cmd = ('{0} kill-leader  --uuid={1}'
                       .format(self.daosctl, self.pool.get_uuid_str()))
                process.system(cmd)
                self.pool.connect(1 << 1)
                self.pool.disconnect()
                # kill rank 2, exclude it and connect once more
                server = DaosServer(self.context, self.server_group, 2)
                server.kill(1)
                self.pool.exclude([2])
                self.pool.connect(1 << 1)

            if expected_result == 'FAIL':
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")