示例#1
0
    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        :avocado: tags=pool,svc
        """

        # parameters used in pool create
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

        expected_result = createsvc[1]

        try:
            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid, createsize,
                             createsetid, None, None, createsvc[0])
            self.pool.connect(1 << 1)

            # checking returned rank list for server more than 1
            iterator = 0
            while (int(self.pool.svc.rl_ranks[iterator]) > 0
                   and int(self.pool.svc.rl_ranks[iterator]) <= createsvc[0]
                   and int(self.pool.svc.rl_ranks[iterator]) != 999999):
                iterator += 1
                if iterator != createsvc[0]:
                    self.fail("Length of Returned Rank list is not equal to "
                              "the number of Pool Service members.\n")
            rank_list = []
            for iterator in range(createsvc[0]):
                rank_list.append(int(self.pool.svc.rl_ranks[iterator]))
                if len(rank_list) != len(set(rank_list)):
                    self.fail("Duplicate values in returned rank list")

            if createsvc[0] == 3:
                self.pool.disconnect()
                cmd = ('{0} kill-leader  --uuid={1}'.format(
                    self.daosctl, self.pool.get_uuid_str()))
                process.system(cmd)
                self.pool.connect(1 << 1)
                self.pool.disconnect()
                server = DaosServer(self.context, self.server_group, 2)
                server.kill(1)
                self.pool.exclude([2])
                self.pool.connect(1 << 1)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
示例#2
0
    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        :avocado: tags=pool,svc
        """

        # parameters used in pool create
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

        expected_result = createsvc[1]

        try:
            # initialize a python pool object then create the underlying
            # daos storage
            self.POOL = DaosPool(self.Context)
            self.POOL.create(createmode, createuid, creategid, createsize,
                             createsetid, None, None, createsvc[0])
            self.POOL.connect(1 << 1)

            if (createsvc[0] == 3):
                self.POOL.disconnect()
                cmd = ('{0} kill-leader  --uuid={1}'.format(
                    self.daosctl, self.POOL.get_uuid_str()))
                process.system(cmd)
                time.sleep(5)
                self.POOL.connect(1 << 1)
                self.POOL.disconnect()
                server = DaosServer(self.Context, self.server_group, 1)
                server.kill(1)
                time.sleep(5)
                self.POOL.connect(1 << 1)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

            # cleanup the pool
            self.POOL.disconnect()
            self.POOL.destroy(1)
            self.POOL = None

        except ValueError as e:
            print e
            print traceback.format_exc()
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
示例#3
0
    def test_destroy_while_rebuilding(self):
        """
        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references on of our pool target hosts
            # and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            server = DaosServer(self.context, bytes(self.server_group),
                                svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            server.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            print("exclude target ")

            #self.pool.disconnect()
            #print "disconnect "

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.pool.destroy(1)
            print("destroy ")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
示例#4
0
    def test_destroy_while_rebuilding(self):
        """
        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references on of our pool target hosts
            # and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            server = DaosServer(self.context, bytes(self.server_group),
                                svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist_servers):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            server.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            print("exclude target ")

            #self.pool.disconnect()
            #print "disconnect "

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.pool.destroy(1)
            print("destroy ")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
示例#5
0
    def start_rebuild(self, server_group, rank, daos_log):
        """Kill a specific server rank using this pool.

        Args:
            server_group (str): daos server group name
            rank (int): daos server rank to kill
            daos_log (DaosLog): object for logging messages
        """
        msg = "Killing DAOS server {} (rank {})".format(server_group, rank)
        self.log.info(msg)
        daos_log.info(msg)
        server = DaosServer(self.context, server_group, rank)
        server.kill(1)
        msg = "Excluding server rank {} from pool {}".format(rank, self.uuid)
        self.log.info(msg)
        daos_log.info(msg)
        self.pool.exclude([rank])
示例#6
0
    def start_rebuild(self, server_group, rank, daos_log):
        """Kill a specific server rank using this pool.

        Args:
            server_group (str): daos server group name
            rank (int): daos server rank to kill
            daos_log (DaosLog): object for logging messages

        Returns:
            bool: True if the server has been killed and the rank has been
            excluded from the pool; False if the pool is undefined

        """
        msg = "Killing DAOS server {} (rank {})".format(server_group, rank)
        self.log.info(msg)
        daos_log.info(msg)
        server = DaosServer(self.context, server_group, rank)
        server.kill(1)
        return self.exclude(rank, daos_log)
示例#7
0
文件: test_utils.py 项目: paf-49/daos
    def start_rebuild(self, ranks, daos_log):
        """Kill the specific server ranks using this pool.

        Args:
            ranks (list): a list of daos server ranks (int) to kill
            daos_log (DaosLog): object for logging messages

        Returns:
            bool: True if the server ranks have been killed and the ranks have
            been excluded from the pool; False if the pool is undefined

        """
        msg = "Killing DAOS ranks {} from server group {}".format(
            ranks, self.name.value)
        self.log.info(msg)
        daos_log.info(msg)
        for rank in ranks:
            server = DaosServer(self.context, self.name.value, rank)
            server.kill(1)
        return self.exclude(ranks, daos_log)
示例#8
0
def kill_server(server_group, context, rank, pool, log=None):
    """Kill a specific server rank.

    Args:
        server_group (str): daos server group name
        context (DaosContext): the context to use to create the DaosServer
        rank (int): daos server rank to kill
        pool (DaosPool): the DaosPool from which to exclude the rank
        log (DaosLog|None): object for logging messages

    Returns:
        None

    """
    if log:
        log.info("Killing DAOS server {} (rank {})".format(server_group, rank))
    server = DaosServer(context, server_group, rank)
    server.kill(1)
    if log:
        log.info("Excluding server rank {}".format(rank))
    pool.exclude([rank])
示例#9
0
    def test_rebuild_no_capacity(self):
        """
        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print "\nsetup complete, starting test\n"

            # create a server object that references on of our pool target hosts
            # and then kill it
            svr_to_kill = int(
                self.params.get("rank_to_kill", '/run/testparams/ranks/'))
            sh = DaosServer(self.CONTEXT, bytes(self.server_group),
                            svr_to_kill)

            time.sleep(1)
            sh.kill(1)

            # exclude the target from the dead server
            self.POOL.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.POOL.connect(1 << 1)
            status = self.POOL.pool_query()
            if not status.pi_ntargets == len(self.host_list):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st[2] == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except ValueError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
示例#10
0
    def test_rebuild_no_capacity(self):
        """
        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references on of our pool target hosts
            # and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            d_server = DaosServer(self.context, bytes(self.server_group),
                                  svr_to_kill)

            time.sleep(1)
            d_server.kill(1)

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st.rs_errno == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
示例#11
0
    def test_rebuild_with_io(self):
        """
        Test ID: Rebuild-003

        Test Description: Trigger a rebuild while I/O is ongoing.

        Use Cases:
          -- single pool, single client performing continous read/write/verify
             sequence while failure/rebuild is triggered in another process

        :avocado: tags=pool,rebuild,rebuildwithio
        """

        # the rebuild tests need to redo this stuff each time so not in setup
        # as it usually would be
        server_group = self.params.get("name", '/server_config/',
                                       'daos_server')

        self.hostlist_servers = self.params.get("test_machines", '/run/hosts/')
        hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.workdir)

        try:
            self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                        self.hostlist_servers)
            server_utils.run_server(hostfile_servers, server_group,
                                    self.basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)
            pool.connect(1 << 1)
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            # get pool status and make sure it all looks good before we start
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            dummy_pool_version = pool.pool_info.pi_rebuild_st.rs_version

            # do I/O for 30 seconds
            dummy_bw = io_utilities.continuous_io(container, 30)

            # trigger the rebuild
            rank = self.params.get("rank", '/run/testparams/ranks/*')
            server = DaosServer(self.context, server_group, rank)
            server.kill(1)
            pool.exclude([rank])

            # do another 30 seconds of I/O,
            # waiting for some improvements in server bootstrap
            # at which point we can move the I/O to a separate client and
            # really pound it with I/O
            dummy_bw = io_utilities.continuous_io(container, 30)

            # wait for the rebuild to finish
            while True:
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check rebuild statistics
            if pool.pool_info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr <= 0:
                self.fail("No objects have been rebuilt.")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr <= 0:
                self.fail("No records have been rebuilt.")

        except (ValueError, DaosApiError) as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # wait for the I/O process to finish
            try:
                server_utils.stop_server(hosts=self.hostlist_servers)
                os.remove(hostfile_servers)
                # really make sure everything is gone
                check_for_pool.cleanup_pools(self.hostlist_servers)
            finally:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
                server_utils.kill_server(self.hostlist_servers)
示例#12
0
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Expand on the basic test by rebuilding 2
        pools at once.

        Use Cases:
          -- multipool rebuild, single client, various object and record counds

        :avocado: tags=pool,rebuild,rebuildmulti
        """
        try:
            # initialize python pool object then create the underlying
            # daos storage, the way the code is now the pools should be
            # on the same storage and have the same service leader
            pool1 = DaosPool(self.context)
            pool2 = DaosPool(self.context)
            pool1.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)
            pool2.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool1.connect(1 << 1)
            pool2.connect(1 << 1)

            # create containers
            container1 = DaosContainer(self.context)
            container1.create(pool1.handle)
            container2 = DaosContainer(self.context)
            container2.create(pool2.handle)

            # now open them
            container1.open()
            container2.open()

            # Putting the same data in both pools, at least for now to simplify
            # checking its correct
            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):

                    # make some stuff up and write
                    dkey = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5)))
                    akey = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5)))
                    data = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(self.size)))

                    # Used DAOS_OC_R1S_SPEC_RANK
                    # 1 replica with specified rank
                    obj, txn = container1.write_an_obj(data,
                                                       len(data),
                                                       dkey,
                                                       akey,
                                                       obj,
                                                       self.rank,
                                                       obj_cls=15)
                    obj, txn = container2.write_an_obj(data,
                                                       len(data),
                                                       dkey,
                                                       akey,
                                                       obj,
                                                       self.rank,
                                                       obj_cls=15)
                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure its correct containers
                    data2 = container1.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P1, read it back, didn't match\n")
                    data2 = container2.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P2, read it back, didn't match\n")

            # kill a server
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool1.exclude([self.rank])
            pool2.exclude([self.rank])

            # check that rebuild finishes, no errors, progress data as
            # know it to be.  Check pool 1 first then we'll check 2 below.
            while True:
                pool1.pool_query()
                if pool1.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true,
            if pool1.pool_info.pi_ndisabled != 1:
                self.fail(
                    "P1 number disabled targets reporting incorrectly: {}".
                    format(pool1.pool_info.pi_ndisabled))
            if pool1.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}".format(
                    pool1.pool_info.pi_rebuild_st.rs_errno))
            if pool1.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_obj_nr, self.objcount))
            if (pool1.pool_info.pi_rebuild_st.rs_rec_nr !=
                (self.reccount * self.objcount)):
                self.fail("P1 rebuilt recs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_rec_nr,
                    self.reccount * self.objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container1.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

            # now check the other pool
            while True:
                pool2.pool_query()
                if pool2.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true
            if pool2.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number disabled targets reporting incorrectly: {}".format(
                        pool2.pool_info.pi_ndisabled))
            if pool2.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool2.pool_info.pi_rebuild_st.rs_errno))
            if pool2.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_obj_nr, self.objcount))
            if (pool2.pool_info.pi_rebuild_st.rs_rec_nr !=
                (self.reccount * self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_rec_nr,
                    (self.reccount * self.objcount)))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container2.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print(excp)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            server_utils.stop_server(hosts=self.hostlist_servers)
            check_for_pool.cleanup_pools(self.hostlist_servers)
            server_utils.kill_server(self.hostlist_servers)
示例#13
0
    def test_rebuild_with_io(self):
        """
        Test ID: Rebuild-003

        Test Description: Trigger a rebuild while I/O is ongoing.

        Use Cases:
          -- single pool, single client performing continous read/write/verify
             sequence while failure/rebuild is triggered in another process

        :avocado: tags=pool,rebuild,rebuildwithio
        """

        # the rebuild tests need to redo this stuff each time so not in setup
        # as it usually would be
        server_group = self.params.get("name", '/server_config/',
                                       'daos_server')

        basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        hostfile = write_host_file.write_host_file(self.hostlist, self.workdir)

        try:
            self.agent_sessions = AgentUtils.run_agent(basepath, self.hostlist)
            server_utils.run_server(hostfile, server_group, basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)
            pool.connect(1 << 1)
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            # get pool status and make sure it all looks good before we start
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            dummy_pool_version = pool.pool_info.pi_rebuild_st.rs_version

            # do I/O for 30 seconds
            dummy_bw = io_utilities.continuous_io(container, 30)

            # trigger the rebuild
            rank = self.params.get("rank", '/run/testparams/ranks/*')
            server = DaosServer(self.context, server_group, rank)
            server.kill(1)
            pool.exclude([rank])

            # do another 30 seconds of I/O,
            # waiting for some improvements in server bootstrap
            # at which point we can move the I/O to a separate client and
            # really pound it with I/O
            dummy_bw = io_utilities.continuous_io(container, 30)

            # wait for the rebuild to finish
            while True:
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check rebuild statistics
            if pool.pool_info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr <= 0:
                self.fail("No objects have been rebuilt.")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr <= 0:
                self.fail("No records have been rebuilt.")

        except (ValueError, DaosApiError) as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # wait for the I/O process to finish
            try:
                server_utils.stop_server(hosts=self.hostlist)
                os.remove(hostfile)
                # really make sure everything is gone
                check_for_pool.cleanup_pools(self.hostlist)
            finally:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
                server_utils.kill_server(self.hostlist)
示例#14
0
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test.

        Use Cases:
          -- single pool rebuild, single client, various reord/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """
        try:

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(self.createmode, self.createuid, self.creategid,
                        self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool.connect(1 << 1)

            # get pool status we want to test later
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail(
                    "Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")

            # create a container
            container = DaosContainer(self.context)
            container.create(pool.handle)

            # now open it
            container.open()

            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):
                    # make some stuff up and write
                    dkey = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5)))
                    akey = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5)))
                    data = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(self.size)))

                    obj, txn = container.write_an_obj(data,
                                                      len(data),
                                                      dkey,
                                                      akey,
                                                      obj,
                                                      self.rank,
                                                      obj_cls=16)

                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure its correct
                    data2 = container.read_an_obj(self.size, dkey, akey, obj,
                                                  txn)
                    if data != data2.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # kill a server that has
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done manually
            pool.exclude([self.rank])

            while True:
                # get the pool/rebuild status again
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            if pool.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number of disabled targets reporting incorrectly: {}".
                    format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool.pool_info.pi_rebuild_st.rs_obj_nr, self.objcount))
            if (pool.pool_info.pi_rebuild_st.rs_rec_nr !=
                (self.reccount * self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool.pool_info.pi_rebuild_st.rs_rec_nr,
                    self.reccount * self.objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container.read_an_obj(len(tup[3]), tup[1], tup[2],
                                              tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print(excp)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
示例#15
0
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Expand on the basic test by rebuilding 2
        pools at once.

        Use Cases:
          -- multipool rebuild, single client, various object and record counds

        :avocado: tags=pool,rebuild,rebuildmulti
        """
        try:
            # initialize python pool object then create the underlying
            # daos storage, the way the code is now the pools should be
            # on the same storage and have the same service leader
            pool1 = DaosPool(self.context)
            pool2 = DaosPool(self.context)
            pool1.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)
            pool2.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool1.connect(1 << 1)
            pool2.connect(1 << 1)

            # create containers
            container1 = DaosContainer(self.context)
            container1.create(pool1.handle)
            container2 = DaosContainer(self.context)
            container2.create(pool2.handle)

            # now open them
            container1.open()
            container2.open()

            # Putting the same data in both pools, at least for now to simplify
            # checking its correct
            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):

                    # make some stuff up and write
                    dkey = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in range(5)))
                    akey = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in range(5)))
                    data = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in
                                range(self.size)))

                    # Used DAOS_OC_R1S_SPEC_RANK
                    # 1 replica with specified rank
                    obj, txn = container1.write_an_obj(data, len(data), dkey,
                                                       akey, obj, self.rank,
                                                       obj_cls=15)
                    obj, txn = container2.write_an_obj(data, len(data), dkey,
                                                       akey, obj, self.rank,
                                                       obj_cls=15)
                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure its correct containers
                    data2 = container1.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail("Wrote data P1, read it back, didn't match\n")
                    data2 = container2.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail("Wrote data P2, read it back, didn't match\n")

            # kill a server
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool1.exclude([self.rank])
            pool2.exclude([self.rank])

            # check that rebuild finishes, no errors, progress data as
            # know it to be.  Check pool 1 first then we'll check 2 below.
            while True:
                pool1.pool_query()
                if pool1.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true,
            if pool1.pool_info.pi_ndisabled != 1:
                self.fail("P1 number disabled targets reporting incorrectly: {}"
                          .format(pool1.pool_info.pi_ndisabled))
            if pool1.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}"
                          .format(pool1.pool_info.pi_rebuild_st.rs_errno))
            if pool1.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}"
                          .format(pool1.pool_info.pi_rebuild_st.rs_obj_nr,
                                  self.objcount))
            if (pool1.pool_info.pi_rebuild_st.rs_rec_nr !=
                    (self.reccount*self.objcount)):
                self.fail("P1 rebuilt recs not as expected: {0} {1}"
                          .format(pool1.pool_info.pi_rebuild_st.rs_rec_nr,
                                  self.reccount*self.objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container1.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

            # now check the other pool
            while True:
                pool2.pool_query()
                if pool2.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true
            if pool2.pool_info.pi_ndisabled != 1:
                self.fail("Number disabled targets reporting incorrectly: {}"
                          .format(pool2.pool_info.pi_ndisabled))
            if pool2.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}"
                          .format(pool2.pool_info.pi_rebuild_st.rs_errno))
            if pool2.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}"
                          .format(pool2.pool_info.pi_rebuild_st.rs_obj_nr,
                                  self.objcount))
            if (pool2.pool_info.pi_rebuild_st.rs_rec_nr !=
                    (self.reccount*self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}".
                          format(pool2.pool_info.pi_rebuild_st.rs_rec_nr,
                                 (self.reccount*self.objcount)))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container2.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print (excp)
            print (traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
示例#16
0
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test.

        Use Cases:
          -- single pool rebuild, single client, various reord/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """
        try:

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(self.createmode, self.createuid, self.creategid,
                        self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool.connect(1 << 1)

            # get pool status we want to test later
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")

            # create a container
            container = DaosContainer(self.context)
            container.create(pool.handle)

            # now open it
            container.open()

            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):
                    # make some stuff up and write
                    dkey = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in range(5)))
                    akey = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in range(5)))
                    data = (''.join(random.choice(string.ascii_uppercase +
                                                  string.digits)
                                    for _ in range(self.size)))

                    obj, txn = container.write_an_obj(data, len(data), dkey,
                                                      akey, obj, self.rank,
                                                      obj_cls=16)

                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure its correct
                    data2 = container.read_an_obj(self.size, dkey, akey, obj,
                                                  txn)
                    if data != data2.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # kill a server that has
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done manually
            pool.exclude([self.rank])

            while True:
                # get the pool/rebuild status again
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            if pool.pool_info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}"
                          .format(pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}"
                          .format(pool.pool_info.pi_rebuild_st.rs_obj_nr,
                                  self.objcount))
            if (pool.pool_info.pi_rebuild_st.rs_rec_nr !=
                    (self.reccount*self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}"
                          .format(pool.pool_info.pi_rebuild_st.rs_rec_nr,
                                  self.reccount*self.objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container.read_an_obj(len(tup[3]), tup[1], tup[2],
                                              tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print (excp)
            print (traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
示例#17
0
    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        :avocado: tags=pool,svc
        """

        # parameters used in pool create
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

        expected_result = createsvc[1]

        try:
            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid,
                             createsize, createsetid, None, None, createsvc[0])
            self.pool.connect(1 << 1)

            # checking returned rank list for server more than 1
            i = 0
            while (
                    int(self.pool.svc.rl_ranks[i]) > 0 and
                    int(self.pool.svc.rl_ranks[i]) <= createsvc[0] and
                    int(self.pool.svc.rl_ranks[i]) != 999999
            ):
                i += 1
            if i != createsvc[0]:
                self.fail("Length of Returned Rank list is not equal to "
                          "the number of Pool Service members.\n")
            rank_list = []
            for j in range(createsvc[0]):
                rank_list.append(int(self.pool.svc.rl_ranks[j]))
                if len(rank_list) != len(set(rank_list)):
                    self.fail("Duplicate values in returned rank list")

            if createsvc[0] == 3:
                self.pool.disconnect()
                cmd = ('{0} kill-leader  --uuid={1}'
                       .format(self.daosctl, self.pool.get_uuid_str()))
                process.system(cmd)
                self.pool.connect(1 << 1)
                self.pool.disconnect()
                server = DaosServer(self.context, self.server_group, 2)
                server.kill(1)
                self.pool.exclude([2])
                self.pool.connect(1 << 1)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
示例#18
0
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Expand on the basic test by rebuilding 2
        pools at once.

        Use Cases:
          -- multipool rebuild, single client, various object and record counds

        :avocado: tags=pool,rebuild,rebuildmulti
        """

        # the rebuild tests need to redo this stuff each time so not in setup
        # as it usually would be
        setid = self.params.get("setname", '/run/testparams/setnames/')
        server_group = self.params.get("server_group", '/server/',
                                       'daos_server')

        basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")
        tmp = self.build_paths['PREFIX'] + '/tmp'

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp)

        try:
            ServerUtils.runServer(hostfile, server_group, basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize python pool object then create the underlying
            # daos storage, the way the code is now the pools should be
            # on the same storage and have the same service leader
            pool1 = DaosPool(self.Context)
            pool2 = DaosPool(self.Context)
            pool1.create(createmode, createuid, creategid, createsize,
                         createsetid, None)
            pool2.create(createmode, createuid, creategid, createsize,
                         createsetid, None)

            # want an open connection during rebuild
            pool1.connect(1 << 1)
            pool2.connect(1 << 1)

            # create containers
            container1 = DaosContainer(self.Context)
            container1.create(pool1.handle)
            container2 = DaosContainer(self.Context)
            container2.create(pool2.handle)

            # now open them
            container1.open()
            container2.open()

            # how many objects and records are we creating
            objcount = self.params.get("objcount",
                                       '/run/testparams/numobjects/*')
            reccount = self.params.get("reccount",
                                       '/run/testparams/numrecords/*')
            if objcount == 0:
                reccount = 0

            # which rank to write to and kill
            rank = self.params.get("rank", '/run/testparams/ranks/*')

            # how much data to write with each key
            size = self.params.get("size", '/run/testparams/datasize/')

            # Putting the same data in both pools, at least for now to simplify
            # checking its correct
            saved_data = []
            for i in range(0, objcount):
                obj = None
                for j in range(0, reccount):

                    # make some stuff up and write
                    dkey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    akey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    data = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(size))

                    obj, tx = container1.write_an_obj(data, len(data), dkey,
                                                      akey, obj, rank)
                    obj, tx = container2.write_an_obj(data, len(data), dkey,
                                                      akey, obj, rank)

                    saved_data.append((obj, dkey, akey, data, tx))

                    # read the data back and make sure its correct
                    # containers
                    data2 = container1.read_an_obj(size, dkey, akey, obj, tx)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P1, read it back, didn't match\n")

                    # containers
                    data2 = container2.read_an_obj(size, dkey, akey, obj, tx)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P2, read it back, didn't match\n")

            # kill a server
            server = DaosServer(self.Context, server_group, rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool1.exclude([rank])
            pool2.exclude([rank])

            # check that rebuild finishes, no errors, progress data as
            # know it to be.  Check pool 1 first then we'll check 2 below.
            while True:
                pool1.pool_query()
                if pool1.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true,
            if pool1.pool_info.pi_ndisabled != 1:
                self.fail(
                    "P1 number disabled targets reporting incorrectly: {}".
                    format(pool1.pool_info.pi_ndisabled))
            if pool1.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}".format(
                    pool1.pool_info.pi_rebuild_st.rs_errno))
            if pool1.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
            if pool1.pool_info.pi_rebuild_st.rs_rec_nr != (reccount *
                                                           objcount):
                self.fail("P1 rebuilt recs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_rec_nr,
                    reccount * objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container1.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

            # now check the other pool
            while True:
                pool2.pool_query()
                if pool2.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # apriori know to be true
            if pool2.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number disabled targets reporting incorrectly: {}".format(
                        pool2.pool_info.pi_ndisabled))
            if pool2.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool2.pool_info.pi_rebuild_st.rs_errno))
            if pool2.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
            if pool2.pool_info.pi_rebuild_st.rs_rec_nr != (reccount *
                                                           objcount):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_rec_nr,
                    (reccount * objcount)))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container2.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            ServerUtils.stopServer(hosts=self.hostlist)
            os.remove(hostfile)
            CheckForPool.CleanupPools(self.hostlist)
            ServerUtils.killServer(self.hostlist)
示例#19
0
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test.

        Use Cases:
          -- single pool rebuild, single client, various reord/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """

        # the rebuild tests need to redo this stuff each time so not in setup
        # as it usually would be
        setid = self.params.get("setname", '/run/testparams/setnames/')
        server_group = self.params.get("server_group", '/server/',
                                       'daos_server')

        basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")
        tmp = self.build_paths['PREFIX'] + '/tmp'

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp)

        try:
            ServerUtils.runServer(hostfile, server_group, basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.Context)
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)

            # want an open connection during rebuild
            pool.connect(1 << 1)

            # get pool status we want to test later
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail(
                    "Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            pool_version = pool.pool_info.pi_rebuild_st.rs_version

            # create a container
            container = DaosContainer(self.Context)
            container.create(pool.handle)

            # now open it
            container.open()

            # how many objects and records are we creating
            objcount = self.params.get("objcount",
                                       '/run/testparams/numobjects/*')
            reccount = self.params.get("reccount",
                                       '/run/testparams/numrecords/*')
            if objcount == 0:
                reccount = 0

            # which rank to write to and kill
            rank = self.params.get("rank", '/run/testparams/ranks/*')

            # how much data to write with each key
            size = self.params.get("size", '/run/testparams/datasize/')

            saved_data = []
            for i in range(0, objcount):
                obj = None
                for j in range(0, reccount):

                    # make some stuff up and write
                    dkey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    akey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    data = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(size))

                    obj, tx = container.write_an_obj(data, len(data), dkey,
                                                     akey, obj, rank)

                    saved_data.append((obj, dkey, akey, data, tx))

                    # read the data back and make sure its correct
                    data2 = container.read_an_obj(size, dkey, akey, obj, tx)
                    if data != data2.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # kill a server that has
            server = DaosServer(self.Context, server_group, rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool.exclude([rank])

            while True:
                # get the pool/rebuild status again
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            if pool.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number of disabled targets reporting incorrectly: {}".
                    format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != (reccount * objcount):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool.pool_info.pi_rebuild_st.rs_rec_nr,
                    reccount * objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container.read_an_obj(len(tup[3]), tup[1], tup[2],
                                              tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            try:
                ServerUtils.stopServer(hosts=self.hostlist)
                os.remove(hostfile)
                # really make sure everything is gone
                CheckForPool.CleanupPools(self.hostlist)
            finally:
                ServerUtils.killServer(self.hostlist)
示例#20
0
    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        :avocado: tags=all,pool,pr,medium,svc
        """

        # parameters used in pool create
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

        expected_result = createsvc[1]

        try:
            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid, createsize,
                             createsetid, None, None, createsvc[0])
            self.pool.connect(1 << 1)

            # checking returned rank list for server more than 1
            iterator = 0
            while (int(self.pool.svc.rl_ranks[iterator]) > 0
                   and int(self.pool.svc.rl_ranks[iterator]) <= createsvc[0]
                   and int(self.pool.svc.rl_ranks[iterator]) != 999999):
                iterator += 1
            if iterator != createsvc[0]:
                self.fail("Length of Returned Rank list is not equal to "
                          "the number of Pool Service members.\n")
            rank_list = []
            for iterator in range(createsvc[0]):
                rank_list.append(int(self.pool.svc.rl_ranks[iterator]))
                if len(rank_list) != len(set(rank_list)):
                    self.fail("Duplicate values in returned rank list")

            self.pool.pool_query()
            leader = self.pool.pool_info.pi_leader
            if createsvc[0] == 3:
                # kill pool leader and exclude it
                self.pool.pool_svc_stop()
                self.pool.exclude([leader])
                # perform pool disconnect, try connect again and disconnect
                self.pool.disconnect()
                self.pool.connect(1 << 1)
                self.pool.disconnect()
                # kill another server which is not a leader and exclude it
                server = DaosServer(self.context, self.server_group, 3)
                server.kill(1)
                self.pool.exclude([3])
                # perform pool connect
                self.pool.connect(1 << 1)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")