Example #1
def check_handle(buf_len, iov_len, buf, uuidstr, rank):
    """
    This gets run in a child process and verifies that the global
    handle can be turned into a local handle in another process.
    """

    try:
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)

        # setup the DAOS python API in this process
        context = DaosContext(build_paths['PREFIX'] + '/lib/')

        pool = DaosPool(context)
        pool.set_uuid_str(uuidstr)
        pool.set_svc(rank)
        pool.group = "daos_server"

        # note that the handle is stored inside the pool as well
        dummy_local_handle = pool.global2local(context, iov_len, buf_len, buf)

        # perform some operations that will use the new handle
        pool.pool_query()
        container = DaosContainer(context)
        container.create(pool.handle)

    except DaosApiError as excep:
        print(excep)
        print(traceback.format_exc())
        raise

    return
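
For context, the parent side of this test is not shown here. A minimal sketch of it, assuming the same daos_api-style bindings and that local2global() returns the (iov_len, buf_len, buf) triple consumed above:

    from multiprocessing import Process

    # connect in the parent, then serialize the open handle for the child
    pool.connect(1 << 1)
    iov_len, buf_len, buf = pool.local2global()  # assumed return order
    child = Process(target=check_handle,
                    args=(buf_len, iov_len, buf,
                          pool.get_uuid_str(), pool.svc.rl_ranks[0]))
    child.start()
    child.join()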
Example #2
def check_handle(pool_glob_handle, uuidstr, cont_glob_handle, rank):
    """
    This gets run in a child process and verifies that the global
    handles can be turned into local handles in another process.
    """

    try:
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)

        # setup the DAOS python API in this process
        context = DaosContext(build_paths['PREFIX'] + '/lib/')

        # setup the pool and connect using global handle
        pool = DaosPool(context)
        pool.uuid = uuidstr
        pool.set_svc(rank)
        pool.group = "daos_server"
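        # the global handle arrives as an iov: cast the raw buffer pointer
        # to a fixed-size byte array and copy it into a plain bytearray so
        # global2local() receives a self-contained buffer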
        buf = ctypes.cast(pool_glob_handle.iov_buf,
                          ctypes.POINTER(ctypes.c_byte *
                                         pool_glob_handle.iov_buf_len))
        buf2 = bytearray()
        buf2.extend(buf.contents)
        pool_handle = pool.global2local(context,
                                        pool_glob_handle.iov_len,
                                        pool_glob_handle.iov_buf_len,
                                        buf2)

        # perform an operation that will use the new handle; if it
        # doesn't throw an exception, then all is well.
        pool.pool_query()

        # setup the container and then connect using the global handle
        container = DaosContainer(context)
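        # converting the container's global handle requires an open pool
        # handle, so reuse the one just created from the pool's global
        # handle above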
        container.poh = pool_handle
        buf = ctypes.cast(cont_glob_handle.iov_buf,
                          ctypes.POINTER(ctypes.c_byte *
                                         cont_glob_handle.iov_buf_len))
        buf2 = bytearray()
        buf2.extend(buf.contents)
        dummy_cont_handle = container.global2local(context,
                                                   cont_glob_handle.iov_len,
                                                   cont_glob_handle.iov_buf_len,
                                                   buf2)
        # just try one thing to make sure handle is good
        container.query()

    except DaosApiError as excep:
        print(excep)
        print(traceback.format_exc())
        raise

    return
Example #3
class InfoTests(Test):
    """
    Tests DAOS pool query.
    """
    def setUp(self):
        # get paths from the build_vars generated by build
        with open(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "../../../../.build_vars.json")) as f:
            build_paths = json.load(f)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.tmp = build_paths['PREFIX'] + '/tmp'
        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_server')

        context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.pool = DaosPool(context)
        self.d_log = DaosLog(context)
        self.hostlist = self.params.get("test_machines1", '/run/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, self.tmp)
        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)

    def tearDown(self):
        # shut 'er down
        try:
            if self.pool:
                self.pool.destroy(1)
            os.remove(self.hostfile)
        finally:
            ServerUtils.stopServer(hosts=self.hostlist)

    def test_simple_query(self):
        """
        Test querying a pool created on a single server.

        :avocado: tags=pool,poolquery,infotest
        """
        # create pool
        mode = self.params.get("mode", '/run/testparams/modes/*', 0731)
        if mode == 73:
            self.cancel('Cancel the mode test 73 because of DAOS-1877')

        uid = os.geteuid()
        gid = os.getegid()
        size = self.params.get("size", '/run/testparams/sizes/*', 0)
        group = self.server_group

        self.pool.create(mode, uid, gid, size, group, None)

        # connect to the pool
        flags = self.params.get("perms", '/run/testparams/connectperms/*', '')
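        # DAOS connect permissions form a bit mask (read-only, read-write,
        # exclusive); the yaml value supplies the shift amount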
        connect_flags = 1 << flags
        self.pool.connect(connect_flags)

        # query the pool
        pool_info = self.pool.pool_query()

        # check uuid
        uuid_str = c_uuid_to_str(pool_info.pi_uuid)
        if uuid_str != self.pool.get_uuid_str():
            self.d_log.error("UUID str does not match expected string")
            self.fail("UUID str does not match expected string")
        # validate size of pool is what we expect
        '''
        This check is currently disabled, as space is not implemented in
        DAOS C API yet.
        if size != pool_info.pi_space:
            self.d_log.error("expected size {0} did not match actual size {1}"
                      .format(size, pool_info.pi_space))
            self.fail("expected size {0} did not match actual size {1}"
                      .format(size, pool_info.pi_space))
        '''

        # number of targets
        if pool_info.pi_ntargets != len(self.hostlist):
            self.d_log.error("found number of targets in pool did not match "
                             "expected number {0}. num targets: {1}".format(
                                 len(self.hostlist), pool_info.pi_ntargets))
            self.fail("found number of targets in pool did not match "
                      "expected number {0}. num targets: {1}".format(
                          len(self.hostlist), pool_info.pi_ntargets))

        # number of disabled targets
        if pool_info.pi_ndisabled > 0:
            self.d_log.error("found disabled targets, none expected to be")
            self.fail("found disabled targets, none expected to be disabled")

        # mode
        if pool_info.pi_mode != mode:
            self.d_log.error(
                "found different mode than expected. expected {0}, "
                "found {1}.".format(mode, pool_info.pi_mode))
            self.fail("found different mode than expected. expected {0}, "
                      "found {1}.".format(mode, pool_info.pi_mode))

        # uid
        if pool_info.pi_uid != uid:
            self.d_log.error(
                "found actual pool uid {0} does not match expected "
                "uid {1}".format(pool_info.pi_uid, uid))
            self.fail("found actual pool uid {0} does not match expected uid "
                      "{1}".format(pool_info.pi_uid, uid))

        # gid
        if pool_info.pi_gid != gid:
            self.d_log.error(
                "found actual pool gid {0} does not match expected "
                "gid {1}".format(pool_info.pi_gid, gid))
            self.fail("found actual pool gid {0} does not match expected gid "
                      "{1}".format(pool_info.pi_gid, gid))
Example #4
File: InfoTests.py (Project: chen0qi/daos)
class InfoTests(Test):
    """
    Tests DAOS pool query.
    """
    def setUp(self):
        # get paths from the build_vars generated by build
        with open(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "../../../../.build_vars.json")) as f:
            build_paths = json.load(f)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.tmp = build_paths['PREFIX'] + '/tmp'
        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_server')

        context = DaosContext(build_paths['PREFIX'] + '/lib/')
        print("initialized!!!\n")

        self.pool = DaosPool(context)
        self.hostlist = self.params.get("test_machines1", '/run/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, self.tmp)
        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)

    def tearDown(self):
        # shut 'er down
        self.pool.destroy(1)
        ServerUtils.stopServer()
        os.remove(self.hostfile)

    def test_simple_query(self):
        """
        Test querying a pool created on a single server.

        :avocado: tags=pool,poolquery,infotest
        """
        # there is a presumption that this test lives in a specific spot
        # in the repo

        # create pool
        mode = self.params.get("mode", '/run/testparams/modes/*', 0731)
        uid = os.geteuid()
        gid = os.getegid()
        size = self.params.get("size", '/run/testparams/sizes/*', 0)
        tgt_list = None
        group = self.server_group

        self.pool.create(mode, uid, gid, size, group, tgt_list)
        PROGRESS_LOG.info("created pool")

        # connect to the pool
        flags = self.params.get("perms", '/run/testparams/connectperms/*', '')
        connect_flags = 1 << flags
        self.pool.connect(connect_flags)
        PROGRESS_LOG.info("connected to pool")

        # query the pool
        pool_info = self.pool.pool_query()
        PROGRESS_LOG.info("queried pool info")

        # check uuid
        uuid_str = c_uuid_to_str(pool_info.pi_uuid)
        PROGRESS_LOG.info("pool uuid pool_info.pi_uuid: {0}".format(uuid_str))
        PROGRESS_LOG.info("pool uuid saved in api at create time: "
                          "{0}".format(self.pool.get_uuid_str()))
        if uuid_str != self.pool.get_uuid_str():
            self.fail("UUID str does not match expected string")

        # validate size of pool is what we expect
        PROGRESS_LOG.info("pool should be {0} bytes".format(size))
        PROGRESS_LOG.info("pool actual space is {0} bytes".format(
            pool_info.pi_space))
        '''
        This check is currently disabled, as space is not implemented in
        DAOS C API yet.
        if size != pool_info.pi_space:
            self.fail("expected size {0} did not match actual size {1}"
                      .format(size, pool_info.pi_space))
        '''

        # number of targets
        PROGRESS_LOG.info("number of targets in pool: %s",
                          pool_info.pi_ntargets)
        if pool_info.pi_ntargets != len(self.hostlist):
            self.fail("found number of targets in pool did not match "
                      "expected number {0}. num targets: {1}".format(
                          len(self.hostlist), pool_info.pi_ntargets))

        # number of disabled targets
        PROGRESS_LOG.info("number of disabled targets in pool: %s",
                          pool_info.pi_ndisabled)
        if pool_info.pi_ndisabled > 0:
            self.fail("found disabled targets, none expected to be disabled")

        # mode
        PROGRESS_LOG.info("pool mode: %s", pool_info.pi_mode)
        if pool_info.pi_mode != mode:
            self.fail("found different mode than expected. expected {0}, "
                      "found {1}.".format(mode, pool_info.pi_mode))

        # uid
        PROGRESS_LOG.info("expected uid is {0}".format(uid))
        if pool_info.pi_uid != uid:
            self.fail("found actual pool uid {0} does not match expected uid "
                      "{1}".format(pool_info.pi_uid, uid))

        # gid
        PROGRESS_LOG.info("expected gid is {0}".format(gid))
        if pool_info.pi_gid != gid:
            self.fail("found actual pool gid {0} does not match expected gid "
                      "{1}".format(pool_info.pi_gid, gid))
Example #5
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Expand on the basic test by rebuilding 2
        pools at once.

        Use Cases:
          -- multipool rebuild, single client, various object and record counts

        :avocado: tags=pool,rebuild,rebuildmulti
        """

        # the rebuild tests need to redo this stuff each time so not in setup
        # as it usually would be
        setid = self.params.get("setname", '/run/testparams/setnames/')
        server_group = self.params.get("server_group", '/server/',
                                       'daos_server')

        basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")
        tmp = self.build_paths['PREFIX'] + '/tmp'

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp)

        try:
            ServerUtils.runServer(hostfile, server_group, basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize python pool object then create the underlying
            # daos storage, the way the code is now the pools should be
            # on the same storage and have the same service leader
            pool1 = DaosPool(self.Context)
            pool2 = DaosPool(self.Context)
            pool1.create(createmode, createuid, creategid, createsize,
                         createsetid, None)
            pool2.create(createmode, createuid, creategid, createsize,
                         createsetid, None)

            # want an open connection during rebuild
            pool1.connect(1 << 1)
            pool2.connect(1 << 1)

            # create containers
            container1 = DaosContainer(self.Context)
            container1.create(pool1.handle)
            container2 = DaosContainer(self.Context)
            container2.create(pool2.handle)

            # now open them
            container1.open()
            container2.open()

            # how many objects and records are we creating
            objcount = self.params.get("objcount",
                                       '/run/testparams/numobjects/*')
            reccount = self.params.get("reccount",
                                       '/run/testparams/numrecords/*')
            if objcount == 0:
                reccount = 0

            # which rank to write to and kill
            rank = self.params.get("rank", '/run/testparams/ranks/*')

            # how much data to write with each key
            size = self.params.get("size", '/run/testparams/datasize/')

            # Putting the same data in both pools, at least for now, to
            # simplify checking that it's correct
            saved_data = []
            for i in range(0, objcount):
                obj = None
                for j in range(0, reccount):

                    # make some stuff up and write
                    dkey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    akey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    data = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(size))

                    obj, tx = container1.write_an_obj(data, len(data), dkey,
                                                      akey, obj, rank)
                    obj, tx = container2.write_an_obj(data, len(data), dkey,
                                                      akey, obj, rank)

                    saved_data.append((obj, dkey, akey, data, tx))
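                    # each saved_data entry is (obj, dkey, akey, data, tx);
                    # these indexes are reused after rebuild to re-read and
                    # verify every record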

                    # read the data back and make sure it's correct
                    data2 = container1.read_an_obj(size, dkey, akey, obj, tx)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P1, read it back, didn't match\n")

                    # same check for the second container
                    data2 = container2.read_an_obj(size, dkey, akey, obj, tx)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P2, read it back, didn't match\n")

            # kill a server
            server = DaosServer(self.Context, server_group, rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool1.exclude([rank])
            pool2.exclude([rank])

            # check that rebuild finishes with no errors and the progress
            # data is as we know it to be.  Check pool 1 first, then pool 2
            # below.
            while True:
                pool1.pool_query()
                if pool1.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # know a priori to be true
            if pool1.pool_info.pi_ndisabled != 1:
                self.fail(
                    "P1 number disabled targets reporting incorrectly: {}".
                    format(pool1.pool_info.pi_ndisabled))
            if pool1.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}".format(
                    pool1.pool_info.pi_rebuild_st.rs_errno))
            if pool1.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
            if pool1.pool_info.pi_rebuild_st.rs_rec_nr != (reccount *
                                                           objcount):
                self.fail("P1 rebuilt recs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_rec_nr,
                    reccount * objcount))

            # now that rebuild has finished, verify the records are correct
            for tup in saved_data:
                data2 = container1.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

            # now check the other pool
            while True:
                pool2.pool_query()
                if pool2.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # know a priori to be true
            if pool2.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number disabled targets reporting incorrectly: {}".format(
                        pool2.pool_info.pi_ndisabled))
            if pool2.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool2.pool_info.pi_rebuild_st.rs_errno))
            if pool2.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
            if pool2.pool_info.pi_rebuild_st.rs_rec_nr != (reccount *
                                                           objcount):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_rec_nr,
                    (reccount * objcount)))

            # now that rebuild has finished, verify the records are correct
            for tup in saved_data:
                data2 = container2.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            ServerUtils.stopServer(hosts=self.hostlist)
            os.remove(hostfile)
            CheckForPool.CleanupPools(self.hostlist)
            ServerUtils.killServer(self.hostlist)
Example #6
class PoolSvc(TestWithServers):
    """
    Tests svc argument while pool create.
    :avocado: recursive
    """
    def tearDown(self):
        try:
            if self.pool is not None and self.pool.attached:
                self.pool.destroy(1)
        finally:
            super(PoolSvc, self).tearDown()

    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        :avocado: tags=pool,svc
        """

        # parameters used in pool create
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

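        # each yaml svc entry is presumably a (svcn, expected-result) pair,
        # e.g. [3, 'PASS']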
        expected_result = createsvc[1]

        try:
            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid, createsize,
                             createsetid, None, None, createsvc[0])
            self.pool.connect(1 << 1)

            # walk the returned rank list; valid entries are positive, no
            # larger than the requested svc count, and not the 999999
            # terminator
            iterator = 0
            while (int(self.pool.svc.rl_ranks[iterator]) > 0
                   and int(self.pool.svc.rl_ranks[iterator]) <= createsvc[0]
                   and int(self.pool.svc.rl_ranks[iterator]) != 999999):
                iterator += 1
            if iterator != createsvc[0]:
                self.fail("Length of Returned Rank list is not equal to "
                          "the number of Pool Service members.\n")
            rank_list = []
            for iterator in range(createsvc[0]):
                rank_list.append(int(self.pool.svc.rl_ranks[iterator]))
                if len(rank_list) != len(set(rank_list)):
                    self.fail("Duplicate values in returned rank list")

            self.pool.pool_query()
            leader = self.pool.pool_info.pi_leader
            if createsvc[0] == 3:
                # kill pool leader and exclude it
                self.pool.pool_svc_stop()
                self.pool.exclude([leader])
                # perform pool disconnect, try connect again and disconnect
                self.pool.disconnect()
                self.pool.connect(1 << 1)
                self.pool.disconnect()
                # kill another server which is not a leader and exclude it
                server = DaosServer(self.context, self.server_group,
                                    leader - 1)
                server.kill(1)
                self.pool.exclude([leader - 1])
                # perform pool connect
                self.pool.connect(1 << 1)

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
Example #7
File: bad_query.py (Project: morsiee/daos)
    def test_query(self):
        """
        Pass bad parameters to pool query

        :avocado: tags=pool,poolquery,badparam,badquery
        """
        # parameters used in pool create/connect
        connectmode = self.params.get("mode", '/run/querytests/connectmode/')
        createmode = self.params.get("mode", '/run/querytests/createmode/')
        createuid = self.params.get("uid", '/run/querytests/createuid/')
        creategid = self.params.get("gid", '/run/querytests/creategid/')
        createsetid = self.params.get("setname", '/run/querytests/createset/')
        createsize = self.params.get("size", '/run/querytests/createsize/')

        # Accumulate a list of pass/fail indicators representing what is
        # expected for each parameter then "and" them to determine the
        # expected result of the test
        expected_for_param = []

        handlelist = self.params.get("handle", '/run/querytests/handles/*/')
        handle = handlelist[0]
        expected_for_param.append(handlelist[1])

        infolist = self.params.get("info", '/run/querytests/infoptr/*/')
        dummy_infoptr = infolist[0]
        expected_for_param.append(infolist[1])

        # if any parameter is FAIL then the test should FAIL; in this test
        # virtually every case should FAIL since we are testing bad parameters
        expected_result = 'PASS'
        for result in expected_for_param:
            if result == 'FAIL':
                expected_result = 'FAIL'
                break

        try:
            # setup the DAOS python API
            with open('../../../.build_vars.json') as build_file:
                data = json.load(build_file)
            context = DaosContext(data['PREFIX'] + '/lib/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)

            pool.connect(connectmode)

            # trash the pool handle value
            if handle != 'VALID':
                pool.handle = handle

            pool.pool_query()

            if expected_result in ['FAIL']:
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result in ['PASS']:
                self.fail("Test was expected to pass but it failed.\n")
Example #8
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Expand on the basic test by rebuilding 2
        pools at once.

        Use Cases:
          -- multipool rebuild, single client, various object and record counts

        :avocado: tags=pool,rebuild,rebuildmulti
        """
        try:
            # initialize python pool object then create the underlying
            # daos storage, the way the code is now the pools should be
            # on the same storage and have the same service leader
            pool1 = DaosPool(self.context)
            pool2 = DaosPool(self.context)
            pool1.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)
            pool2.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool1.connect(1 << 1)
            pool2.connect(1 << 1)

            # create containers
            container1 = DaosContainer(self.context)
            container1.create(pool1.handle)
            container2 = DaosContainer(self.context)
            container2.create(pool2.handle)

            # now open them
            container1.open()
            container2.open()

            # Putting the same data in both pools, at least for now, to
            # simplify checking that it's correct
            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):

                    # make some stuff up and write
                    dkey = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in range(5)))
                    akey = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in range(5)))
                    data = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in
                                range(self.size)))

                    # Used DAOS_OC_R1S_SPEC_RANK
                    # 1 replica with specified rank
                    obj, txn = container1.write_an_obj(data, len(data), dkey,
                                                       akey, obj, self.rank,
                                                       obj_cls=15)
                    obj, txn = container2.write_an_obj(data, len(data), dkey,
                                                       akey, obj, self.rank,
                                                       obj_cls=15)
                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure it's correct
                    data2 = container1.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail("Wrote data P1, read it back, didn't match\n")
                    data2 = container2.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail("Wrote data P2, read it back, didn't match\n")

            # kill a server
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool1.exclude([self.rank])
            pool2.exclude([self.rank])

            # check that rebuild finishes with no errors and the progress
            # data is as we know it to be.  Check pool 1 first, then pool 2
            # below.
            while True:
                pool1.pool_query()
                if pool1.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # know a priori to be true
            if pool1.pool_info.pi_ndisabled != 1:
                self.fail("P1 number disabled targets reporting incorrectly: {}"
                          .format(pool1.pool_info.pi_ndisabled))
            if pool1.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}"
                          .format(pool1.pool_info.pi_rebuild_st.rs_errno))
            if pool1.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}"
                          .format(pool1.pool_info.pi_rebuild_st.rs_obj_nr,
                                  self.objcount))
            if (pool1.pool_info.pi_rebuild_st.rs_rec_nr !=
                    (self.reccount*self.objcount)):
                self.fail("P1 rebuilt recs not as expected: {0} {1}"
                          .format(pool1.pool_info.pi_rebuild_st.rs_rec_nr,
                                  self.reccount*self.objcount))

            # now that rebuild has finished, verify the records are correct
            for tup in saved_data:
                data2 = container1.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

            # now check the other pool
            while True:
                pool2.pool_query()
                if pool2.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # know a priori to be true
            if pool2.pool_info.pi_ndisabled != 1:
                self.fail("Number disabled targets reporting incorrectly: {}"
                          .format(pool2.pool_info.pi_ndisabled))
            if pool2.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}"
                          .format(pool2.pool_info.pi_rebuild_st.rs_errno))
            if pool2.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}"
                          .format(pool2.pool_info.pi_rebuild_st.rs_obj_nr,
                                  self.objcount))
            if (pool2.pool_info.pi_rebuild_st.rs_rec_nr !=
                    (self.reccount*self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}".
                          format(pool2.pool_info.pi_rebuild_st.rs_rec_nr,
                                 (self.reccount*self.objcount)))

            # now that rebuild has finished, verify the records are correct
            for tup in saved_data:
                data2 = container2.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print(excp)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #9
class NvmeIo(avocado.Test):
    """
    Test Class Description:
        Test NVMe IO with IOR and verify pool size accounting (SCM and NVMe).
    """

    def setUp(self):
        self.pool = None
        self.hostlist = None
        self.hostfile_clients = None
        self.hostfile = None
        self.out_queue = None
        self.pool_connect = False

        with open('../../../.build_vars.json') as json_f:
            build_paths = json.load(json_f)

        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_server')
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.d_log = DaosLog(self.context)
        self.hostlist = self.params.get("servers", '/run/hosts/*')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)
        # start the server
        server_utils.run_server(self.hostfile, self.server_group, self.basepath)

    def tearDown(self):
        try:
            if self.pool_connect:
                self.pool.disconnect()
                self.pool.destroy(1)
        finally:
            server_utils.stop_server(hosts=self.hostlist)

    def verify_pool_size(self, original_pool_info, ior_args):
        """
        Function is to validate the pool size
        original_pool_info: Pool info prior to IOR
        ior_args: IOR args to calculate the file size
        """
        # get the current pool size for comparison
        current_pool_info = self.pool.pool_query()
        # if the transfer size is >= 4K the pool size is verified against
        # NVMe, otherwise against SCM
        if ior_args['stripe_size'] >= 4096:
            print("Size is >= 4K, size verification will be done with NVMe")
            storage_index = 1
        else:
            print("Size is < 4K, size verification will be done with SCM")
            storage_index = 0

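        # the space the IOR run consumed is the drop in free space between
        # the pre-IOR query (passed in) and the post-IOR query above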
        free_pool_size = (
            original_pool_info.pi_space.ps_space.s_free[storage_index]
            - current_pool_info.pi_space.ps_space.s_free[storage_index])

        obj_multiplier = 1
        replica_number = re.findall(r'\d+', ior_args['object_class'])
        if replica_number:
            obj_multiplier = int(replica_number[0])
        expected_pool_size = (ior_args['slots'] * ior_args['block_size'] *
                              obj_multiplier)

        if free_pool_size < expected_pool_size:
            raise DaosTestError(
                'Pool Free Size did not match Actual = {} Expected = {}'
                .format(free_pool_size, expected_pool_size))

    @avocado.fail_on(DaosApiError)
    def test_nvme_io(self):
        """
        Test ID: DAOS-2082
        Test Description: Test will run IOR with standard and non-standard
        sizes. IOR will be run for all supported object types. The purpose
        is to verify the pool size (SCM and NVMe) for the IOR file.
        This test runs multiple IOR jobs on the same server instance.
        :avocado: tags=nvme,nvme_io,large
        """
        ior_args = {}

        hostlist_clients = self.params.get("clients", '/run/hosts/*')
        tests = self.params.get("ior_sequence", '/run/ior/*')
        object_type = self.params.get("object_type", '/run/ior/*')
        # loop over every IOR object type
        for obj_type in object_type:
            for ior_param in tests:
                self.hostfile_clients = write_host_file.write_host_file(
                    hostlist_clients,
                    self.workdir,
                    ior_param[4])
                # there is an issue with NVMe if transfer size > 64M; skip
                # these sizes for now
                if ior_param[2] > 67108864:
                    print ("Xfersize > 64M getting failed, DAOS-1264")
                    continue

                self.pool = DaosPool(self.context)
                self.pool.create(self.params.get("mode",
                                                 '/run/pool/createmode/*'),
                                 os.geteuid(),
                                 os.getegid(),
                                 ior_param[0],
                                 self.params.get("setname",
                                                 '/run/pool/createset/*'),
                                 nvme_size=ior_param[1])
                self.pool.connect(1 << 1)
                self.pool_connect = True
                createsvc = self.params.get("svcn", '/run/pool/createsvc/')
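                # IOR takes the pool service replicas as a colon-separated
                # rank list, e.g. "0:1:2"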
                svc_list = ""
                for i in range(createsvc):
                    svc_list += str(int(self.pool.svc.rl_ranks[i])) + ":"
                svc_list = svc_list[:-1]

                ior_args['client_hostfile'] = self.hostfile_clients
                ior_args['pool_uuid'] = self.pool.get_uuid_str()
                ior_args['svc_list'] = svc_list
                ior_args['basepath'] = self.basepath
                ior_args['server_group'] = self.server_group
                ior_args['tmp_dir'] = self.workdir
                ior_args['iorflags'] = self.params.get("iorflags",
                                                       '/run/ior/*')
                ior_args['iteration'] = self.params.get("iteration",
                                                        '/run/ior/*')
                ior_args['stripe_size'] = ior_param[2]
                ior_args['block_size'] = ior_param[3]
                ior_args['stripe_count'] = self.params.get("stripecount",
                                                           '/run/ior/*')
                ior_args['async_io'] = self.params.get("asyncio",
                                                       '/run/ior/*')
                ior_args['object_class'] = obj_type
                ior_args['slots'] = ior_param[4]

                # IOR is going to use the same value for --daos.stripeSize,
                # --daos.recordSize, and the transfer size
                try:
                    size_before_ior = self.pool.pool_query()
                    ior_utils.run_ior(ior_args['client_hostfile'],
                                      ior_args['iorflags'],
                                      ior_args['iteration'],
                                      ior_args['block_size'],
                                      ior_args['stripe_size'],
                                      ior_args['pool_uuid'],
                                      ior_args['svc_list'],
                                      ior_args['stripe_size'],
                                      ior_args['stripe_size'],
                                      ior_args['stripe_count'],
                                      ior_args['async_io'],
                                      ior_args['object_class'],
                                      ior_args['basepath'],
                                      ior_args['slots'],
                                      filename=str(uuid.uuid4()),
                                      display_output=True)
                    self.verify_pool_size(size_before_ior, ior_args)
                except ior_utils.IorFailed as exe:
                    print(exe)
                    print(traceback.format_exc())
                    self.fail()
                try:
                    if self.pool_connect:
                        self.pool.disconnect()
                        self.pool_connect = False
                    if self.pool:
                        self.pool.destroy(1)
                except DaosApiError as exe:
                    print(exe)
                    self.fail("Failed to Destroy/Disconnect the Pool")
Example #10
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test.

        Use Cases:
          -- single pool rebuild, single client, various record/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """
        try:

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(self.createmode, self.createuid, self.creategid,
                        self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool.connect(1 << 1)

            # get pool status we want to test later
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail(
                    "Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")

            # create a container
            container = DaosContainer(self.context)
            container.create(pool.handle)

            # now open it
            container.open()

            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):
                    # make some stuff up and write
                    dkey = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5)))
                    akey = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5)))
                    data = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(self.size)))

                    obj, txn = container.write_an_obj(data,
                                                      len(data),
                                                      dkey,
                                                      akey,
                                                      obj,
                                                      self.rank,
                                                      obj_cls=16)
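                    # obj_cls=16 pins the object to the specified rank
                    # (presumably a spec-rank sibling of obj_cls=15,
                    # DAOS_OC_R1S_SPEC_RANK, used in the multi-pool test)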

                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure it's correct
                    data2 = container.read_an_obj(self.size, dkey, akey, obj,
                                                  txn)
                    if data != data2.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # kill a server that has the data
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done manually
            pool.exclude([self.rank])

            while True:
                # get the pool/rebuild status again
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            if pool.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number of disabled targets reporting incorrectly: {}".
                    format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool.pool_info.pi_rebuild_st.rs_obj_nr, self.objcount))
            if (pool.pool_info.pi_rebuild_st.rs_rec_nr !=
                (self.reccount * self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool.pool_info.pi_rebuild_st.rs_rec_nr,
                    self.reccount * self.objcount))

            # now that rebuild has finished, verify the records are correct
            for tup in saved_data:
                data2 = container.read_an_obj(len(tup[3]), tup[1], tup[2],
                                              tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print(excp)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #11
    def test_multipool_rebuild(self):
        """
        Test ID: Rebuild-002
        Test Description: Expand on the basic test by rebuilding 2
        pools at once.

        Use Cases:
          -- multipool rebuild, single client, various object and record counts

        :avocado: tags=pool,rebuild,rebuildmulti
        """
        try:
            # initialize python pool object then create the underlying
            # daos storage, the way the code is now the pools should be
            # on the same storage and have the same service leader
            pool1 = DaosPool(self.context)
            pool2 = DaosPool(self.context)
            pool1.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)
            pool2.create(self.createmode, self.createuid, self.creategid,
                         self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool1.connect(1 << 1)
            pool2.connect(1 << 1)

            # create containers
            container1 = DaosContainer(self.context)
            container1.create(pool1.handle)
            container2 = DaosContainer(self.context)
            container2.create(pool2.handle)

            # now open them
            container1.open()
            container2.open()

            # Putting the same data in both pools, at least for now, to
            # simplify checking that it's correct
            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):

                    # make some stuff up and write
                    dkey = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5)))
                    akey = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5)))
                    data = (''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(self.size)))

                    # Used DAOS_OC_R1S_SPEC_RANK
                    # 1 replica with specified rank
                    obj, txn = container1.write_an_obj(data,
                                                       len(data),
                                                       dkey,
                                                       akey,
                                                       obj,
                                                       self.rank,
                                                       obj_cls=15)
                    obj, txn = container2.write_an_obj(data,
                                                       len(data),
                                                       dkey,
                                                       akey,
                                                       obj,
                                                       self.rank,
                                                       obj_cls=15)
                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure it's correct
                    data2 = container1.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P1, read it back, didn't match\n")
                    data2 = container2.read_an_obj(self.size, dkey, akey, obj,
                                                   txn)
                    if data != data2.value:
                        self.fail(
                            "Wrote data P2, read it back, didn't match\n")

            # kill a server
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool1.exclude([self.rank])
            pool2.exclude([self.rank])

            # check that rebuild finishes with no errors and the progress
            # data is as we know it to be.  Check pool 1 first, then pool 2
            # below.
            while True:
                pool1.pool_query()
                if pool1.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # know a priori to be true
            if pool1.pool_info.pi_ndisabled != 1:
                self.fail(
                    "P1 number disabled targets reporting incorrectly: {}".
                    format(pool1.pool_info.pi_ndisabled))
            if pool1.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("P1 rebuild error reported: {}".format(
                    pool1.pool_info.pi_rebuild_st.rs_errno))
            if pool1.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("P1 rebuilt objs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_obj_nr, self.objcount))
            if (pool1.pool_info.pi_rebuild_st.rs_rec_nr !=
                (self.reccount * self.objcount)):
                self.fail("P1 rebuilt recs not as expected: {0} {1}".format(
                    pool1.pool_info.pi_rebuild_st.rs_rec_nr,
                    self.reccount * self.objcount))

            # now that rebuild has finished, verify the records are correct
            for tup in saved_data:
                data2 = container1.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

            # now check the other pool
            while True:
                pool2.pool_query()
                if pool2.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check there are no errors and other data matches what we
            # know a priori to be true
            if pool2.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number disabled targets reporting incorrectly: {}".format(
                        pool2.pool_info.pi_ndisabled))
            if pool2.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool2.pool_info.pi_rebuild_st.rs_errno))
            if pool2.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_obj_nr, self.objcount))
            if (pool2.pool_info.pi_rebuild_st.rs_rec_nr !=
                (self.reccount * self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool2.pool_info.pi_rebuild_st.rs_rec_nr,
                    (self.reccount * self.objcount)))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container2.read_an_obj(len(tup[3]), tup[1], tup[2],
                                               tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print(excp)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            server_utils.stop_server(hosts=self.hostlist_servers)
            check_for_pool.cleanup_pools(self.hostlist_servers)
            server_utils.kill_server(self.hostlist_servers)
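
The poll-until-rs_done loop above recurs in nearly every rebuild test in this listing. A minimal sketch of a shared helper, assuming only the pool_query()/pi_rebuild_st interface shown in these examples (the helper name and timeout parameter are hypothetical additions, not part of the DAOS API):

import time

def wait_for_rebuild(pool, interval=2, timeout=300):
    """Poll pool_query() until rebuild reports done; raise on timeout."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        pool.pool_query()
        if pool.pool_info.pi_rebuild_st.rs_done == 1:
            return
        time.sleep(interval)
    raise RuntimeError("rebuild did not finish within {}s".format(timeout))

With it, the two while-True blocks above collapse to wait_for_rebuild(pool1) and wait_for_rebuild(pool2).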
Example #14
class DestroyRebuild(Test):

    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: tags=pool,pooldestroy,rebuild,desreb
    """

    build_paths = []
    server_group = ""
    context = None
    pool = None
    hostfile = ""

    def setUp(self):
        """ setup for the test """
        self.agent_sessions = None
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = AgentUtils.run_agent(self.basepath, self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        self.pool.get_uuid_str()

        time.sleep(2)

    def tearDown(self):
        """ cleanup after the test """

        try:
            os.remove(self.hostfile)
            if self.pool:
                self.pool.destroy(1)
        finally:
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            server_utils.stop_server(hosts=self.hostlist)


    def test_destroy_while_rebuilding(self):
        """
        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target hosts
            # and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            server = DaosServer(self.context, bytes(self.server_group),
                                svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            server.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            print("exclude target ")

            #self.pool.disconnect()
            #print "disconnect "

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.pool.destroy(1)
            print("destroy ")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #15
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test.

        Use Cases:
          -- single pool rebuild, single client, various record/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """
        try:

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(self.createmode, self.createuid, self.creategid,
                        self.createsize, self.createsetid)

            # want an open connection during rebuild
            pool.connect(1 << 1)

            # get pool status we want to test later
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")

            # create a container
            container = DaosContainer(self.context)
            container.create(pool.handle)

            # now open it
            container.open()

            saved_data = []
            for _objc in range(self.objcount):
                obj = None
                for _recc in range(self.reccount):
                    # make some stuff up and write
                    dkey = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in range(5)))
                    akey = (
                        ''.join(random.choice(string.ascii_uppercase +
                                              string.digits) for _ in range(5)))
                    data = (''.join(random.choice(string.ascii_uppercase +
                                                  string.digits)
                                    for _ in range(self.size)))

                    obj, txn = container.write_an_obj(data, len(data), dkey,
                                                      akey, obj, self.rank,
                                                      obj_cls=16)

                    saved_data.append((obj, dkey, akey, data, txn))

                    # read the data back and make sure its correct
                    data2 = container.read_an_obj(self.size, dkey, akey, obj,
                                                  txn)
                    if data != data2.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # kill the server that holds the data we just wrote
            server = DaosServer(self.context, self.server_group, self.rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done manually
            pool.exclude([self.rank])

            while True:
                # get the pool/rebuild status again
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            if pool.pool_info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}"
                          .format(pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != self.objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}"
                          .format(pool.pool_info.pi_rebuild_st.rs_obj_nr,
                                  self.objcount))
            if (pool.pool_info.pi_rebuild_st.rs_rec_nr !=
                    (self.reccount*self.objcount)):
                self.fail("Rebuilt recs not as expected: {0} {1}"
                          .format(pool.pool_info.pi_rebuild_st.rs_rec_nr,
                                  self.reccount*self.objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container.read_an_obj(len(tup[3]), tup[1], tup[2],
                                              tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as excp:
            print(excp)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #16
class FullPoolContainerCreate(Test):
    """
    Class for test to create a container in a pool with no remaining free space.
    """

    def setUp(self):
        self.agent_sessions = None
        # get paths from the build_vars generated by build
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               "../../../../.build_vars.json")) as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_default_oops')

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.cont = None
        self.cont2 = None
        self.pool = DaosPool(self.context)
        self.d_log = DaosLog(self.context)
        self.hostlist = self.params.get("test_machines1", '/hosts/')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)

        self.agent_sessions = AgentUtils.run_agent(self.basepath, self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group, self.basepath)

    def tearDown(self):
        # shut 'er down
        """
        wrap pool destroy in a try; in case pool create didn't succeed, we
        still need the server to be shut down in any case
        """
        try:
            self.pool.destroy(1)
        finally:
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            server_utils.stop_server(hosts=self.hostlist)

    def test_no_space_cont_create(self):
        """
        :avocado: tags=pool,cont,fullpoolcontcreate,small,vm
        """
        # full storage rc
        err = "-1007"
        # probably should be -1007, revisit later
        err2 = "-1009"

        # create pool
        mode = self.params.get("mode", '/conttests/createmode/')
        self.d_log.debug("mode is {0}".format(mode))
        uid = os.geteuid()
        gid = os.getegid()
        # 16 mb pool, minimum size currently possible
        size = 16777216

        self.d_log.debug("creating pool")
        self.pool.create(mode, uid, gid, size, self.server_group, None)
        self.d_log.debug("created pool")

        # connect to the pool
        self.d_log.debug("connecting to pool")
        self.pool.connect(1 << 1)
        self.d_log.debug("connected to pool")

        # query the pool
        self.d_log.debug("querying pool info")
        dummy_pool_info = self.pool.pool_query()
        self.d_log.debug("queried pool info")

        # create a container
        try:
            self.d_log.debug("creating container")
            self.cont = DaosContainer(self.context)
            self.cont.create(self.pool.handle)
            self.d_log.debug("created container")
        except DaosApiError as excep:
            self.d_log.error("caught exception creating container: "
                             "{0}".format(excep))
            self.fail("caught exception creating container: {0}".format(excep))

        self.d_log.debug("opening container")
        self.cont.open()
        self.d_log.debug("opened container")

        # generate random dkey, akey each time
        # write 1mb until no space, then 1kb, etc. to fill pool quickly
        for obj_sz in [1048576, 1024, 1]:
            write_count = 0
            while True:
                self.d_log.debug("writing obj {0}, sz {1} to "
                                 "container".format(write_count, obj_sz))
                my_str = "a" * obj_sz
                my_str_sz = obj_sz
                dkey = (
                    ''.join(random.choice(string.ascii_lowercase)
                            for _ in range(5)))
                akey = (
                    ''.join(random.choice(string.ascii_lowercase)
                            for _ in range(5)))
                try:
                    dummy_oid, dummy_tx = self.cont.write_an_obj(my_str,
                                                                 my_str_sz,
                                                                 dkey, akey,
                                                                 obj_cls=1)
                    self.d_log.debug("wrote obj {0}, sz {1}".format(write_count,
                                                                    obj_sz))
                    write_count += 1
                except DaosApiError as excep:
                    if not (err in repr(excep) or err2 in repr(excep)):
                        self.d_log.error("caught exception while writing "
                                         "object: {0}".format(repr(excep)))
                        self.fail("caught exception while writing object: {0}"
                                  .format(repr(excep)))
                    else:
                        self.d_log.debug("pool is too full for {0} byte "
                                         "objects".format(obj_sz))
                        break

        self.d_log.debug("closing container")
        self.cont.close()
        self.d_log.debug("closed container")
        # create a 2nd container now that pool is full
        try:
            self.d_log.debug("creating 2nd container")
            self.cont2 = DaosContainer(self.context)
            self.cont2.create(self.pool.handle)
            self.d_log.debug("created 2nd container")

            self.d_log.debug("opening container 2")
            self.cont2.open()
            self.d_log.debug("opened container 2")

            self.d_log.debug("writing one more object, write expected to fail")
            self.cont2.write_an_obj(my_str, my_str_sz, dkey, akey, obj_cls=1)
            self.d_log.debug("wrote one more object--this should never print")
        except DaosApiError as excep:
            if not (err in repr(excep) or err2 in repr(excep)):
                self.d_log.error("caught unexpected exception while "
                                 "writing object: {0}".format(repr(excep)))
                self.fail("caught unexpected exception while writing "
                          "object: {0}".format(repr(excep)))
            else:
                self.d_log.debug("correctly caught -1007 while attempting "
                                 "to write object in full pool")
Example #17
class RebuildNoCap(TestWithServers):

    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: recursive
    """

    def setUp(self):
        super(RebuildNoCap, self).setUp()
        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.pool.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool
        how_many_bytes = int(self.params.get("datasize",
                                             '/run/testparams/datatowrite/'))
        exepath = self.prefix + \
            "/../src/tests/ftest/util/write_some_data.py"
        cmd = "export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"\
              " --np 1 --host {1} {2} {3} testfile".format(
                  uuid, self.hostlist_servers[0], exepath, how_many_bytes)
        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """ cleanup after the test """

        try:
            if self.pool:
                self.pool.destroy(1)
        finally:
            super(RebuildNoCap, self).tearDown()


    def test_rebuild_no_capacity(self):
        """
        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target hosts
            # and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            d_server = DaosServer(self.context, bytes(self.server_group),
                                  svr_to_kill)

            time.sleep(1)
            d_server.kill(1)

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist_servers):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st.rs_errno == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #18
    def test_rebuild_with_io(self):
        """
        Test ID: Rebuild-003

        Test Description: Trigger a rebuild while I/O is ongoing.

        Use Cases:
          -- single pool, single client performing continuous read/write/verify
             sequence while failure/rebuild is triggered in another process

        :avocado: tags=pool,rebuild,rebuildwithio
        """

        # the rebuild tests need to redo this setup each time, so it is not in
        # setUp() as it usually would be
        server_group = self.params.get("name", '/server_config/',
                                       'daos_server')

        basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        hostfile = write_host_file.write_host_file(self.hostlist, self.workdir)

        try:
            self.agent_sessions = AgentUtils.run_agent(basepath, self.hostlist)
            server_utils.run_server(hostfile, server_group, basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)
            pool.connect(1 << 1)
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            # get pool status and make sure it all looks good before we start
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            dummy_pool_version = pool.pool_info.pi_rebuild_st.rs_version

            # do I/O for 30 seconds
            dummy_bw = io_utilities.continuous_io(container, 30)

            # trigger the rebuild
            rank = self.params.get("rank", '/run/testparams/ranks/*')
            server = DaosServer(self.context, server_group, rank)
            server.kill(1)
            pool.exclude([rank])

            # do another 30 seconds of I/O,
            # waiting for some improvements in server bootstrap
            # at which point we can move the I/O to a separate client and
            # really pound it with I/O
            dummy_bw = io_utilities.continuous_io(container, 30)

            # wait for the rebuild to finish
            while True:
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check rebuild statistics
            if pool.pool_info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr <= 0:
                self.fail("No objects have been rebuilt.")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr <= 0:
                self.fail("No records have been rebuilt.")

        except (ValueError, DaosApiError) as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # wait for the I/O process to finish
            try:
                server_utils.stop_server(hosts=self.hostlist)
                os.remove(hostfile)
                # really make sure everything is gone
                check_for_pool.cleanup_pools(self.hostlist)
            finally:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
                server_utils.kill_server(self.hostlist)
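
Killing a rank and then excluding its target is the standard rebuild trigger in these tests. A hedged sketch of that pair as one helper, assuming the DaosServer/DaosPool calls used above are in scope (the helper name is hypothetical):

def trigger_rebuild(context, server_group, rank, pool):
    """Kill the server at `rank` and exclude its target, starting a rebuild."""
    server = DaosServer(context, server_group, rank)
    server.kill(1)
    # until exclusion of failed targets is automatic, do it manually
    pool.exclude([rank])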
Example #19
File: nvme_io.py, Project: morsiee/daos
class NvmeIo(TestWithServers):
    """
    Test Class Description:
        Test the general Metadata operations and boundary conditions.
    :avocado: recursive
    """

    def setUp(self):
        super(NvmeIo, self).setUp()

        # initialize variables
        self.out_queue = None
        self.pool_connect = False

    def verify_pool_size(self, original_pool_info, ior_args):
        """
        Validate the pool size consumed by an IOR run.
        original_pool_info: Pool info prior to IOR
        ior_args: IOR args to calculate the file size
        """
        # get the current pool size for comparison
        current_pool_info = self.pool.pool_query()
        # if the transfer size is >= 4K the pool size is verified against
        # NVMe; otherwise it is checked against SCM
        if ior_args['transfer_size'] >= 4096:
            print("Size is >= 4K, verification will be done with NVMe size")
            storage_index = 1
        else:
            print("Size is < 4K, verification will be done with SCM size")
            storage_index = 0

        free_pool_size = (
            original_pool_info.pi_space.ps_space.s_free[storage_index]
            - current_pool_info.pi_space.ps_space.s_free[storage_index])

        obj_multiplier = 1
        replica_number = re.findall(r'\d+', ior_args['object_class'])
        if replica_number:
            obj_multiplier = int(replica_number[0])
        expected_pool_size = (ior_args['client_processes'] *
                              ior_args['block_size'] * obj_multiplier)

        if free_pool_size < expected_pool_size:
            raise DaosTestError(
                'Pool Free Size did not match Actual = {} Expected = {}'
                .format(free_pool_size, expected_pool_size))

    @avocado.fail_on(DaosApiError)
    def test_nvme_io(self):
        """
        Test ID: DAOS-2082
        Test Description: Test will run IOR with standard and non-standard
        sizes. IOR will be run for all supported object types. Purpose is to
        verify the pool size (SCM and NVMe) for the IOR file.
        This test is running multiple IOR on same server start instance.
        :avocado: tags=nvme,nvme_io,large
        """
        ior_args = {}

        tests = self.params.get("ior_sequence", '/run/ior/*')
        object_type = self.params.get("object_type", '/run/ior/*')
        # loop over every IOR object type
        for obj_type in object_type:
            for ior_param in tests:
                # there is an issue with NVMe if the transfer size is > 64M;
                # skip these sizes for now
                if ior_param[2] > 67108864:
                    print("Xfersize > 64M fails, DAOS-1264")
                    continue

                self.pool = DaosPool(self.context)
                self.pool.create(self.params.get("mode",
                                                 '/run/pool/createmode/*'),
                                 os.geteuid(),
                                 os.getegid(),
                                 ior_param[0],
                                 self.params.get("setname",
                                                 '/run/pool/createset/*'),
                                 nvme_size=ior_param[1])
                self.pool.connect(1 << 1)
                self.pool_connect = True
                createsvc = self.params.get("svcn", '/run/pool/createsvc/')
                svc_list = ""
                for i in range(createsvc):
                    svc_list += str(int(self.pool.svc.rl_ranks[i])) + ":"
                svc_list = svc_list[:-1]

                ior_args['client_hostfile'] = self.hostfile_clients
                ior_args['pool_uuid'] = self.pool.get_uuid_str()
                ior_args['svc_list'] = svc_list
                ior_args['basepath'] = self.basepath
                ior_args['server_group'] = self.server_group
                ior_args['tmp_dir'] = self.workdir
                ior_args['iorflags'] = self.params.get("iorflags",
                                                       '/run/ior/*')
                ior_args['iteration'] = self.params.get("iteration",
                                                        '/run/ior/*')
                ior_args['transfer_size'] = ior_param[2]
                ior_args['block_size'] = ior_param[3]
                ior_args['object_class'] = obj_type
                ior_args['client_processes'] = ior_param[4]

                try:
                    size_before_ior = self.pool.pool_query()
                    ior_utils.run_ior_daos(ior_args['client_hostfile'],
                                           ior_args['iorflags'],
                                           ior_args['iteration'],
                                           ior_args['block_size'],
                                           ior_args['transfer_size'],
                                           ior_args['pool_uuid'],
                                           ior_args['svc_list'],
                                           ior_args['object_class'],
                                           ior_args['basepath'],
                                           ior_args['client_processes'],
                                           cont_uuid=str(uuid.uuid4()),
                                           display_output=True)
                    self.verify_pool_size(size_before_ior, ior_args)
                except ior_utils.IorFailed as exe:
                    print(exe)
                    print(traceback.format_exc())
                    self.fail()
                try:
                    if self.pool_connect:
                        self.pool.disconnect()
                        self.pool_connect = False
                    if self.pool:
                        self.pool.destroy(1)
                except DaosApiError as exe:
                    print(exe)
                    self.fail("Failed to Destroy/Disconnect the Pool")
Example #20
    def test_rebuild_with_io(self):
        """
        Test ID: Rebuild-003

        Test Description: Trigger a rebuild while I/O is ongoing.

        Use Cases:
          -- single pool, single client performing continuous read/write/verify
             sequence while failure/rebuild is triggered in another process

        :avocado: tags=pool,rebuild,rebuildwithio
        """

        # the rebuild tests need to redo this setup each time, so it is not in
        # setUp() as it usually would be
        server_group = self.params.get("name", '/server_config/',
                                       'daos_server')

        self.hostlist_servers = self.params.get("test_machines", '/run/hosts/')
        hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.workdir)

        try:
            self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                        self.hostlist_servers)
            server_utils.run_server(hostfile_servers, server_group,
                                    self.basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.context)
            pool.create(createmode, createuid, creategid,
                        createsize, createsetid, None)
            pool.connect(1 << 1)
            container = DaosContainer(self.context)
            container.create(pool.handle)
            container.open()

            # get pool status and make sure it all looks good before we start
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail("Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            dummy_pool_version = pool.pool_info.pi_rebuild_st.rs_version

            # do I/O for 30 seconds
            dummy_bw = io_utilities.continuous_io(container, 30)

            # trigger the rebuild
            rank = self.params.get("rank", '/run/testparams/ranks/*')
            server = DaosServer(self.context, server_group, rank)
            server.kill(1)
            pool.exclude([rank])

            # do another 30 seconds of I/O,
            # waiting for some improvements in server bootstrap
            # at which point we can move the I/O to a separate client and
            # really pound it with I/O
            dummy_bw = io_utilities.continuous_io(container, 30)

            # wait for the rebuild to finish
            while True:
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            # check rebuild statistics
            if pool.pool_info.pi_ndisabled != 1:
                self.fail("Number of disabled targets reporting incorrectly: {}"
                          .format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr <= 0:
                self.fail("No objects have been rebuilt.")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr <= 0:
                self.fail("No records have been rebuilt.")

        except (ValueError, DaosApiError) as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            # wait for the I/O process to finish
            try:
                server_utils.stop_server(hosts=self.hostlist_servers)
                os.remove(hostfile_servers)
                # really make sure everything is gone
                check_for_pool.cleanup_pools(self.hostlist_servers)
            finally:
                if self.agent_sessions:
                    agent_utils.stop_agent(self.agent_sessions)
                server_utils.kill_server(self.hostlist_servers)
Example #21
class FullPoolContainerCreate(Test):
    """
    Class for test to create a container in a pool with no remaining free space.
    """

    def setUp(self):
        # get paths from the build_vars generated by build
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                "../../../../.build_vars.json")) as f:
            build_paths = json.load(f)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_default_oops')

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.pool = DaosPool(self.context)
        self.d_log = DaosLog(self.context)
        self.hostlist = self.params.get("test_machines1", '/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, self.workdir)
        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)

    def tearDown(self):
        # shut 'er down
        """
        wrap pool destroy in a try; in case pool create didn't succeed, we
        still need the server to be shut down in any case
        """
        try:
            self.pool.destroy(1)
        finally:
            ServerUtils.stopServer(hosts=self.hostlist)

    def test_no_space_cont_create(self):
        """
        :avocado: tags=pool,cont,fullpoolcontcreate,small,vm
        """
        setid = self.params.get("setname",
                                '/run/testparams/setnames/validsetname/')

        # full storage rc
        err = "-1007"
        # probably should be -1007, revisit later
        err2 = "-1009"

        # create pool
        mode = self.params.get("mode", '/conttests/createmode/')
        self.d_log.debug("mode is {0}".format(mode))
        uid = os.geteuid()
        gid = os.getegid()
        # 16 mb pool, minimum size currently possible
        size = 16777216

        self.d_log.debug("creating pool")
        self.pool.create(mode, uid, gid, size, self.server_group, None)
        self.d_log.debug("created pool")

        # connect to the pool
        self.d_log.debug("connecting to pool")
        self.pool.connect(1 << 1)
        self.d_log.debug("connected to pool")

        # query the pool
        self.d_log.debug("querying pool info")
        pool_info = self.pool.pool_query()
        self.d_log.debug("queried pool info")

        # create a container
        try:
            self.d_log.debug("creating container")
            self.cont = DaosContainer(self.context)
            self.cont.create(self.pool.handle)
            self.d_log.debug("created container")
        except DaosApiError as e:
            self.d_log.error("caught exception creating container: "
                             "{0}".format(e))
            self.fail("caught exception creating container: {0}".format(e))

        self.d_log.debug("opening container")
        self.cont.open()
        self.d_log.debug("opened container")

        # generate random dkey, akey each time
        # write 1mb until no space, then 1kb, etc. to fill pool quickly
        for x in [1048576, 1024, 1]:
            write_count = 0
            while True:
                self.d_log.debug("writing obj {0}, sz {1} to "
                                 "container".format(write_count, x))
                my_str = "a" * x
                my_str_sz = x
                dkey = ''.join(
                    random.choice(string.ascii_lowercase) for _ in range(5))
                akey = ''.join(
                    random.choice(string.ascii_lowercase) for _ in range(5))
                try:
                    oid, tx = self.cont.write_an_obj(my_str, my_str_sz, dkey,
                                                     akey, obj_cls=1)
                    self.d_log.debug("wrote obj {0}, sz {1}".format(write_count,
                                                                    x))
                    write_count += 1
                except DaosApiError as e:
                    if not (err in repr(e) or err2 in repr(e)):
                        self.d_log.error("caught exception while writing "
                                         "object: {0}".format(repr(e)))
                        self.fail("caught exception while writing object: {0}"
                                  .format(repr(e)))
                    else:
                        self.d_log.debug("pool is too full for {0} byte "
                                         "objects".format(x))
                        break

        self.d_log.debug("closing container")
        self.cont.close()
        self.d_log.debug("closed container")
        # create a 2nd container now that pool is full
        try:
            self.d_log.debug("creating 2nd container")
            self.cont2 = DaosContainer(self.context)
            self.cont2.create(self.pool.handle)
            self.d_log.debug("created 2nd container")

            self.d_log.debug("opening container 2")
            self.cont2.open()
            self.d_log.debug("opened container 2")

            self.d_log.debug("writing one more object, write expected to fail")
            self.cont2.write_an_obj(my_str, my_str_sz, dkey, akey, obj_cls=1)
            self.d_log.debug("wrote one more object--this should never print")
        except DaosApiError as e:
            if not (err in repr(e) or err2 in repr(e)):
                self.d_log.error("caught unexpected exception while "
                                 "writing object: {0}".format(repr(e)))
                self.fail("caught unexpected exception while writing "
                          "object: {0}".format(repr(e)))
            else:
                self.d_log.debug("correctly caught -1007 while attempting "
                                 "to write object in full pool")
Example #22
class RebuildNoCap(Test):
    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: tags=pool,rebuild,nocap
    """

    build_paths = []
    server_group = ""
    CONTEXT = None
    POOL = None
    hostfile = ""

    def setUp(self):
        """ setup for the test """

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as f:
            build_paths = json.load(f)
        self.CONTEXT = DaosContext(build_paths['PREFIX'] + '/lib/')

        # generate a hostfile
        self.host_list = self.params.get("test_machines", '/run/hosts/')
        tmp = build_paths['PREFIX'] + '/tmp'
        self.hostfile = WriteHostFile.WriteHostFile(self.host_list, tmp)

        # fire up the DAOS servers
        self.server_group = self.params.get("server_group", '/run/server/',
                                            'daos_server')
        ServerUtils.runServer(self.hostfile, self.server_group,
                              build_paths['PREFIX'] + '/../')
        time.sleep(3)

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.POOL = DaosPool(self.CONTEXT)
        self.POOL.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.POOL.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool
        how_many_bytes = int(
            self.params.get("datasize", '/run/testparams/datatowrite/'))
        exepath = build_paths['PREFIX'] +\
                  "/../src/tests/ftest/util/WriteSomeData.py"
        cmd = "export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"\
              " --np 1 --host {1} {2} {3} testfile".format(
                  uuid, self.host_list[0], exepath, how_many_bytes)
        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """ cleanup after the test """

        os.remove(self.hostfile)
        self.POOL.destroy(1)
        ServerUtils.stopServer()

    def test_rebuild_no_capacity(self):
        """
        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print "\nsetup complete, starting test\n"

            # create a server object that references one of our pool target hosts
            # and then kill it
            svr_to_kill = int(
                self.params.get("rank_to_kill", '/run/testparams/ranks/'))
            sh = DaosServer(self.CONTEXT, bytes(self.server_group),
                            svr_to_kill)

            time.sleep(1)
            sh.kill(1)

            # exclude the target from the dead server
            self.POOL.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.POOL.connect(1 << 1)
            status = self.POOL.pool_query()
            if not status.pi_ntargets == len(self.host_list):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st.rs_errno == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except (ValueError, DaosApiError) as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #23
File: nvme_io.py, Project: yulujia/daos
class NvmeIo(IorTestBase):
    """Test class for NVMe with IO tests.

    Test Class Description:
        Test the general Metadata operations and boundary conditions.

    :avocado: recursive
    """
    @avocado.fail_on(DaosApiError)
    def test_nvme_io(self):
        """Jira ID: DAOS-2082.

        Test Description:
            Test will run IOR with standard and non-standard sizes.  IOR will
            be run for all Object type supported. Purpose is to verify pool
            size (SCM and NVMe) for IOR file.

        Use Cases:
            Running multiple IOR on same server start instance.

        :avocado: tags=all,daosio,full_regression,hw,nvme_io
        """
        # Pool params
        pool_mode = self.params.get("mode", '/run/pool/createmode/*')
        pool_uid = os.geteuid()
        pool_gid = os.getegid()
        pool_group = self.params.get("setname", '/run/pool/createset/*')
        pool_svcn = self.params.get("svcn", '/run/pool/createsvc/')

        # Test params
        tests = self.params.get("ior_sequence", '/run/ior/*')
        object_type = self.params.get("object_type", '/run/ior/*')

        # Loop for every IOR object type
        for obj_type in object_type:
            for ior_param in tests:
                # There is an issue with NVMe if the transfer size is > 64M;
                # skip these sizes for now
                if ior_param[2] > 67108864:
                    self.log.warning("Xfersize > 64M fails - DAOS-1264")
                    continue

                # Create and connect to a pool
                self.pool = DaosPool(self.context)
                self.pool.create(pool_mode,
                                 pool_uid,
                                 pool_gid,
                                 ior_param[0],
                                 pool_group,
                                 svcn=pool_svcn,
                                 nvme_size=ior_param[1])
                self.pool.connect(1 << 1)

                # Get the current pool sizes
                size_before_ior = self.pool.pool_query()

                # Run ior with the parameters specified for this pass
                self.ior_cmd.transfer_size.update(ior_param[2])
                self.ior_cmd.block_size.update(ior_param[3])
                self.ior_cmd.daos_oclass.update(obj_type)
                self.ior_cmd.set_daos_params(self.server_group, self.pool)
                self.run_ior(self.get_job_manager_command(), ior_param[4])

                # Verify IOR consumed the expected amount of space from the pool
                self.verify_pool_size(size_before_ior, ior_param[4])

                try:
                    if self.pool:
                        self.pool.disconnect()
                        self.pool.destroy(1)
                except DaosApiError as error:
                    self.log.error("Pool disconnect/destroy error: %s",
                                   str(error))
                    self.fail("Failed to Destroy/Disconnect the Pool")
Example #24
    def test_simple_rebuild(self):
        """
        Test ID: Rebuild-001

        Test Description: The most basic rebuild test.

        Use Cases:
          -- single pool rebuild, single client, various record/object
             counts

        :avocado: tags=pool,rebuild,rebuildsimple
        """

        # the rebuild tests need to redo this setup each time, so it is not in
        # setUp() as it usually would be
        setid = self.params.get("setname", '/run/testparams/setnames/')
        server_group = self.params.get("server_group", '/server/',
                                       'daos_server')

        basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")
        tmp = self.build_paths['PREFIX'] + '/tmp'

        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp)

        try:
            ServerUtils.runServer(hostfile, server_group, basepath)

            # use the uid/gid of the user running the test, these should
            # be perfectly valid
            createuid = os.geteuid()
            creategid = os.getegid()

            # parameters used in pool create that are in yaml
            createmode = self.params.get("mode", '/run/testparams/createmode/')
            createsetid = self.params.get("setname",
                                          '/run/testparams/createset/')
            createsize = self.params.get("size", '/run/testparams/createsize/')

            # initialize a python pool object then create the underlying
            # daos storage
            pool = DaosPool(self.Context)
            pool.create(createmode, createuid, creategid, createsize,
                        createsetid, None)

            # want an open connection during rebuild
            pool.connect(1 << 1)

            # get pool status we want to test later
            pool.pool_query()
            if pool.pool_info.pi_ndisabled != 0:
                self.fail(
                    "Number of disabled targets reporting incorrectly.\n")
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error but rebuild hasn't run.\n")
            if pool.pool_info.pi_rebuild_st.rs_done != 1:
                self.fail("Rebuild is running but device hasn't failed yet.\n")
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
                self.fail("Rebuilt objs not zero.\n")
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
                self.fail("Rebuilt recs not zero.\n")
            pool_version = pool.pool_info.pi_rebuild_st.rs_version

            # create a container
            container = DaosContainer(self.Context)
            container.create(pool.handle)

            # now open it
            container.open()

            # how many objects and records are we creating
            objcount = self.params.get("objcount",
                                       '/run/testparams/numobjects/*')
            reccount = self.params.get("reccount",
                                       '/run/testparams/numrecords/*')
            if objcount == 0:
                reccount = 0

            # which rank to write to and kill
            rank = self.params.get("rank", '/run/testparams/ranks/*')

            # how much data to write with each key
            size = self.params.get("size", '/run/testparams/datasize/')

            saved_data = []
            for i in range(0, objcount):
                obj = None
                for j in range(0, reccount):

                    # make some stuff up and write
                    dkey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    akey = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(5))
                    data = ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(size))

                    obj, tx = container.write_an_obj(data, len(data), dkey,
                                                     akey, obj, rank)

                    saved_data.append((obj, dkey, akey, data, tx))

                    # read the data back and make sure its correct
                    data2 = container.read_an_obj(size, dkey, akey, obj, tx)
                    if data != data2.value:
                        self.fail("Write data 1, read it back, didn't match\n")

            # kill the server that holds the data we just wrote
            server = DaosServer(self.Context, server_group, rank)
            server.kill(1)

            # temporarily, the exclude of a failed target must be done
            # manually
            pool.exclude([rank])

            while True:
                # get the pool/rebuild status again
                pool.pool_query()
                if pool.pool_info.pi_rebuild_st.rs_done == 1:
                    break
                else:
                    time.sleep(2)

            if pool.pool_info.pi_ndisabled != 1:
                self.fail(
                    "Number of disabled targets reporting incorrectly: {}".
                    format(pool.pool_info.pi_ndisabled))
            if pool.pool_info.pi_rebuild_st.rs_errno != 0:
                self.fail("Rebuild error reported: {}".format(
                    pool.pool_info.pi_rebuild_st.rs_errno))
            if pool.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
                self.fail("Rebuilt objs not as expected: {0} {1}".format(
                    pool.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
            if pool.pool_info.pi_rebuild_st.rs_rec_nr != (reccount * objcount):
                self.fail("Rebuilt recs not as expected: {0} {1}".format(
                    pool.pool_info.pi_rebuild_st.rs_rec_nr,
                    reccount * objcount))

            # now that the rebuild finished verify the records are correct
            for tup in saved_data:
                data2 = container.read_an_obj(len(tup[3]), tup[1], tup[2],
                                              tup[0], tup[4])
                if tup[3] != data2.value:
                    self.fail("after rebuild data didn't check out")

        except DaosApiError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")

        finally:
            try:
                ServerUtils.stopServer(hosts=self.hostlist)
                os.remove(hostfile)
                # really make sure everything is gone
                CheckForPool.CleanupPools(self.hostlist)
            finally:
                ServerUtils.killServer(self.hostlist)
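
saved_data stores bare (obj, dkey, akey, data, txn) tuples that are later read back as tup[0]..tup[4]. A small sketch replacing the tuple with a named record; the field names are assumptions mirroring the write call above:

from collections import namedtuple

WrittenRecord = namedtuple("WrittenRecord", "obj dkey akey data txn")

# append WrittenRecord(obj, dkey, akey, data, tx) while writing, then:
for rec in saved_data:
    data2 = container.read_an_obj(len(rec.data), rec.dkey, rec.akey,
                                  rec.obj, rec.txn)
    if rec.data != data2.value:
        self.fail("after rebuild data didn't check out")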
Example #25
class RebuildNoCap(Test):

    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: tags=pool,rebuild,nocap
    """
    def setUp(self):
        """ setup for the test """
        self.agent_sessions = None
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = AgentUtils.run_agent(self.basepath, self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.pool.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool
        how_many_bytes = int(self.params.get("datasize",
                                             '/run/testparams/datatowrite/'))
        # note: no leading '/' on the second component, or os.path.join
        # would discard PREFIX entirely
        exepath = os.path.join(build_paths['PREFIX'],
                               "../src/tests/ftest/util/write_some_data.py")
        cmd = "export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"\
              " --np 1 --host {1} {2} {3} testfile".format(
                  uuid, self.hostlist[0], exepath, how_many_bytes)
        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """ cleanup after the test """

        try:
            os.remove(self.hostfile)
            if self.pool:
                self.pool.destroy(1)
        finally:
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            server_utils.stop_server(hosts=self.hostlist)


    def test_rebuild_no_capacity(self):
        """
        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            d_server = DaosServer(self.context, bytes(self.server_group),
                                  svr_to_kill)

            time.sleep(1)
            d_server.kill(1)

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if status.pi_ntargets != len(self.hostlist):
                self.fail("target count wrong.\n")
            if status.pi_ndisabled != 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st.rs_errno == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #26
class DestroyRebuild(Test):

    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: tags=pool,pooldestroy,rebuild,desreb
    """

    build_paths = []
    server_group = ""
    CONTEXT = None
    POOL = None
    hostfile = ""

    def setUp(self):
        """ setup for the test """

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as f:
            build_paths = json.load(f)
        self.CONTEXT = DaosContext(build_paths['PREFIX'] + '/lib/')

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        tmp = build_paths['PREFIX'] + '/tmp'
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp)

        # fire up the DAOS servers
        self.server_group = self.params.get("server_group", '/run/server/',
                                            'daos_server')
        ServerUtils.runServer(self.hostfile, self.server_group,
                              build_paths['PREFIX'] + '/../')
        time.sleep(3)

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.POOL = DaosPool(self.CONTEXT)
        self.POOL.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.POOL.get_uuid_str()

        time.sleep(2)

    def tearDown(self):
        """ cleanup after the test """

        try:
            os.remove(self.hostfile)
            if self.POOL:
                self.POOL.destroy(1)
        finally:
            ServerUtils.stopServer(hosts=self.hostlist)


    def test_destroy_while_rebuilding(self):
        """
        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print "\nsetup complete, starting test\n"

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            sh = DaosServer(self.CONTEXT, bytes(self.server_group), svr_to_kill)

            print "created server "

            # BUG if you don't connect the rebuild doesn't start correctly
            self.POOL.connect(1 << 1)
            status = self.POOL.pool_query()
            if status.pi_ntargets != len(self.hostlist):
                self.fail("target count wrong.\n")
            if status.pi_ndisabled != 0:
                self.fail("disabled target count wrong.\n")

            print "connect "

            time.sleep(1)
            sh.kill(1)

            print "killed server "

            # exclude the target from the dead server
            self.POOL.exclude([svr_to_kill])

            print "exclude target "

            #self.POOL.disconnect()
            #print "disconnect "

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.POOL.destroy(1)
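            # the argument to destroy() is presumably a "force" flag, letting
            # the pool be torn down while rebuild is still in flight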
            print "destroy "

        except DaosApiError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #27
class DestroyRebuild(Test):

    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: recursive
    """

    build_paths = []
    server_group = ""
    context = None
    pool = None
    hostfile_servers = ""

    def setUp(self):
        """ setup for the test """
        self.agent_sessions = None
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist_servers = self.params.get("test_machines", '/run/hosts/')
        self.hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers)
        server_utils.run_server(self.hostfile_servers, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        self.pool.get_uuid_str()

        time.sleep(2)

    def tearDown(self):
        """ cleanup after the test """

        try:
            os.remove(self.hostfile_servers)
            if self.pool:
                self.pool.destroy(1)
        finally:
            if self.agent_sessions:
                agent_utils.stop_agent(self.agent_sessions)
            server_utils.stop_server(hosts=self.hostlist_servers)


    def test_destroy_while_rebuilding(self):
        """
        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            server = DaosServer(self.context, bytes(self.server_group),
                                svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if status.pi_ntargets != len(self.hostlist_servers):
                self.fail("target count wrong.\n")
            if status.pi_ndisabled != 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            server.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            print("exclude target ")

            #self.pool.disconnect()
            #print "disconnect "

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.pool.destroy(1)
            print("destroy ")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
Example #28
class FullPoolContainerCreate(TestWithServers):
    """
    Class for test to create a container in a pool with no remaining free space.
    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        super(FullPoolContainerCreate, self).__init__(*args, **kwargs)
        self.cont = None
        self.cont2 = None

    @skipForTicket("DAOS-3142")
    def test_no_space_cont_create(self):
        """
        :avocado: tags=all,container,tiny,full_regression,fullpoolcontcreate
        """
        # expected rc when storage is full
        err = "-1007"
        # sometimes returned instead; probably should be -1007, revisit later
        err2 = "-1009"

        # create pool
        self.pool = DaosPool(self.context)
        mode = self.params.get("mode", '/conttests/createmode/')
        self.d_log.debug("mode is {0}".format(mode))
        uid = os.geteuid()
        gid = os.getegid()
        # 16 MiB pool, the minimum size currently possible
        size = 16777216

        self.d_log.debug("creating pool")
        self.pool.create(mode, uid, gid, size, self.server_group, None)
        self.d_log.debug("created pool")

        # connect to the pool
        self.d_log.debug("connecting to pool")
        self.pool.connect(1 << 1)
        self.d_log.debug("connected to pool")

        # query the pool
        self.d_log.debug("querying pool info")
        dummy_pool_info = self.pool.pool_query()
        self.d_log.debug("queried pool info")

        # create a container
        try:
            self.d_log.debug("creating container")
            self.cont = DaosContainer(self.context)
            self.cont.create(self.pool.handle)
            self.d_log.debug("created container")
        except DaosApiError as excep:
            self.d_log.error("caught exception creating container: "
                             "{0}".format(excep))
            self.fail("caught exception creating container: {0}".format(excep))

        self.d_log.debug("opening container")
        self.cont.open()
        self.d_log.debug("opened container")

        # generate random dkey, akey each time
        # write 1 MiB objects until out of space, then 10 KiB, and so on, to
        # fill the pool quickly
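        # descending sizes let the loop approach exact fullness: once 1 MiB
        # writes start failing, progressively smaller writes fill what's left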
        for obj_sz in [1048576, 10240, 10, 1]:
            write_count = 0
            while True:
                self.d_log.debug("writing obj {0}, sz {1} to "
                                 "container".format(write_count, obj_sz))
                my_str = "a" * obj_sz
                my_str_sz = obj_sz
                dkey = (''.join(
                    random.choice(string.ascii_lowercase) for i in range(5)))
                akey = (''.join(
                    random.choice(string.ascii_lowercase) for i in range(5)))
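                # OC_SX presumably stripes the object across all pool targets,
                # so the writes consume free space evenly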
                try:
                    dummy_oid, dummy_tx = self.cont.write_an_obj(
                        my_str, my_str_sz, dkey, akey, obj_cls="OC_SX")
                    self.d_log.debug("wrote obj {0}, sz {1}".format(
                        write_count, obj_sz))
                    write_count += 1
                except DaosApiError as excep:
                    if not (err in repr(excep) or err2 in repr(excep)):
                        self.d_log.error("caught exception while writing "
                                         "object: {0}".format(repr(excep)))
                        self.fail("caught exception while writing object: {0}".
                                  format(repr(excep)))
                    else:
                        self.d_log.debug("pool is too full for {0} byte "
                                         "objects".format(obj_sz))
                        break

        self.d_log.debug("closing container")
        self.cont.close()
        self.d_log.debug("closed container")
        # create a 2nd container now that pool is full
        try:
            self.d_log.debug("creating 2nd container")
            self.cont2 = DaosContainer(self.context)
            self.cont2.create(self.pool.handle)
            self.d_log.debug("created 2nd container")

            self.d_log.debug("opening container 2")
            self.cont2.open()
            self.d_log.debug("opened container 2")

            self.d_log.debug("writing one more object, write expected to fail")
            self.cont2.write_an_obj(my_str,
                                    my_str_sz,
                                    dkey,
                                    akey,
                                    obj_cls="OC_SX")
            self.fail("wrote one more object after pool was completely filled,"
                      " this should never print")
        except DaosApiError as excep:
            if not (err in repr(excep) or err2 in repr(excep)):
                self.d_log.error("caught unexpected exception while "
                                 "writing object: {0}".format(repr(excep)))
                self.fail("caught unexpected exception while writing "
                          "object: {0}".format(repr(excep)))
            else:
                self.d_log.debug("correctly caught -1007 while attempting "
                                 "to write object in full pool")
Example #29
class FullPoolContainerCreate(Test):
    """
    Class for test to create a container in a pool with no remaining free space.
    """

    def setUp(self):
        # get paths from the build_vars generated by build
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                "../../../../.build_vars.json")) as f:
            build_paths = json.load(f)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.tmp = build_paths['PREFIX'] + '/tmp'
        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_default_oops')

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        print("initialized!!!\n")

        self.pool = DaosPool(self.context)
        self.hostlist = self.params.get("test_machines1", '/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, self.tmp)
        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)

        time.sleep(5)

    def tearDown(self):
        # shut 'er down
        ServerUtils.stopServer()
        if self.hostfile is not None:
            os.remove(self.hostfile)

    def test_no_space_cont_create(self):
        """
        :avocado: tags=pool,container,fullpoolcontcreate,small,vm
        """
        setid = self.params.get("setname",
                                '/run/testparams/setnames/validsetname/')

        # create pool
        mode = self.params.get("mode", '/conttests/createmode/')
        print("mode is {0}".format(mode))
        uid = os.geteuid()
        gid = os.getegid()
        # 16 MiB pool, the minimum size currently possible
        size = 16777216

        print("creating pool")
        self.pool.create(mode, uid, gid, size, self.server_group, None)
        print("created pool")

        # connect to the pool
        print("connecting to pool")
        self.pool.connect(1 << 1)
        print("connected to pool")

        # query the pool
        print("querying pool info")
        pool_info = self.pool.pool_query()
        print("queried pool info")

        # create a container
        try:
            print("creating container")
            self.cont = DaosContainer(self.context)
            self.cont.create(self.pool.handle)
            print("created container")
        except ValueError as e:
            self.fail("caught exception creating container: {0}".format(e))

        print("opening container")
        self.cont.open()
        print("opened container")

        # generate random dkey, akey each time
        # write 1mb until no space, then 1kb, etc. to fill pool quickly
        for x in [1048576, 1024, 1]:
            write_count = 0
            while True:
                print("writing obj {0}, sz {1} to container".format(
                    write_count, x))
                my_str = "a" * x
                my_str_sz = x
                dkey = ''.join(random.choice(string.ascii_lowercase)
                               for i in range(5))
                akey = ''.join(random.choice(string.ascii_lowercase)
                               for i in range(5))
                try:
                    oid, epoch = self.cont.write_an_obj(my_str, my_str_sz,
                                                        dkey, akey)
                    print("wrote obj {0}, sz {1}".format(write_count, x))
                    write_count += 1
                except ValueError as e:
                    if "RC: -1007" not in repr(e):
                        self.fail("caught exception while writing object: {0}"
                                  .format(repr(e)))
                    else:
                        print("pool is too full for {0} byte objects".format(x))
                        break

        print("closing container")
        self.cont.close()
        print("closed container")
        # create a 2nd container now that pool is full
        try:
            print("creating 2nd container")
            self.cont2 = DaosContainer(self.context)
            self.cont2.create(self.pool.handle)
            print("created 2nd container")

            print("writing one more object, write expected to fail")
            self.cont2.write_an_obj(my_str, my_str_sz, dkey, akey)
            print("wrote one more object--this should never print")
        except ValueError as e:
            if "RC: -1007" not in repr(e):
                    self.fail("caught unexpected exception while writing "
                              "object: {0}".format(repr(e)))
            else:
                print("correctly caught -1007 while attempting "
                      "to write object in full pool")