def check_handle(buf_len, iov_len, buf, uuidstr, rank):
    """
    Turn a global pool handle into a local handle and exercise it.

    This gets run in a child process and verifies the global handle can
    be turned into a local handle in another process.

    Args:
        buf_len: forwarded to pool.global2local() as the buffer length.
        iov_len: forwarded to pool.global2local() as the iov length.
        buf: buffer holding the global handle, forwarded as-is.
        uuidstr: pool UUID string, applied with pool.set_uuid_str().
        rank: value handed to pool.set_svc().

    Raises:
        DaosApiError: printed together with a traceback, then re-raised.
    """
    try:
        # the install prefix comes from the build_vars file written by
        # the build
        with open('../../../.build_vars.json') as vars_file:
            build_vars = json.load(vars_file)

        # bring up the DAOS python API inside this process
        daos_context = DaosContext(build_vars['PREFIX'] + '/lib/')

        daos_pool = DaosPool(daos_context)
        daos_pool.set_uuid_str(uuidstr)
        daos_pool.set_svc(rank)
        daos_pool.group = "daos_server"

        # the resulting local handle is also kept inside the pool
        # object, so the return value is not needed here
        daos_pool.global2local(daos_context, iov_len, buf_len, buf)

        # use the new handle; any failure surfaces as an exception
        daos_pool.pool_query()
        new_container = DaosContainer(daos_context)
        new_container.create(daos_pool.handle)

    except DaosApiError as excep:
        print(excep)
        print(traceback.format_exc())
        raise
def check_handle(pool_glob_handle, uuidstr, cont_glob_handle, rank):
    """
    Turn global pool and container handles into local handles.

    This gets run in a child process and verifies the global handles can
    be turned into local handles in another process.

    Args:
        pool_glob_handle: global pool handle; its iov_buf, iov_buf_len
            and iov_len attributes are read.
        uuidstr: value assigned to pool.uuid.
        cont_glob_handle: global container handle; same attributes read.
        rank: value handed to pool.set_svc().

    Raises:
        DaosApiError: printed together with a traceback, then re-raised.
    """
    def _copy_iov_buffer(glob_handle):
        # view the raw iov buffer as a fixed-size c_byte array and copy
        # its contents into a fresh bytearray
        array_ptr = ctypes.cast(
            glob_handle.iov_buf,
            ctypes.POINTER(ctypes.c_byte * glob_handle.iov_buf_len))
        copied = bytearray()
        copied.extend(array_ptr.contents)
        return copied

    try:
        # the install prefix comes from the build_vars file written by
        # the build
        with open('../../../.build_vars.json') as vars_file:
            build_vars = json.load(vars_file)

        # bring up the DAOS python API inside this process
        daos_context = DaosContext(build_vars['PREFIX'] + '/lib/')

        # prepare the pool object, then attach via the global handle
        daos_pool = DaosPool(daos_context)
        daos_pool.uuid = uuidstr
        daos_pool.set_svc(rank)
        daos_pool.group = "daos_server"

        pool_bytes = _copy_iov_buffer(pool_glob_handle)
        local_pool_handle = daos_pool.global2local(
            daos_context, pool_glob_handle.iov_len,
            pool_glob_handle.iov_buf_len, pool_bytes)

        # one operation through the new handle; no exception means the
        # handle is usable
        daos_pool.pool_query()

        # same procedure for the container
        daos_container = DaosContainer(daos_context)
        daos_container.poh = local_pool_handle

        cont_bytes = _copy_iov_buffer(cont_glob_handle)
        daos_container.global2local(
            daos_context, cont_glob_handle.iov_len,
            cont_glob_handle.iov_buf_len, cont_bytes)

        # a single query is enough to prove the container handle works
        daos_container.query()

    except DaosApiError as excep:
        print(excep)
        print(traceback.format_exc())
        raise
class InfoTests(Test):
    """
    Tests DAOS pool query.
    """

    def setUp(self):
        """Read the build variables, create the API objects, start servers."""
        # get paths from the build_vars generated by build
        with open(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         "../../../../.build_vars.json")) as f:
            build_paths = json.load(f)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.tmp = build_paths['PREFIX'] + '/tmp'
        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_server')

        context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.pool = DaosPool(context)
        self.d_log = DaosLog(context)

        self.hostlist = self.params.get("test_machines1", '/run/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, self.tmp)

        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)

    def tearDown(self):
        """Destroy the pool, remove the hostfile and always stop servers."""
        # shut 'er down
        try:
            if self.pool:
                self.pool.destroy(1)
            os.remove(self.hostfile)
        finally:
            ServerUtils.stopServer(hosts=self.hostlist)

    def _log_and_fail(self, message):
        """Write message to the DAOS log, then fail the test with it.

        Using one helper keeps the logged text and the failure text
        identical.
        """
        self.d_log.error(message)
        self.fail(message)

    def test_simple_query(self):
        """
        Test querying a pool created on a single server.

        :avocado: tags=pool,poolquery,infotest
        """
        # create pool
        mode = self.params.get("mode", '/run/testparams/modes/*', 0o731)
        if mode == 73:
            self.cancel('Cancel the mode test 73 because of DAOS-1877')
        uid = os.geteuid()
        gid = os.getegid()
        size = self.params.get("size", '/run/testparams/sizes/*', 0)
        group = self.server_group

        self.pool.create(mode, uid, gid, size, group, None)

        # connect to the pool; the yaml value is used as a bit position
        flags = self.params.get("perms", '/run/testparams/connectperms/*', '')
        connect_flags = 1 << flags
        self.pool.connect(connect_flags)

        # query the pool
        pool_info = self.pool.pool_query()

        # check uuid
        uuid_str = c_uuid_to_str(pool_info.pi_uuid)
        if uuid_str != self.pool.get_uuid_str():
            self._log_and_fail("UUID str does not match expected string")

        # validate size of pool is what we expect
        # This check is currently disabled, as space is not implemented
        # in DAOS C API yet.
        #
        # if size != pool_info.pi_space:
        #     self._log_and_fail(
        #         "expected size {0} did not match actual size {1}"
        #         .format(size, pool_info.pi_space))

        # number of targets: one is expected per host in the host list
        expected_targets = len(self.hostlist)
        if pool_info.pi_ntargets != expected_targets:
            self._log_and_fail(
                "found number of targets in pool did not match "
                "expected number, {0}. num targets: {1}".format(
                    expected_targets, pool_info.pi_ntargets))

        # number of disabled targets
        if pool_info.pi_ndisabled > 0:
            self._log_and_fail(
                "found disabled targets, none expected to be disabled")

        # mode
        if pool_info.pi_mode != mode:
            self._log_and_fail(
                "found different mode than expected. expected {0}, "
                "found {1}.".format(mode, pool_info.pi_mode))

        # uid
        if pool_info.pi_uid != uid:
            self._log_and_fail(
                "found actual pool uid {0} does not match expected "
                "uid {1}".format(pool_info.pi_uid, uid))

        # gid
        if pool_info.pi_gid != gid:
            self._log_and_fail(
                "found actual pool gid {0} does not match expected "
                "gid {1}".format(pool_info.pi_gid, gid))
class InfoTests(Test):
    """
    Tests DAOS pool query.
    """

    def setUp(self):
        """Read the build variables, create the pool object, start servers."""
        # get paths from the build_vars generated by build
        with open(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         "../../../../.build_vars.json")) as f:
            build_paths = json.load(f)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.tmp = build_paths['PREFIX'] + '/tmp'
        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_server')

        context = DaosContext(build_paths['PREFIX'] + '/lib/')
        print("initialized!!!\n")

        self.pool = DaosPool(context)
        self.hostlist = self.params.get("test_machines1", '/run/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, self.tmp)

        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)

    def tearDown(self):
        """Destroy the pool, stop the servers and remove the hostfile.

        Each later step runs even when an earlier one raises, so a failed
        pool destroy can no longer leave the servers running or the
        hostfile behind.
        """
        # shut 'er down
        try:
            self.pool.destroy(1)
        finally:
            try:
                ServerUtils.stopServer()
            finally:
                os.remove(self.hostfile)

    def test_simple_query(self):
        """
        Test querying a pool created on a single server.

        :avocado: tags=pool,poolquery,infotest
        """
        # there is a presumption that this test lives in a specific spot
        # in the repo

        # create pool
        mode = self.params.get("mode", '/run/testparams/modes/*', 0o731)
        uid = os.geteuid()
        gid = os.getegid()
        size = self.params.get("size", '/run/testparams/sizes/*', 0)
        tgt_list = None
        group = self.server_group

        self.pool.create(mode, uid, gid, size, group, tgt_list)
        PROGRESS_LOG.info("created pool")

        # connect to the pool; the yaml value is used as a bit position
        flags = self.params.get("perms", '/run/testparams/connectperms/*', '')
        connect_flags = 1 << flags
        self.pool.connect(connect_flags)
        PROGRESS_LOG.info("connected to pool")

        # query the pool
        pool_info = self.pool.pool_query()
        PROGRESS_LOG.info("queried pool info")

        # check uuid
        uuid_str = c_uuid_to_str(pool_info.pi_uuid)
        PROGRESS_LOG.info("pool uuid pool_info.pi_uuid: {0}".format(uuid_str))
        PROGRESS_LOG.info("pool uuid saved in api at create time: "
                          "{0}".format(self.pool.get_uuid_str()))
        if uuid_str != self.pool.get_uuid_str():
            self.fail("UUID str does not match expected string")

        # validate size of pool is what we expect
        PROGRESS_LOG.info("pool should be {0} bytes".format(size))
        PROGRESS_LOG.info("pool actual space is {0} bytes".format(
            pool_info.pi_space))
        # This check is currently disabled, as space is not implemented
        # in DAOS C API yet.
        #
        # if size != pool_info.pi_space:
        #     self.fail("expected size {0} did not match actual size {1}"
        #               .format(size, pool_info.pi_space))

        # number of targets: one is expected per host in the host list
        PROGRESS_LOG.info("number of targets in pool: %s",
                          pool_info.pi_ntargets)
        expected_targets = len(self.hostlist)
        if pool_info.pi_ntargets != expected_targets:
            self.fail("found number of targets in pool did not match "
                      "expected number, {0}. num targets: {1}".format(
                          expected_targets, pool_info.pi_ntargets))

        # number of disabled targets
        PROGRESS_LOG.info("number of disabled targets in pool: %s",
                          pool_info.pi_ndisabled)
        if pool_info.pi_ndisabled > 0:
            self.fail("found disabled targets, none expected to be disabled")

        # mode
        PROGRESS_LOG.info("pool mode: %s", pool_info.pi_mode)
        if pool_info.pi_mode != mode:
            self.fail("found different mode than expected. expected {0}, "
                      "found {1}.".format(mode, pool_info.pi_mode))

        # uid
        PROGRESS_LOG.info("expected uid is {0}".format(uid))
        if pool_info.pi_uid != uid:
            self.fail("found actual pool uid {0} does not match expected uid "
                      "{1}".format(pool_info.pi_uid, uid))

        # gid
        PROGRESS_LOG.info("expected gid is {0}".format(gid))
        if pool_info.pi_gid != gid:
            self.fail("found actual pool gid {0} does not match expected gid "
                      "{1}".format(pool_info.pi_gid, gid))
def test_multipool_rebuild(self):
    """
    Test ID: Rebuild-002
    Test Description: Expand on the basic test by rebuilding 2
    pools at once.
    Use Cases:
      -- multipool rebuild, single client, various object and record counts
    :avocado: tags=pool,rebuild,rebuildmulti
    """
    alphabet = string.ascii_uppercase + string.digits

    def random_text(length):
        # random upper-case/digit string of the requested length
        return ''.join(random.choice(alphabet) for _ in range(length))

    def wait_for_rebuild(pool):
        # poll the pool every two seconds until rebuild reports done
        pool.pool_query()
        while pool.pool_info.pi_rebuild_st.rs_done != 1:
            time.sleep(2)
            pool.pool_query()

    # server bring-up is redone by each rebuild test, which is why it
    # lives here and not in setUp
    _setname = self.params.get("setname", '/run/testparams/setnames/')
    group_name = self.params.get("server_group", '/server/', 'daos_server')
    install_root = os.path.normpath(self.build_paths['PREFIX'] + "/../")
    tmp_dir = self.build_paths['PREFIX'] + '/tmp'
    self.hostlist = self.params.get("test_machines", '/run/hosts/')
    hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp_dir)

    try:
        ServerUtils.runServer(hostfile, group_name, install_root)

        # the uid/gid of whoever runs the test are valid pool owners
        owner_uid = os.geteuid()
        owner_gid = os.getegid()

        # yaml-provided pool create parameters
        pool_mode = self.params.get("mode", '/run/testparams/createmode/')
        pool_setid = self.params.get("setname",
                                     '/run/testparams/createset/')
        pool_size = self.params.get("size", '/run/testparams/createsize/')

        # two pools created the same way; per the original author they
        # should land on the same storage with the same service leader
        pool1 = DaosPool(self.Context)
        pool2 = DaosPool(self.Context)
        pool1.create(pool_mode, owner_uid, owner_gid, pool_size,
                     pool_setid, None)
        pool2.create(pool_mode, owner_uid, owner_gid, pool_size,
                     pool_setid, None)

        # keep a connection open while the rebuild runs
        pool1.connect(1 << 1)
        pool2.connect(1 << 1)

        # one container per pool, created then opened
        container1 = DaosContainer(self.Context)
        container1.create(pool1.handle)
        container2 = DaosContainer(self.Context)
        container2.create(pool2.handle)
        container1.open()
        container2.open()

        # number of objects and of records per object
        objcount = self.params.get("objcount",
                                   '/run/testparams/numobjects/*')
        reccount = self.params.get("reccount",
                                   '/run/testparams/numrecords/*')
        if objcount == 0:
            reccount = 0
        expected_recs = reccount * objcount

        # rank that receives the writes and is killed afterwards
        rank = self.params.get("rank", '/run/testparams/ranks/*')

        # bytes written under each key
        size = self.params.get("size", '/run/testparams/datasize/')

        # identical data goes into both pools so verification is simple
        saved_data = []
        for _obj_idx in range(objcount):
            obj = None
            for _rec_idx in range(reccount):
                dkey = random_text(5)
                akey = random_text(5)
                data = random_text(size)

                obj, tx = container1.write_an_obj(data, len(data), dkey,
                                                  akey, obj, rank)
                obj, tx = container2.write_an_obj(data, len(data), dkey,
                                                  akey, obj, rank)
                saved_data.append((obj, dkey, akey, data, tx))

                # immediate read-back from both containers
                readback = container1.read_an_obj(size, dkey, akey, obj, tx)
                if data != readback.value:
                    self.fail(
                        "Wrote data P1, read it back, didn't match\n")

                readback = container2.read_an_obj(size, dkey, akey, obj, tx)
                if data != readback.value:
                    self.fail(
                        "Wrote data P2, read it back, didn't match\n")

        # kill a server
        victim = DaosServer(self.Context, group_name, rank)
        victim.kill(1)

        # for now the failed target has to be excluded by hand
        pool1.exclude([rank])
        pool2.exclude([rank])

        # pool 1: wait for rebuild, then compare against known values
        wait_for_rebuild(pool1)
        info = pool1.pool_info
        if info.pi_ndisabled != 1:
            self.fail(
                "P1 number disabled targets reporting incorrectly: {}".
                format(info.pi_ndisabled))
        if info.pi_rebuild_st.rs_errno != 0:
            self.fail("P1 rebuild error reported: {}".format(
                info.pi_rebuild_st.rs_errno))
        if info.pi_rebuild_st.rs_obj_nr != objcount:
            self.fail("P1 rebuilt objs not as expected: {0} {1}".format(
                info.pi_rebuild_st.rs_obj_nr, objcount))
        if info.pi_rebuild_st.rs_rec_nr != expected_recs:
            self.fail("P1 rebuilt recs not as expected: {0} {1}".format(
                info.pi_rebuild_st.rs_rec_nr, expected_recs))

        # rebuild is done, so every saved record must still read back
        for s_obj, s_dkey, s_akey, s_data, s_tx in saved_data:
            readback = container1.read_an_obj(len(s_data), s_dkey, s_akey,
                                              s_obj, s_tx)
            if s_data != readback.value:
                self.fail("after rebuild data didn't check out")

        # pool 2: same sequence
        wait_for_rebuild(pool2)
        info = pool2.pool_info
        if info.pi_ndisabled != 1:
            self.fail(
                "Number disabled targets reporting incorrectly: {}".format(
                    info.pi_ndisabled))
        if info.pi_rebuild_st.rs_errno != 0:
            self.fail("Rebuild error reported: {}".format(
                info.pi_rebuild_st.rs_errno))
        if info.pi_rebuild_st.rs_obj_nr != objcount:
            self.fail("Rebuilt objs not as expected: {0} {1}".format(
                info.pi_rebuild_st.rs_obj_nr, objcount))
        if info.pi_rebuild_st.rs_rec_nr != expected_recs:
            self.fail("Rebuilt recs not as expected: {0} {1}".format(
                info.pi_rebuild_st.rs_rec_nr, expected_recs))

        for s_obj, s_dkey, s_akey, s_data, s_tx in saved_data:
            readback = container2.read_an_obj(len(s_data), s_dkey, s_akey,
                                              s_obj, s_tx)
            if s_data != readback.value:
                self.fail("after rebuild data didn't check out")

    except DaosApiError as e:
        print(e)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")
    finally:
        ServerUtils.stopServer(hosts=self.hostlist)
        os.remove(hostfile)
        CheckForPool.CleanupPools(self.hostlist)
        ServerUtils.killServer(self.hostlist)
class PoolSvc(TestWithServers):
    """
    Tests svc argument while pool create.

    :avocado: recursive
    """

    def tearDown(self):
        """Destroy the pool if one is attached, then run the base tearDown."""
        try:
            current_pool = self.pool
            if current_pool is not None and current_pool.attached:
                current_pool.destroy(1)
        finally:
            super(PoolSvc, self).tearDown()

    def test_poolsvc(self):
        """
        Test svc arg during pool create.

        :avocado: tags=pool,svc
        """
        # pool create parameters
        createmode = self.params.get("mode", '/run/createtests/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/createtests/createset/')
        createsize = self.params.get("size", '/run/createtests/createsize/')
        createsvc = self.params.get("svc", '/run/createtests/createsvc/*/')

        # createsvc is indexed as [service replica count, expected outcome]
        svc_count = createsvc[0]
        expected_result = createsvc[1]

        def is_service_rank(index):
            # a slot counts while its rank is in 1..svc_count and is not
            # the 999999 marker value
            rank = int(self.pool.svc.rl_ranks[index])
            return 0 < rank <= svc_count and rank != 999999

        try:
            # build the python pool object, then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid,
                             createsize, createsetid, None, None,
                             svc_count)
            self.pool.connect(1 << 1)

            # count the leading entries of the returned rank list
            found = 0
            while is_service_rank(found):
                found += 1
            if found != svc_count:
                self.fail("Length of Returned Rank list is not equal to "
                          "the number of Pool Service members.\n")

            rank_list = [int(self.pool.svc.rl_ranks[index])
                         for index in range(svc_count)]
            if len(rank_list) != len(set(rank_list)):
                self.fail("Duplicate values in returned rank list")

            self.pool.pool_query()
            leader = self.pool.pool_info.pi_leader
            if svc_count == 3:
                # stop the pool service leader and exclude it
                self.pool.pool_svc_stop()
                self.pool.exclude([leader])
                # disconnect, then prove a connect/disconnect cycle works
                self.pool.disconnect()
                self.pool.connect(1 << 1)
                self.pool.disconnect()
                # kill one more server (rank leader - 1) and exclude it
                server = DaosServer(self.context, self.server_group,
                                    leader - 1)
                server.kill(1)
                self.pool.exclude([leader - 1])
                # connecting must still be possible
                self.pool.connect(1 << 1)

            if expected_result == 'FAIL':
                self.fail("Test was expected to fail but it passed.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            if expected_result == 'PASS':
                self.fail("Test was expected to pass but it failed.\n")
def test_query(self):
    """
    Pass bad parameters to pool query

    :avocado: tags=pool,poolquery,badparam,badquery
    """
    # pool create/connect parameters
    connectmode = self.params.get("mode", '/run/querytests/connectmode/')
    createmode = self.params.get("mode", '/run/querytests/createmode/')
    createuid = self.params.get("uid", '/run/querytests/createuid/')
    creategid = self.params.get("gid", '/run/querytests/creategid/')
    createsetid = self.params.get("setname", '/run/querytests/createset/')
    createsize = self.params.get("size", '/run/querytests/createsize/')

    # every parameter carries its own PASS/FAIL expectation as the second
    # element; the info pointer value itself is never used
    handlelist = self.params.get("handle", '/run/querytests/handles/*/')
    handle = handlelist[0]
    infolist = self.params.get("info", '/run/querytests/infoptr/*/')
    expected_for_param = [handlelist[1], infolist[1]]

    # one FAIL among the parameters makes the whole test an expected
    # FAIL, which is the common case since bad parameters are the point
    if 'FAIL' in expected_for_param:
        expected_result = 'FAIL'
    else:
        expected_result = 'PASS'

    try:
        # bring up the DAOS python API
        with open('../../../.build_vars.json') as build_file:
            build_vars = json.load(build_file)
        context = DaosContext(build_vars['PREFIX'] + '/lib/')

        # python pool object first, then the underlying daos storage
        pool = DaosPool(context)
        pool.create(createmode, createuid, creategid,
                    createsize, createsetid, None)
        pool.connect(connectmode)

        # overwrite the pool handle unless the yaml asks for a valid one
        if handle != 'VALID':
            pool.handle = handle

        pool.pool_query()

        if expected_result == 'FAIL':
            self.fail("Test was expected to fail but it passed.\n")

    except DaosApiError as excep:
        print(excep)
        print(traceback.format_exc())
        if expected_result == 'PASS':
            self.fail("Test was expected to pass but it failed.\n")
def test_multipool_rebuild(self):
    """
    Test ID: Rebuild-002
    Test Description: Expand on the basic test by rebuilding 2
    pools at once.
    Use Cases:
      -- multipool rebuild, single client, various object and record counts
    :avocado: tags=pool,rebuild,rebuildmulti
    """
    alphabet = string.ascii_uppercase + string.digits

    def random_text(length):
        # random upper-case/digit string of the requested length
        return ''.join(random.choice(alphabet) for _ in range(length))

    def wait_for_rebuild(pool):
        # poll the pool every two seconds until rebuild reports done
        pool.pool_query()
        while pool.pool_info.pi_rebuild_st.rs_done != 1:
            time.sleep(2)
            pool.pool_query()

    try:
        # two pools created the same way; per the original author they
        # should land on the same storage with the same service leader
        pool1 = DaosPool(self.context)
        pool2 = DaosPool(self.context)
        pool1.create(self.createmode, self.createuid, self.creategid,
                     self.createsize, self.createsetid)
        pool2.create(self.createmode, self.createuid, self.creategid,
                     self.createsize, self.createsetid)

        # keep a connection open while the rebuild runs
        pool1.connect(1 << 1)
        pool2.connect(1 << 1)

        # one container per pool, created then opened
        container1 = DaosContainer(self.context)
        container1.create(pool1.handle)
        container2 = DaosContainer(self.context)
        container2.create(pool2.handle)
        container1.open()
        container2.open()

        # identical data goes into both pools so verification is simple
        saved_data = []
        for _obj_idx in range(self.objcount):
            obj = None
            for _rec_idx in range(self.reccount):
                dkey = random_text(5)
                akey = random_text(5)
                data = random_text(self.size)

                # Used DAOS_OC_R1S_SPEC_RANK
                # 1 replica with specified rank
                obj, txn = container1.write_an_obj(
                    data, len(data), dkey, akey, obj, self.rank,
                    obj_cls=15)
                obj, txn = container2.write_an_obj(
                    data, len(data), dkey, akey, obj, self.rank,
                    obj_cls=15)
                saved_data.append((obj, dkey, akey, data, txn))

                # immediate read-back from both containers
                readback = container1.read_an_obj(self.size, dkey, akey,
                                                  obj, txn)
                if data != readback.value:
                    self.fail("Wrote data P1, read it back, didn't match\n")

                readback = container2.read_an_obj(self.size, dkey, akey,
                                                  obj, txn)
                if data != readback.value:
                    self.fail("Wrote data P2, read it back, didn't match\n")

        # kill a server
        victim = DaosServer(self.context, self.server_group, self.rank)
        victim.kill(1)

        # for now the failed target has to be excluded by hand
        pool1.exclude([self.rank])
        pool2.exclude([self.rank])

        # pool 1: wait for rebuild, then compare against known values
        wait_for_rebuild(pool1)
        info = pool1.pool_info
        if info.pi_ndisabled != 1:
            self.fail("P1 number disabled targets reporting incorrectly: {}"
                      .format(info.pi_ndisabled))
        if info.pi_rebuild_st.rs_errno != 0:
            self.fail("P1 rebuild error reported: {}"
                      .format(info.pi_rebuild_st.rs_errno))
        if info.pi_rebuild_st.rs_obj_nr != self.objcount:
            self.fail("P1 rebuilt objs not as expected: {0} {1}"
                      .format(info.pi_rebuild_st.rs_obj_nr, self.objcount))
        if info.pi_rebuild_st.rs_rec_nr != (self.reccount*self.objcount):
            self.fail("P1 rebuilt recs not as expected: {0} {1}"
                      .format(info.pi_rebuild_st.rs_rec_nr,
                              self.reccount*self.objcount))

        # rebuild is done, so every saved record must still read back
        for s_obj, s_dkey, s_akey, s_data, s_txn in saved_data:
            readback = container1.read_an_obj(len(s_data), s_dkey, s_akey,
                                              s_obj, s_txn)
            if s_data != readback.value:
                self.fail("after rebuild data didn't check out")

        # pool 2: same sequence
        wait_for_rebuild(pool2)
        info = pool2.pool_info
        if info.pi_ndisabled != 1:
            self.fail("Number disabled targets reporting incorrectly: {}"
                      .format(info.pi_ndisabled))
        if info.pi_rebuild_st.rs_errno != 0:
            self.fail("Rebuild error reported: {}"
                      .format(info.pi_rebuild_st.rs_errno))
        if info.pi_rebuild_st.rs_obj_nr != self.objcount:
            self.fail("Rebuilt objs not as expected: {0} {1}"
                      .format(info.pi_rebuild_st.rs_obj_nr, self.objcount))
        if info.pi_rebuild_st.rs_rec_nr != (self.reccount*self.objcount):
            self.fail("Rebuilt recs not as expected: {0} {1}".
                      format(info.pi_rebuild_st.rs_rec_nr,
                             (self.reccount*self.objcount)))

        for s_obj, s_dkey, s_akey, s_data, s_txn in saved_data:
            readback = container2.read_an_obj(len(s_data), s_dkey, s_akey,
                                              s_obj, s_txn)
            if s_data != readback.value:
                self.fail("after rebuild data didn't check out")

    except DaosApiError as excp:
        print(excp)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")
class NvmeIo(avocado.Test):
    """
    Test Class Description:
    Run IOR against freshly created pools and verify how much pool
    space (SCM or NVMe) each run consumed.
    """

    def setUp(self):
        """Initialise state, read the build variables and start servers."""
        self.pool = None
        self.hostlist = None
        self.hostfile_clients = None
        self.hostfile = None
        self.out_queue = None
        # True only while self.pool holds an open connection
        self.pool_connect = False

        with open('../../../.build_vars.json') as json_f:
            build_paths = json.load(json_f)

        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_server')
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.d_log = DaosLog(self.context)
        self.hostlist = self.params.get("servers", '/run/hosts/*')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)
        # Start Server
        server_utils.run_server(self.hostfile, self.server_group,
                                self.basepath)

    def tearDown(self):
        """Clean up a still-connected pool and always stop the servers."""
        try:
            if self.pool_connect:
                self.pool.disconnect()
                self.pool.destroy(1)
        finally:
            server_utils.stop_server(hosts=self.hostlist)

    def verify_pool_size(self, original_pool_info, ior_args):
        """
        Validate that the pool lost at least the expected amount of space.

        Args:
            original_pool_info: pool info queried prior to IOR.
            ior_args: IOR arguments; 'stripe_size', 'object_class',
                'slots' and 'block_size' are read.

        Raises:
            DaosTestError: if the drop in free space is smaller than
                slots * block_size * replica multiplier.
        """
        # Get the current pool size for comparison
        current_pool_info = self.pool.pool_query()

        # A transfer size below 4K is verified against SCM (index 0),
        # anything else against NVMe (index 1)
        if ior_args['stripe_size'] >= 4096:
            print("Size is > 4K,Size verification will be done with NVMe size")
            storage_index = 1
        else:
            print("Size is < 4K,Size verification will be done with SCM size")
            storage_index = 0

        # space consumed since the snapshot taken before IOR
        free_pool_size = (
            original_pool_info.pi_space.ps_space.s_free[storage_index]
            - current_pool_info.pi_space.ps_space.s_free[storage_index])

        # The first number in the object class name is used as the
        # replica multiplier. The value must be read from ior_args here;
        # scanning a quoted literal never finds a digit and leaves the
        # multiplier at 1.
        obj_multiplier = 1
        replica_number = re.findall(r'\d+', str(ior_args['object_class']))
        if replica_number:
            obj_multiplier = int(replica_number[0])
        expected_pool_size = (ior_args['slots'] * ior_args['block_size'] *
                              obj_multiplier)

        if free_pool_size < expected_pool_size:
            raise DaosTestError(
                'Pool Free Size did not match Actual = {} Expected = {}'
                .format(free_pool_size, expected_pool_size))

    @avocado.fail_on(DaosApiError)
    def test_nvme_io(self):
        """
        Test ID: DAOS-2082
        Test Description: Test will run IOR with standard and non standard
        sizes.IOR will be run for all Object type supported. Purpose is to
        verify pool size (SCM and NVMe) for IOR file.
        This test is running multiple IOR on same server start instance.
        :avocado: tags=nvme,nvme_io,large
        """
        ior_args = {}

        hostlist_clients = self.params.get("clients", '/run/hosts/*')
        tests = self.params.get("ior_sequence", '/run/ior/*')
        object_type = self.params.get("object_type", '/run/ior/*')

        # Loop for every IOR object type
        for obj_type in object_type:
            # each ior_param is indexed as: [0] pool size, [1] NVMe size,
            # [2] transfer/stripe size, [3] block size, [4] slots
            for ior_param in tests:
                self.hostfile_clients = write_host_file.write_host_file(
                    hostlist_clients,
                    self.workdir,
                    ior_param[4])
                # There is an issue with NVMe if Transfer size>64M,
                # Skipped this sizes for now
                if ior_param[2] > 67108864:
                    print("Xfersize > 64M getting failed, DAOS-1264")
                    continue

                self.pool = DaosPool(self.context)
                self.pool.create(self.params.get("mode",
                                                 '/run/pool/createmode/*'),
                                 os.geteuid(),
                                 os.getegid(),
                                 ior_param[0],
                                 self.params.get("setname",
                                                 '/run/pool/createset/*'),
                                 nvme_size=ior_param[1])
                self.pool.connect(1 << 1)
                self.pool_connect = True

                # colon-separated list of the pool service ranks
                createsvc = self.params.get("svcn", '/run/pool/createsvc/')
                svc_list = ":".join(
                    str(int(self.pool.svc.rl_ranks[i]))
                    for i in range(createsvc))

                ior_args['client_hostfile'] = self.hostfile_clients
                ior_args['pool_uuid'] = self.pool.get_uuid_str()
                ior_args['svc_list'] = svc_list
                ior_args['basepath'] = self.basepath
                ior_args['server_group'] = self.server_group
                ior_args['tmp_dir'] = self.workdir
                ior_args['iorflags'] = self.params.get("iorflags",
                                                       '/run/ior/*')
                ior_args['iteration'] = self.params.get("iteration",
                                                        '/run/ior/*')
                ior_args['stripe_size'] = ior_param[2]
                ior_args['block_size'] = ior_param[3]
                ior_args['stripe_count'] = self.params.get("stripecount",
                                                           '/run/ior/*')
                ior_args['async_io'] = self.params.get("asyncio",
                                                       '/run/ior/*')
                ior_args['object_class'] = obj_type
                ior_args['slots'] = ior_param[4]

                # IOR is going to use the same --daos.stripeSize,
                # --daos.recordSize and Transfer size.
                try:
                    size_before_ior = self.pool.pool_query()
                    ior_utils.run_ior(ior_args['client_hostfile'],
                                      ior_args['iorflags'],
                                      ior_args['iteration'],
                                      ior_args['block_size'],
                                      ior_args['stripe_size'],
                                      ior_args['pool_uuid'],
                                      ior_args['svc_list'],
                                      ior_args['stripe_size'],
                                      ior_args['stripe_size'],
                                      ior_args['stripe_count'],
                                      ior_args['async_io'],
                                      ior_args['object_class'],
                                      ior_args['basepath'],
                                      ior_args['slots'],
                                      filename=str(uuid.uuid4()),
                                      display_output=True)
                    self.verify_pool_size(size_before_ior, ior_args)
                except ior_utils.IorFailed as exe:
                    print(exe)
                    print(traceback.format_exc())
                    self.fail()

                # release this iteration's pool before the next one
                try:
                    if self.pool_connect:
                        self.pool.disconnect()
                        self.pool_connect = False
                    if self.pool:
                        self.pool.destroy(1)
                except DaosApiError as exe:
                    print(exe)
                    self.fail("Failed to Destroy/Disconnect the Pool")
def test_simple_rebuild(self):
    """
    Test ID: Rebuild-001
    Test Description: The most basic rebuild test.
    Use Cases:
      -- single pool rebuild, single client, various record/object counts
    :avocado: tags=pool,rebuild,rebuildsimple
    """
    alphabet = string.ascii_uppercase + string.digits

    def random_text(length):
        # random upper-case/digit string of the requested length
        return ''.join(random.choice(alphabet) for _ in range(length))

    try:
        # python pool object first, then the underlying daos storage
        pool = DaosPool(self.context)
        pool.create(self.createmode, self.createuid, self.creategid,
                    self.createsize, self.createsetid)

        # keep a connection open while the rebuild runs
        pool.connect(1 << 1)

        # baseline: nothing disabled and no rebuild activity so far
        pool.pool_query()
        info = pool.pool_info
        if info.pi_ndisabled != 0:
            self.fail(
                "Number of disabled targets reporting incorrectly.\n")
        if info.pi_rebuild_st.rs_errno != 0:
            self.fail("Rebuild error but rebuild hasn't run.\n")
        if info.pi_rebuild_st.rs_done != 1:
            self.fail("Rebuild is running but device hasn't failed yet.\n")
        if info.pi_rebuild_st.rs_obj_nr != 0:
            self.fail("Rebuilt objs not zero.\n")
        if info.pi_rebuild_st.rs_rec_nr != 0:
            self.fail("Rebuilt recs not zero.\n")

        # create a container, then open it
        container = DaosContainer(self.context)
        container.create(pool.handle)
        container.open()

        saved_data = []
        for _obj_idx in range(self.objcount):
            obj = None
            for _rec_idx in range(self.reccount):
                # generate keys and payload, then write them
                dkey = random_text(5)
                akey = random_text(5)
                data = random_text(self.size)

                obj, txn = container.write_an_obj(data, len(data), dkey,
                                                  akey, obj, self.rank,
                                                  obj_cls=16)
                saved_data.append((obj, dkey, akey, data, txn))

                # immediate read-back check
                readback = container.read_an_obj(self.size, dkey, akey,
                                                 obj, txn)
                if data != readback.value:
                    self.fail("Write data 1, read it back, didn't match\n")

        # kill the server at the rank that was written to
        victim = DaosServer(self.context, self.server_group, self.rank)
        victim.kill(1)

        # for now the failed target has to be excluded by hand
        pool.exclude([self.rank])

        # poll the pool every two seconds until rebuild reports done
        pool.pool_query()
        while pool.pool_info.pi_rebuild_st.rs_done != 1:
            time.sleep(2)
            pool.pool_query()

        info = pool.pool_info
        if info.pi_ndisabled != 1:
            self.fail(
                "Number of disabled targets reporting incorrectly: {}".
                format(info.pi_ndisabled))
        if info.pi_rebuild_st.rs_errno != 0:
            self.fail("Rebuild error reported: {}".format(
                info.pi_rebuild_st.rs_errno))
        if info.pi_rebuild_st.rs_obj_nr != self.objcount:
            self.fail("Rebuilt objs not as expected: {0} {1}".format(
                info.pi_rebuild_st.rs_obj_nr, self.objcount))
        if info.pi_rebuild_st.rs_rec_nr != (self.reccount * self.objcount):
            self.fail("Rebuilt recs not as expected: {0} {1}".format(
                info.pi_rebuild_st.rs_rec_nr,
                self.reccount * self.objcount))

        # rebuild is done, so every saved record must still read back
        for s_obj, s_dkey, s_akey, s_data, s_txn in saved_data:
            readback = container.read_an_obj(len(s_data), s_dkey, s_akey,
                                             s_obj, s_txn)
            if s_data != readback.value:
                self.fail("after rebuild data didn't check out")

    except DaosApiError as excp:
        print(excp)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")
def test_multipool_rebuild(self):
    """
    Test ID: Rebuild-002

    Test Description: Expand on the basic test by rebuilding 2 pools at
    once.  The same records are written through a container in each
    pool, the server at self.rank is killed and excluded from both
    pools, and each pool's rebuild statistics and data are then checked.

    Use Cases:
      -- multipool rebuild, single client, various object and record
         counts

    :avocado: tags=pool,rebuild,rebuildmulti
    """
    alphabet = string.ascii_uppercase + string.digits

    def random_text(length):
        """Return `length` characters picked at random from alphabet."""
        return ''.join(random.choice(alphabet) for _ in range(length))

    def wait_for_rebuild(pool):
        """Query `pool` every two seconds until its rebuild is done."""
        pool.pool_query()
        while pool.pool_info.pi_rebuild_st.rs_done != 1:
            time.sleep(2)
            pool.pool_query()

    try:
        # Two pool objects with their underlying storage; the way the
        # code is now the pools should be on the same storage and have
        # the same service leader.
        pool1 = DaosPool(self.context)
        pool2 = DaosPool(self.context)
        pool1.create(self.createmode, self.createuid, self.creategid,
                     self.createsize, self.createsetid)
        pool2.create(self.createmode, self.createuid, self.creategid,
                     self.createsize, self.createsetid)

        # both connections stay open during the rebuild
        pool1.connect(1 << 1)
        pool2.connect(1 << 1)

        # one container per pool, created first and then opened
        container1 = DaosContainer(self.context)
        container1.create(pool1.handle)
        container2 = DaosContainer(self.context)
        container2.create(pool2.handle)
        container1.open()
        container2.open()

        # The same data goes into both pools, at least for now, to keep
        # the later verification simple.
        written = []
        for _obj_index in range(self.objcount):
            obj = None
            for _rec_index in range(self.reccount):
                dkey = random_text(5)
                akey = random_text(5)
                data = random_text(self.size)

                # Used DAOS_OC_R1S_SPEC_RANK
                # 1 replica with specified rank
                obj, txn = container1.write_an_obj(data, len(data), dkey,
                                                   akey, obj, self.rank,
                                                   obj_cls=15)
                obj, txn = container2.write_an_obj(data, len(data), dkey,
                                                   akey, obj, self.rank,
                                                   obj_cls=15)
                written.append((obj, dkey, akey, data, txn))

                # read the record back through each container in turn
                echoed = container1.read_an_obj(self.size, dkey, akey,
                                                obj, txn)
                if data != echoed.value:
                    self.fail(
                        "Wrote data P1, read it back, didn't match\n")

                echoed = container2.read_an_obj(self.size, dkey, akey,
                                                obj, txn)
                if data != echoed.value:
                    self.fail(
                        "Wrote data P2, read it back, didn't match\n")

        # kill a server
        server = DaosServer(self.context, self.server_group, self.rank)
        server.kill(1)

        # temporarily, the exclude of a failed target must be done
        # manually
        pool1.exclude([self.rank])
        pool2.exclude([self.rank])

        # Pool 1 first: wait for completion, then compare the statistics
        # with what we know a priori.
        wait_for_rebuild(pool1)
        info1 = pool1.pool_info
        rebuild1 = info1.pi_rebuild_st
        if info1.pi_ndisabled != 1:
            self.fail(
                "P1 number disabled targets reporting incorrectly: {}".
                format(info1.pi_ndisabled))
        if rebuild1.rs_errno != 0:
            self.fail("P1 rebuild error reported: {}".format(
                rebuild1.rs_errno))
        if rebuild1.rs_obj_nr != self.objcount:
            self.fail("P1 rebuilt objs not as expected: {0} {1}".format(
                rebuild1.rs_obj_nr, self.objcount))
        if rebuild1.rs_rec_nr != (self.reccount * self.objcount):
            self.fail("P1 rebuilt recs not as expected: {0} {1}".format(
                rebuild1.rs_rec_nr, self.reccount * self.objcount))

        # rebuild finished on pool 1, its records must still be correct
        for obj, dkey, akey, data, txn in written:
            echoed = container1.read_an_obj(len(data), dkey, akey, obj,
                                            txn)
            if data != echoed.value:
                self.fail("after rebuild data didn't check out")

        # Now the same sequence for the other pool.
        wait_for_rebuild(pool2)
        info2 = pool2.pool_info
        rebuild2 = info2.pi_rebuild_st
        if info2.pi_ndisabled != 1:
            self.fail(
                "Number disabled targets reporting incorrectly: {}".format(
                    info2.pi_ndisabled))
        if rebuild2.rs_errno != 0:
            self.fail("Rebuild error reported: {}".format(
                rebuild2.rs_errno))
        if rebuild2.rs_obj_nr != self.objcount:
            self.fail("Rebuilt objs not as expected: {0} {1}".format(
                rebuild2.rs_obj_nr, self.objcount))
        if rebuild2.rs_rec_nr != (self.reccount * self.objcount):
            self.fail("Rebuilt recs not as expected: {0} {1}".format(
                rebuild2.rs_rec_nr, (self.reccount * self.objcount)))

        # rebuild finished on pool 2, its records must still be correct
        for obj, dkey, akey, data, txn in written:
            echoed = container2.read_an_obj(len(data), dkey, akey, obj,
                                            txn)
            if data != echoed.value:
                self.fail("after rebuild data didn't check out")

    except DaosApiError as excp:
        print(excp)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")

    finally:
        # stop the servers, remove leftover pools, then make sure no
        # server process survives
        server_utils.stop_server(hosts=self.hostlist_servers)
        check_for_pool.cleanup_pools(self.hostlist_servers)
        server_utils.kill_server(self.hostlist_servers)
class DestroyRebuild(Test):
    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: tags=pool,pooldestroy,rebuild,desreb
    """

    build_paths = []
    server_group = ""
    context = None
    pool = None
    hostfile = ""

    def setUp(self):
        """
        Start the agent and servers and create the pool used by the test.

        Reads the install prefix from .build_vars.json, writes a hostfile
        for the machines listed in the yaml, launches the agent and the
        DAOS servers and finally creates a pool from the yaml parameters.
        """
        self.agent_sessions = None

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                   self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        self.pool.get_uuid_str()

        time.sleep(2)

    def tearDown(self):
        """
        Clean up after the test.

        Every step runs even when an earlier one raises, so a missing
        hostfile or a failing pool destroy cannot leave the agent or the
        servers running.  The first exception still propagates.
        """
        try:
            # hostfile is "" when setUp never got as far as writing it
            if self.hostfile:
                os.remove(self.hostfile)
        finally:
            try:
                # the test clears self.pool once it destroyed the pool,
                # so the pool is never destroyed a second time here
                if self.pool:
                    self.pool.destroy(1)
                    self.pool = None
            finally:
                try:
                    if self.agent_sessions:
                        AgentUtils.stop_agent(self.hostlist,
                                              self.agent_sessions)
                finally:
                    server_utils.stop_server(hosts=self.hostlist)

    def test_destroy_while_rebuilding(self):
        """
        Destroy a pool right after a rebuild has been triggered.

        Connects to the pool, checks the target counts, kills one server,
        excludes it from the pool and then destroys the pool immediately.

        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool
            # target hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            server = DaosServer(self.context, bytes(self.server_group),
                                svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            server.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            print("exclude target ")

            # the rebuild won't take long since there is no data so do
            # the destroy quickly
            self.pool.destroy(1)
            # the pool is gone; stop tearDown from destroying it again
            self.pool = None
            print("destroy ")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
def test_simple_rebuild(self):
    """
    Test ID: Rebuild-001

    Test Description: The most basic rebuild test.  After writing and
    verifying a set of records, the server at self.rank is killed and
    excluded; the test then waits for the rebuild to be reported done
    and checks both the rebuild counters and the stored data.

    Use Cases:
      -- single pool rebuild, single client, various record/object counts

    :avocado: tags=pool,rebuild,rebuildsimple
    """
    charset = string.ascii_uppercase + string.digits

    def make_token(count):
        """Build a string of `count` random characters from charset."""
        return ''.join(random.choice(charset) for _ in range(count))

    try:
        # python pool object plus the daos storage underneath it
        pool = DaosPool(self.context)
        pool.create(self.createmode, self.createuid, self.creategid,
                    self.createsize, self.createsetid)

        # keep a connection open for the duration of the rebuild
        pool.connect(1 << 1)

        # initial state: (observed value, required value, failure text)
        pool.pool_query()
        before = pool.pool_info
        initial_checks = (
            (before.pi_ndisabled, 0,
             "Number of disabled targets reporting incorrectly.\n"),
            (before.pi_rebuild_st.rs_errno, 0,
             "Rebuild error but rebuild hasn't run.\n"),
            (before.pi_rebuild_st.rs_done, 1,
             "Rebuild is running but device hasn't failed yet.\n"),
            (before.pi_rebuild_st.rs_obj_nr, 0,
             "Rebuilt objs not zero.\n"),
            (before.pi_rebuild_st.rs_rec_nr, 0,
             "Rebuilt recs not zero.\n"),
        )
        for observed, required, text in initial_checks:
            if observed != required:
                self.fail(text)

        # container: create, then open
        container = DaosContainer(self.context)
        container.create(pool.handle)
        container.open()

        records = []
        for _objc in range(self.objcount):
            obj = None
            for _recc in range(self.reccount):
                # invent a dkey, an akey and a payload, then write them
                dkey = make_token(5)
                akey = make_token(5)
                data = make_token(self.size)

                obj, txn = container.write_an_obj(data, len(data), dkey,
                                                  akey, obj, self.rank,
                                                  obj_cls=16)
                records.append((obj, dkey, akey, data, txn))

                # confirm the write by reading it straight back
                fetched = container.read_an_obj(self.size, dkey, akey,
                                                obj, txn)
                if data != fetched.value:
                    self.fail("Write data 1, read it back, didn't match\n")

        # take down the server holding the data
        server = DaosServer(self.context, self.server_group, self.rank)
        server.kill(1)

        # temporarily, the exclude of a failed target must be done manually
        pool.exclude([self.rank])

        # re-query every two seconds until rs_done flips to 1
        pool.pool_query()
        while pool.pool_info.pi_rebuild_st.rs_done != 1:
            time.sleep(2)
            pool.pool_query()

        after = pool.pool_info
        stats = after.pi_rebuild_st
        total_recs = self.reccount*self.objcount
        final_checks = (
            (after.pi_ndisabled, 1,
             "Number of disabled targets reporting incorrectly: {}"
             .format(after.pi_ndisabled)),
            (stats.rs_errno, 0,
             "Rebuild error reported: {}".format(stats.rs_errno)),
            (stats.rs_obj_nr, self.objcount,
             "Rebuilt objs not as expected: {0} {1}"
             .format(stats.rs_obj_nr, self.objcount)),
            (stats.rs_rec_nr, total_recs,
             "Rebuilt recs not as expected: {0} {1}"
             .format(stats.rs_rec_nr, total_recs)),
        )
        for observed, required, text in final_checks:
            if observed != required:
                self.fail(text)

        # with the rebuild complete, re-read every record written earlier
        for obj, dkey, akey, data, txn in records:
            fetched = container.read_an_obj(len(data), dkey, akey, obj, txn)
            if data != fetched.value:
                self.fail("after rebuild data didn't check out")

    except DaosApiError as excp:
        print(excp)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")
class FullPoolContainerCreate(Test):
    """
    Class for test to create a container in a pool with no remaining free
    space.
    """

    def setUp(self):
        """
        Start the agent and the DAOS servers and prepare the API objects.

        The install prefix is read from .build_vars.json, located
        relative to this file.  The pool object is only instantiated
        here; the pool itself is created by the test.
        """
        self.agent_sessions = None
        # get paths from the build_vars generated by build
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               "../../../../.build_vars.json")) as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_default_oops')

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.cont = None
        self.cont2 = None

        self.pool = DaosPool(self.context)
        self.d_log = DaosLog(self.context)

        self.hostlist = self.params.get("test_machines1", '/hosts/')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)
        self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                   self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group,
                                self.basepath)

    def tearDown(self):
        """
        Shut everything down.

        Pool destroy is wrapped in a try; in case pool create didn't
        succeed, we still need the agent and the server to be shut down
        in any case.  The server is also stopped when stopping the agent
        raises.
        """
        try:
            self.pool.destroy(1)
        finally:
            try:
                if self.agent_sessions:
                    AgentUtils.stop_agent(self.hostlist,
                                          self.agent_sessions)
            finally:
                server_utils.stop_server(hosts=self.hostlist)

    def test_no_space_cont_create(self):
        """
        Fill a minimum-size pool, then try to use a second container.

        Objects of 1 MiB, 1 KiB and 1 byte are written until each size is
        rejected with one of the expected out-of-space codes.  A second
        container is then created and opened, and one more write is
        attempted; that write (or the container creation) must fail with
        an out-of-space code, otherwise the test fails.

        :avocado: tags=pool,cont,fullpoolcontcreate,small,vm
        """
        # full storage rc
        err = "-1007"
        # probably should be -1007, revisit later
        err2 = "-1009"

        # create pool
        mode = self.params.get("mode", '/conttests/createmode/')
        self.d_log.debug("mode is {0}".format(mode))
        uid = os.geteuid()
        gid = os.getegid()

        # 16 mb pool, minimum size currently possible
        size = 16777216

        self.d_log.debug("creating pool")
        self.pool.create(mode, uid, gid, size, self.server_group, None)
        self.d_log.debug("created pool")

        # connect to the pool
        self.d_log.debug("connecting to pool")
        self.pool.connect(1 << 1)
        self.d_log.debug("connected to pool")

        # query the pool; only the call matters, the result is unused
        self.d_log.debug("querying pool info")
        self.pool.pool_query()
        self.d_log.debug("queried pool info")

        # create a container
        try:
            self.d_log.debug("creating container")
            self.cont = DaosContainer(self.context)
            self.cont.create(self.pool.handle)
            self.d_log.debug("created container")
        except DaosApiError as excep:
            self.d_log.error("caught exception creating container: "
                             "{0}".format(excep))
            self.fail("caught exception creating container: {0}".format(excep))

        self.d_log.debug("opening container")
        self.cont.open()
        self.d_log.debug("opened container")

        # generate random dkey, akey each time
        # write 1mb until no space, then 1kb, etc. to fill pool quickly
        for obj_sz in [1048576, 1024, 1]:
            write_count = 0
            while True:
                self.d_log.debug("writing obj {0}, sz {1} to "
                                 "container".format(write_count, obj_sz))
                my_str = "a" * obj_sz
                my_str_sz = obj_sz
                # ascii_lowercase exists on Python 2 and 3 and, unlike
                # string.lowercase, does not vary with the locale
                dkey = ''.join(random.choice(string.ascii_lowercase)
                               for _ in range(5))
                akey = ''.join(random.choice(string.ascii_lowercase)
                               for _ in range(5))
                try:
                    self.cont.write_an_obj(my_str, my_str_sz, dkey, akey,
                                           obj_cls=1)
                    self.d_log.debug("wrote obj {0}, sz {1}".format(
                        write_count, obj_sz))
                    write_count += 1
                except DaosApiError as excep:
                    if not (err in repr(excep) or err2 in repr(excep)):
                        self.d_log.error("caught exception while writing "
                                         "object: {0}".format(repr(excep)))
                        self.fail("caught exception while writing object: {0}"
                                  .format(repr(excep)))
                    # out of space for this size, move on to a smaller one
                    self.d_log.debug("pool is too full for {0} byte "
                                     "objects".format(obj_sz))
                    break

        self.d_log.debug("closing container")
        self.cont.close()
        self.d_log.debug("closed container")

        # create a 2nd container now that pool is full
        try:
            self.d_log.debug("creating 2nd container")
            self.cont2 = DaosContainer(self.context)
            self.cont2.create(self.pool.handle)
            self.d_log.debug("created 2nd container")

            self.d_log.debug("opening container 2")
            self.cont2.open()
            self.d_log.debug("opened container 2")

            self.d_log.debug("writing one more object, write expected to fail")
            self.cont2.write_an_obj(my_str, my_str_sz, dkey, akey,
                                    obj_cls=1)
            self.d_log.debug("wrote one more object--this should never print")
        except DaosApiError as excep:
            if not (err in repr(excep) or err2 in repr(excep)):
                self.d_log.error("caught unexpected exception while "
                                 "writing object: {0}".format(repr(excep)))
                self.fail("caught unexpected exception while writing "
                          "object: {0}".format(repr(excep)))
            else:
                self.d_log.debug("correctly caught -1007 while attempting "
                                 "to write object in full pool")
        else:
            # no exception at all: the pool accepted a write it should
            # have rejected, which previously passed silently
            self.fail("write to a full pool succeeded, expected it to fail")
class RebuildNoCap(TestWithServers):
    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: recursive
    """

    def setUp(self):
        """
        Create the test pool and write data into it.

        After the base class setUp, a pool is created from the yaml
        parameters and write_some_data.py is launched through mpirun on
        the first server host to put "datasize" bytes into the pool.
        """
        super(RebuildNoCap, self).setUp()

        # pool creation parameters from the yaml
        mode = self.params.get("mode", '/run/pool/createmode/')
        uid = self.params.get("uid", '/run/pool/createuid/')
        gid = self.params.get("gid", '/run/pool/creategid/')
        setid = self.params.get("setname", '/run/pool/createset/')
        size = self.params.get("size", '/run/pool/createsize/')

        self.pool = DaosPool(self.context)
        self.pool.create(mode, uid, gid, size, setid)
        pool_uuid = self.pool.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool
        byte_count = long(self.params.get("datasize",
                                          '/run/testparams/datatowrite/'))
        writer = self.prefix + "/../src/tests/ftest/util/write_some_data.py"
        template = ("export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"
                    " --np 1 --host {1} {2} {3} testfile")
        cmd = template.format(pool_uuid, self.hostlist_servers[0], writer,
                              byte_count)

        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """
        Destroy the pool if there is one, then run the base class
        tearDown whether or not the destroy raised.
        """
        try:
            if self.pool:
                self.pool.destroy(1)
        finally:
            super(RebuildNoCap, self).tearDown()

    def test_rebuild_no_capacity(self):
        """
        Kill and exclude one server, then expect the pool query to show a
        rebuild error.

        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print("\nsetup complete, starting test\n")

            # reference one of the pool's target hosts through a server
            # object so that it can be killed
            victim_rank = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            victim = DaosServer(self.context, bytes(self.server_group),
                                victim_rank)

            time.sleep(1)
            victim.kill(1)

            # exclude the target from the dead server
            self.pool.exclude([victim_rank])

            # exclude should trigger rebuild, check
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if status.pi_ntargets != len(self.hostlist_servers):
                self.fail("target count wrong.\n")
            if status.pi_ndisabled != 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st.rs_errno == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
def test_rebuild_with_io(self):
    """
    Test ID: Rebuild-003

    Test Description: Trigger a rebuild while I/O is ongoing.

    Use Cases:
      -- single pool, single client performing continuous
         read/write/verify sequence while failure/rebuild is triggered
         in another process

    :avocado: tags=pool,rebuild,rebuildwithio
    """
    # the rebuild tests need to redo this stuff each time so not in setup
    # as it usually would be
    group = self.params.get("name", '/server_config/', 'daos_server')
    install_root = os.path.normpath(self.build_paths['PREFIX'] + "/../")
    self.hostlist = self.params.get("test_machines", '/run/hosts/')
    hosts_path = write_host_file.write_host_file(self.hostlist,
                                                 self.workdir)

    try:
        self.agent_sessions = AgentUtils.run_agent(install_root,
                                                   self.hostlist)
        server_utils.run_server(hosts_path, group, install_root)

        # the uid/gid of the user running the test should be perfectly
        # valid for pool creation
        uid = os.geteuid()
        gid = os.getegid()

        # remaining pool create parameters come from the yaml
        mode = self.params.get("mode", '/run/testparams/createmode/')
        setid = self.params.get("setname", '/run/testparams/createset/')
        size = self.params.get("size", '/run/testparams/createsize/')

        # python pool object plus the daos storage underneath it
        pool = DaosPool(self.context)
        pool.create(mode, uid, gid, size, setid, None)
        pool.connect(1 << 1)

        container = DaosContainer(self.context)
        container.create(pool.handle)
        container.open()

        # the pool has to look healthy before anything is broken
        pool.pool_query()
        before = pool.pool_info
        if before.pi_ndisabled != 0:
            self.fail("Number of disabled targets reporting incorrectly.\n")
        if before.pi_rebuild_st.rs_errno != 0:
            self.fail("Rebuild error but rebuild hasn't run.\n")
        if before.pi_rebuild_st.rs_done != 1:
            self.fail("Rebuild is running but device hasn't failed yet.\n")
        if before.pi_rebuild_st.rs_obj_nr != 0:
            self.fail("Rebuilt objs not zero.\n")
        if before.pi_rebuild_st.rs_rec_nr != 0:
            self.fail("Rebuilt recs not zero.\n")
        dummy_pool_version = before.pi_rebuild_st.rs_version

        # do I/O for 30 seconds
        io_utilities.continuous_io(container, 30)

        # trigger the rebuild
        rank = self.params.get("rank", '/run/testparams/ranks/*')
        victim = DaosServer(self.context, group, rank)
        victim.kill(1)
        pool.exclude([rank])

        # do another 30 seconds of I/O,
        # waiting for some improvements in server bootstrap
        # at which point we can move the I/O to a separate client and
        # really pound it with I/O
        io_utilities.continuous_io(container, 30)

        # wait for the rebuild to finish, polling every two seconds
        pool.pool_query()
        while pool.pool_info.pi_rebuild_st.rs_done != 1:
            time.sleep(2)
            pool.pool_query()

        # check rebuild statistics
        after = pool.pool_info
        stats = after.pi_rebuild_st
        if after.pi_ndisabled != 1:
            self.fail("Number of disabled targets reporting incorrectly: {}"
                      .format(after.pi_ndisabled))
        if stats.rs_errno != 0:
            self.fail("Rebuild error reported: {}".format(stats.rs_errno))
        if stats.rs_obj_nr <= 0:
            self.fail("No objects have been rebuilt.")
        if stats.rs_rec_nr <= 0:
            self.fail("No records have been rebuilt.")

    except (ValueError, DaosApiError) as excep:
        print(excep)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")
    finally:
        # stop the servers and remove what the test left behind; the
        # agent stop and the server kill run even if that cleanup raises
        try:
            server_utils.stop_server(hosts=self.hostlist)
            os.remove(hosts_path)
            # really make sure everything is gone
            check_for_pool.cleanup_pools(self.hostlist)
        finally:
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            server_utils.kill_server(self.hostlist)
class NvmeIo(TestWithServers):
    """
    Test Class Description:
    Run IOR against pools with NVMe storage and verify how much pool
    space each run consumed.

    :avocado: recursive
    """

    def setUp(self):
        """Run the base class setUp and reset the per-test state."""
        super(NvmeIo, self).setUp()

        # initialize variables
        self.out_queue = None
        self.pool_connect = False

    def verify_pool_size(self, original_pool_info, ior_args):
        """
        Validate the pool space consumed by an IOR run.

        The drop in free space between `original_pool_info` and a fresh
        pool query must be at least
        client_processes * block_size * replica count, where the replica
        count is the first number found in the object class name (1 when
        the name contains no number).

        Args:
            original_pool_info: pool info queried prior to IOR
            ior_args (dict): IOR arguments; 'transfer_size',
                'block_size', 'client_processes' and 'object_class' are
                read

        Raises:
            DaosTestError: if less space than expected was consumed
        """
        # Get the current pool size for comparison
        current_pool_info = self.pool.pool_query()

        # if Transfer size is < 4K, Pool size will verified against SCM,
        # else it will be checked against NVMe
        if ior_args['transfer_size'] >= 4096:
            print("Size is > 4K,Size verification will be done with NVMe size")
            storage_index = 1
        else:
            print("Size is < 4K,Size verification will be done with SCM size")
            storage_index = 0

        # space consumed since the "before" snapshot was taken
        free_pool_size = (
            original_pool_info.pi_space.ps_space.s_free[storage_index]
            - current_pool_info.pi_space.ps_space.s_free[storage_index])

        # The pattern must be applied to the object class VALUE.  It used
        # to be applied to the quoted text "ior_args['object_class']",
        # which never contains a digit, so the multiplier was always 1.
        obj_multiplier = 1
        replica_number = re.findall(r'\d+', str(ior_args['object_class']))
        if replica_number:
            obj_multiplier = int(replica_number[0])

        expected_pool_size = (ior_args['client_processes'] *
                              ior_args['block_size'] * obj_multiplier)

        if free_pool_size < expected_pool_size:
            raise DaosTestError(
                'Pool Free Size did not match Actual = {} Expected = {}'
                .format(free_pool_size, expected_pool_size))

    @avocado.fail_on(DaosApiError)
    def test_nvme_io(self):
        """
        Test ID: DAOS-2082

        Test Description: Test will run IOR with standard and non standard
        sizes.  IOR will be run for all Object type supported.  Purpose is
        to verify pool size (SCM and NVMe) for IOR file.
        This test is running multiple IOR on same server start instance.

        :avocado: tags=nvme,nvme_io,large
        """
        ior_args = {}

        tests = self.params.get("ior_sequence", '/run/ior/*')
        object_type = self.params.get("object_type", '/run/ior/*')
        # Loop for every IOR object type
        for obj_type in object_type:
            for ior_param in tests:
                # There is an issue with NVMe if Transfer size>64M,
                # Skipped this sizes for now
                if ior_param[2] > 67108864:
                    print("Xfersize > 64M getting failed, DAOS-1264")
                    continue

                # a fresh pool for every IOR run
                self.pool = DaosPool(self.context)
                self.pool.create(self.params.get("mode",
                                                 '/run/pool/createmode/*'),
                                 os.geteuid(),
                                 os.getegid(),
                                 ior_param[0],
                                 self.params.get("setname",
                                                 '/run/pool/createset/*'),
                                 nvme_size=ior_param[1])
                self.pool.connect(1 << 1)
                self.pool_connect = True

                # colon separated list of the first `svcn` service ranks
                createsvc = self.params.get("svcn", '/run/pool/createsvc/')
                svc_list = ":".join(
                    str(int(self.pool.svc.rl_ranks[i]))
                    for i in range(createsvc))

                ior_args['client_hostfile'] = self.hostfile_clients
                ior_args['pool_uuid'] = self.pool.get_uuid_str()
                ior_args['svc_list'] = svc_list
                ior_args['basepath'] = self.basepath
                ior_args['server_group'] = self.server_group
                ior_args['tmp_dir'] = self.workdir
                ior_args['iorflags'] = self.params.get("iorflags",
                                                       '/run/ior/*')
                ior_args['iteration'] = self.params.get("iteration",
                                                        '/run/ior/*')
                ior_args['transfer_size'] = ior_param[2]
                ior_args['block_size'] = ior_param[3]
                ior_args['object_class'] = obj_type
                ior_args['client_processes'] = ior_param[4]

                try:
                    # snapshot taken before IOR, compared afterwards
                    size_before_ior = self.pool.pool_query()
                    ior_utils.run_ior_daos(ior_args['client_hostfile'],
                                           ior_args['iorflags'],
                                           ior_args['iteration'],
                                           ior_args['block_size'],
                                           ior_args['transfer_size'],
                                           ior_args['pool_uuid'],
                                           ior_args['svc_list'],
                                           ior_args['object_class'],
                                           ior_args['basepath'],
                                           ior_args['client_processes'],
                                           cont_uuid=str(uuid.uuid4()),
                                           display_output=True)
                    self.verify_pool_size(size_before_ior, ior_args)
                except ior_utils.IorFailed as exe:
                    print(exe)
                    print(traceback.format_exc())
                    self.fail()
                try:
                    if self.pool_connect:
                        self.pool.disconnect()
                        self.pool_connect = False
                    if self.pool:
                        self.pool.destroy(1)
                except DaosApiError as exe:
                    print(exe)
                    self.fail("Failed to Destroy/Disconnect the Pool")
def test_rebuild_with_io(self):
    """
    Test ID: Rebuild-003

    Test Description: Trigger a rebuild while I/O is ongoing.

    Use Cases:
      -- single pool, single client performing continuous
         read/write/verify sequence while failure/rebuild is triggered
         in another process

    :avocado: tags=pool,rebuild,rebuildwithio
    """
    # the rebuild tests need to redo this stuff each time so not in setup
    # as it usually would be
    group_name = self.params.get("name", '/server_config/', 'daos_server')
    self.hostlist_servers = self.params.get("test_machines", '/run/hosts/')
    server_hostfile = write_host_file.write_host_file(
        self.hostlist_servers, self.workdir)

    try:
        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers)
        server_utils.run_server(server_hostfile, group_name, self.basepath)

        # run as the user executing the test; those ids should be
        # perfectly valid
        owner_uid = os.geteuid()
        owner_gid = os.getegid()

        # yaml supplied pool create parameters
        pool_mode = self.params.get("mode", '/run/testparams/createmode/')
        pool_setid = self.params.get("setname",
                                     '/run/testparams/createset/')
        pool_size = self.params.get("size", '/run/testparams/createsize/')

        # create the pool object and its daos storage, then connect
        pool = DaosPool(self.context)
        pool.create(pool_mode, owner_uid, owner_gid, pool_size, pool_setid,
                    None)
        pool.connect(1 << 1)

        container = DaosContainer(self.context)
        container.create(pool.handle)
        container.open()

        # sanity check the pool before starting:
        # (observed value, required value, failure text)
        pool.pool_query()
        start_info = pool.pool_info
        start_checks = (
            (start_info.pi_ndisabled, 0,
             "Number of disabled targets reporting incorrectly.\n"),
            (start_info.pi_rebuild_st.rs_errno, 0,
             "Rebuild error but rebuild hasn't run.\n"),
            (start_info.pi_rebuild_st.rs_done, 1,
             "Rebuild is running but device hasn't failed yet.\n"),
            (start_info.pi_rebuild_st.rs_obj_nr, 0,
             "Rebuilt objs not zero.\n"),
            (start_info.pi_rebuild_st.rs_rec_nr, 0,
             "Rebuilt recs not zero.\n"),
        )
        for observed, required, text in start_checks:
            if observed != required:
                self.fail(text)
        dummy_pool_version = pool.pool_info.pi_rebuild_st.rs_version

        # do I/O for 30 seconds
        io_utilities.continuous_io(container, 30)

        # trigger the rebuild
        rank = self.params.get("rank", '/run/testparams/ranks/*')
        doomed = DaosServer(self.context, group_name, rank)
        doomed.kill(1)
        pool.exclude([rank])

        # do another 30 seconds of I/O,
        # waiting for some improvements in server bootstrap
        # at which point we can move the I/O to a separate client and
        # really pound it with I/O
        io_utilities.continuous_io(container, 30)

        # wait for the rebuild to finish, checking every two seconds
        pool.pool_query()
        while pool.pool_info.pi_rebuild_st.rs_done != 1:
            time.sleep(2)
            pool.pool_query()

        # check rebuild statistics
        end_info = pool.pool_info
        if end_info.pi_ndisabled != 1:
            self.fail("Number of disabled targets reporting incorrectly: {}"
                      .format(end_info.pi_ndisabled))
        if end_info.pi_rebuild_st.rs_errno != 0:
            self.fail("Rebuild error reported: {}".format(
                end_info.pi_rebuild_st.rs_errno))
        if end_info.pi_rebuild_st.rs_obj_nr <= 0:
            self.fail("No objects have been rebuilt.")
        if end_info.pi_rebuild_st.rs_rec_nr <= 0:
            self.fail("No records have been rebuilt.")

    except (ValueError, DaosApiError) as excep:
        print(excep)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")
    finally:
        # regular shutdown first; the agent stop and the server kill in
        # the inner finally run even when that shutdown raises
        try:
            server_utils.stop_server(hosts=self.hostlist_servers)
            os.remove(server_hostfile)
            # really make sure everything is gone
            check_for_pool.cleanup_pools(self.hostlist_servers)
        finally:
            if self.agent_sessions:
                agent_utils.stop_agent(self.agent_sessions)
            server_utils.kill_server(self.hostlist_servers)
class FullPoolContainerCreate(Test):
    """
    Class for test to create a container in a pool with no remaining free
    space.
    """

    def setUp(self):
        """
        Start the DAOS servers and prepare the API objects.

        The install prefix is read from .build_vars.json, located
        relative to this file.  The pool object is only instantiated
        here; the pool itself is created by the test.
        """
        # get paths from the build_vars generated by build
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               "../../../../.build_vars.json")) as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_default_oops')

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        # containers are created by the test itself
        self.cont = None
        self.cont2 = None
        self.pool = DaosPool(self.context)
        self.d_log = DaosLog(self.context)

        self.hostlist = self.params.get("test_machines1", '/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist,
                                                    self.workdir)

        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)

    def tearDown(self):
        """
        Shut everything down.

        Pool destroy is wrapped in a try; in case pool create didn't
        succeed, we still need the server to be shut down in any case.
        """
        try:
            self.pool.destroy(1)
        finally:
            ServerUtils.stopServer(hosts=self.hostlist)

    def test_no_space_cont_create(self):
        """
        Fill a minimum-size pool, then try to use a second container.

        Objects of 1 MiB, 1 KiB and 1 byte are written until each size is
        rejected with one of the expected out-of-space codes.  A second
        container is then created and opened, and one more write is
        attempted; that write (or the container creation) must fail with
        an out-of-space code, otherwise the test fails.

        :avocado: tags=pool,cont,fullpoolcontcreate,small,vm
        """
        # full storage rc
        err = "-1007"
        # probably should be -1007, revisit later
        err2 = "-1009"

        # create pool
        mode = self.params.get("mode", '/conttests/createmode/')
        self.d_log.debug("mode is {0}".format(mode))
        uid = os.geteuid()
        gid = os.getegid()

        # 16 mb pool, minimum size currently possible
        size = 16777216

        self.d_log.debug("creating pool")
        self.pool.create(mode, uid, gid, size, self.server_group, None)
        self.d_log.debug("created pool")

        # connect to the pool
        self.d_log.debug("connecting to pool")
        self.pool.connect(1 << 1)
        self.d_log.debug("connected to pool")

        # query the pool; only the call matters, the result is unused
        self.d_log.debug("querying pool info")
        self.pool.pool_query()
        self.d_log.debug("queried pool info")

        # create a container
        try:
            self.d_log.debug("creating container")
            self.cont = DaosContainer(self.context)
            self.cont.create(self.pool.handle)
            self.d_log.debug("created container")
        except DaosApiError as excep:
            self.d_log.error("caught exception creating container: "
                             "{0}".format(excep))
            self.fail("caught exception creating container: {0}".format(excep))

        self.d_log.debug("opening container")
        self.cont.open()
        self.d_log.debug("opened container")

        # generate random dkey, akey each time
        # write 1mb until no space, then 1kb, etc. to fill pool quickly
        for obj_sz in [1048576, 1024, 1]:
            write_count = 0
            while True:
                self.d_log.debug("writing obj {0}, sz {1} to "
                                 "container".format(write_count, obj_sz))
                my_str = "a" * obj_sz
                my_str_sz = obj_sz
                # ascii_lowercase exists on Python 2 and 3 and, unlike
                # string.lowercase, does not vary with the locale
                dkey = ''.join(random.choice(string.ascii_lowercase)
                               for _ in range(5))
                akey = ''.join(random.choice(string.ascii_lowercase)
                               for _ in range(5))
                try:
                    self.cont.write_an_obj(my_str, my_str_sz, dkey, akey,
                                           obj_cls=1)
                    self.d_log.debug("wrote obj {0}, sz {1}".format(
                        write_count, obj_sz))
                    write_count += 1
                except DaosApiError as excep:
                    if not (err in repr(excep) or err2 in repr(excep)):
                        self.d_log.error("caught exception while writing "
                                         "object: {0}".format(repr(excep)))
                        self.fail("caught exception while writing object: {0}"
                                  .format(repr(excep)))
                    # out of space for this size, move on to a smaller one
                    self.d_log.debug("pool is too full for {0} byte "
                                     "objects".format(obj_sz))
                    break

        self.d_log.debug("closing container")
        self.cont.close()
        self.d_log.debug("closed container")

        # create a 2nd container now that pool is full
        try:
            self.d_log.debug("creating 2nd container")
            self.cont2 = DaosContainer(self.context)
            self.cont2.create(self.pool.handle)
            self.d_log.debug("created 2nd container")

            self.d_log.debug("opening container 2")
            self.cont2.open()
            self.d_log.debug("opened container 2")

            self.d_log.debug("writing one more object, write expected to fail")
            self.cont2.write_an_obj(my_str, my_str_sz, dkey, akey,
                                    obj_cls=1)
            self.d_log.debug("wrote one more object--this should never print")
        except DaosApiError as excep:
            if not (err in repr(excep) or err2 in repr(excep)):
                self.d_log.error("caught unexpected exception while "
                                 "writing object: {0}".format(repr(excep)))
                self.fail("caught unexpected exception while writing "
                          "object: {0}".format(repr(excep)))
            else:
                self.d_log.debug("correctly caught -1007 while attempting "
                                 "to write object in full pool")
        else:
            # no exception at all: the pool accepted a write it should
            # have rejected, which previously passed silently
            self.fail("write to a full pool succeeded, expected it to fail")
class RebuildNoCap(Test):
    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: tags=pool,rebuild,nocap
    """

    # Class-level defaults.  setUp() overrides server_group, CONTEXT, POOL
    # and hostfile on the instance; the defaults let tearDown() tell whether
    # setUp() got far enough to create each resource.
    build_paths = []
    server_group = ""
    CONTEXT = None
    POOL = None
    hostfile = ""

    def setUp(self):
        """Start the DAOS servers, create a pool and write data into it."""

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as f:
            build_paths = json.load(f)
        self.CONTEXT = DaosContext(build_paths['PREFIX'] + '/lib/')

        # generate a hostfile
        self.host_list = self.params.get("test_machines", '/run/hosts/')
        tmp = build_paths['PREFIX'] + '/tmp'
        self.hostfile = WriteHostFile.WriteHostFile(self.host_list, tmp)

        # fire up the DAOS servers
        self.server_group = self.params.get("server_group", '/run/server/',
                                            'daos_server')
        ServerUtils.runServer(self.hostfile, self.server_group,
                              build_paths['PREFIX'] + '/../')
        # fixed delay before the servers are used
        time.sleep(3)

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.POOL = DaosPool(self.CONTEXT)
        self.POOL.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.POOL.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool by running the WriteSomeData
        # helper script under mpirun on the first test machine
        how_many_bytes = long(
            self.params.get("datasize", '/run/testparams/datatowrite/'))
        exepath = build_paths['PREFIX'] +\
            "/../src/tests/ftest/util/WriteSomeData.py"
        cmd = "export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"\
              " --np 1 --host {1} {2} {3} testfile".format(
                  uuid, self.host_list[0], exepath, how_many_bytes)
        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """Remove the hostfile, destroy the pool and stop the servers.

        The servers are stopped even when the earlier cleanup steps raise,
        and steps whose resource was never created are skipped.
        """
        try:
            if self.hostfile:
                os.remove(self.hostfile)
            if self.POOL:
                self.POOL.destroy(1)
        finally:
            ServerUtils.stopServer()

    def test_rebuild_no_capacity(self):
        """
        Kill one server of a pool that already holds data, exclude it and
        check that the pool reports a rebuild problem.

        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(
                self.params.get("rank_to_kill", '/run/testparams/ranks/'))
            sh = DaosServer(self.CONTEXT, bytes(self.server_group),
                            svr_to_kill)

            time.sleep(1)
            sh.kill(1)

            # exclude the target from the dead server
            self.POOL.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.POOL.connect(1 << 1)
            status = self.POOL.pool_query()
            if not status.pi_ntargets == len(self.host_list):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            # NOTE(review): index 2 of pi_rebuild_st is presumably the
            # rebuild error code -- confirm against the DaosPool bindings.
            if status.pi_rebuild_st[2] == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except ValueError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
class NvmeIo(IorTestBase):
    """Run IOR against pools that have an NVMe allocation.

    Test Class Description:
        For each configured object class and IOR parameter set a pool is
        created, IOR is run against it and the pool usage is verified.

    :avocado: recursive
    """

    @avocado.fail_on(DaosApiError)
    def test_nvme_io(self):
        """Jira ID: DAOS-2082.

        Test Description:
            Run IOR with standard and non-standard sizes for every supported
            object type and verify the pool size (SCM and NVMe) afterwards.

        Use Cases:
            Running multiple IOR on same server start instance.

        :avocado: tags=all,daosio,full_regression,hw,nvme_io
        """
        # Transfer sizes above 64 MiB are skipped, see DAOS-1264.
        max_transfer_size = 64 * 1024 * 1024

        # Settings shared by every pool created below
        create_mode = self.params.get("mode", '/run/pool/createmode/*')
        create_uid = os.geteuid()
        create_gid = os.getegid()
        create_group = self.params.get("setname", '/run/pool/createset/*')
        create_svcn = self.params.get("svcn", '/run/pool/createsvc/')

        # IOR settings
        ior_sequence = self.params.get("ior_sequence", '/run/ior/*')
        object_classes = self.params.get("object_type", '/run/ior/*')

        # Every object class is exercised with every IOR parameter set
        for oclass in object_classes:
            for params in ior_sequence:
                if params[2] > max_transfer_size:
                    self.log.warning("Xfersize > 64M fails - DAOS-1264")
                    continue

                # A fresh pool per pass; params[0] is the pool size and
                # params[1] the NVMe size handed to create()
                self.pool = DaosPool(self.context)
                self.pool.create(
                    create_mode, create_uid, create_gid, params[0],
                    create_group, svcn=create_svcn, nvme_size=params[1])
                self.pool.connect(1 << 1)

                # Snapshot of the pool state before any IO
                pool_info_before = self.pool.pool_query()

                # Configure and launch IOR for this pass
                self.ior_cmd.transfer_size.update(params[2])
                self.ior_cmd.block_size.update(params[3])
                self.ior_cmd.daos_oclass.update(oclass)
                self.ior_cmd.set_daos_params(self.server_group, self.pool)
                self.run_ior(self.get_job_manager_command(), params[4])

                # Check that IOR consumed the expected amount from the pool
                self.verify_pool_size(pool_info_before, params[4])

                # Release the pool before the next pass
                try:
                    if self.pool:
                        self.pool.disconnect()
                        self.pool.destroy(1)
                except DaosApiError as error:
                    self.log.error(
                        "Pool disconnect/destroy error: %s", str(error))
                    self.fail("Failed to Destroy/Disconnect the Pool")
def test_simple_rebuild(self):
    """
    Test ID: Rebuild-001

    Test Description: The most basic rebuild test.  Data is written to a
    single pool, the server at the written rank is killed and excluded, and
    the rebuild status and the stored data are verified afterwards.

    Use Cases:
      -- single pool rebuild, single client, various record/object counts

    :avocado: tags=pool,rebuild,rebuildsimple
    """

    # the rebuild tests need to redo this stuff each time so not in setup
    # as it usually would be
    setid = self.params.get("setname", '/run/testparams/setnames/')
    server_group = self.params.get("server_group", '/server/',
                                   'daos_server')

    basepath = os.path.normpath(self.build_paths['PREFIX'] + "/../")
    tmp = self.build_paths['PREFIX'] + '/tmp'

    self.hostlist = self.params.get("test_machines", '/run/hosts/')
    hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp)

    try:
        ServerUtils.runServer(hostfile, server_group, basepath)

        # use the uid/gid of the user running the test, these should
        # be perfectly valid
        createuid = os.geteuid()
        creategid = os.getegid()

        # parameters used in pool create that are in yaml
        createmode = self.params.get("mode", '/run/testparams/createmode/')
        createsetid = self.params.get("setname",
                                      '/run/testparams/createset/')
        createsize = self.params.get("size", '/run/testparams/createsize/')

        # initialize a python pool object then create the underlying
        # daos storage
        pool = DaosPool(self.Context)
        pool.create(createmode, createuid, creategid,
                    createsize, createsetid, None)

        # want an open connection during rebuild
        pool.connect(1 << 1)

        # get pool status we want to test later; pool_query() refreshes
        # pool.pool_info, which the checks below read
        pool.pool_query()
        if pool.pool_info.pi_ndisabled != 0:
            self.fail(
                "Number of disabled targets reporting incorrectly.\n")
        if pool.pool_info.pi_rebuild_st.rs_errno != 0:
            self.fail("Rebuild error but rebuild hasn't run.\n")
        # rs_done is expected to be 1 while no rebuild is in progress
        if pool.pool_info.pi_rebuild_st.rs_done != 1:
            self.fail("Rebuild is running but device hasn't failed yet.\n")
        if pool.pool_info.pi_rebuild_st.rs_obj_nr != 0:
            self.fail("Rebuilt objs not zero.\n")
        if pool.pool_info.pi_rebuild_st.rs_rec_nr != 0:
            self.fail("Rebuilt recs not zero.\n")
        # NOTE(review): pool_version is not read again in this method
        pool_version = pool.pool_info.pi_rebuild_st.rs_version

        # create a container
        container = DaosContainer(self.Context)
        container.create(pool.handle)

        # now open it
        container.open()

        # how many objects and records are we creating
        objcount = self.params.get("objcount",
                                   '/run/testparams/numobjects/*')
        reccount = self.params.get("reccount",
                                   '/run/testparams/numrecords/*')
        # no objects means no records either; keeps the expected rebuilt
        # record count (reccount * objcount) consistent
        if objcount == 0:
            reccount = 0

        # which rank to write to and kill
        rank = self.params.get("rank", '/run/testparams/ranks/*')

        # how much data to write with each key
        size = self.params.get("size", '/run/testparams/datasize/')

        # (obj, dkey, akey, data, tx) for every record written, so the data
        # can be re-read once the rebuild has finished
        saved_data = []
        for i in range(0, objcount):
            # obj is None for the first record of each object; afterwards the
            # handle returned by write_an_obj is passed back in
            obj = None
            for j in range(0, reccount):

                # make some stuff up and write
                dkey = ''.join(
                    random.choice(string.ascii_uppercase + string.digits)
                    for _ in range(5))
                akey = ''.join(
                    random.choice(string.ascii_uppercase + string.digits)
                    for _ in range(5))
                data = ''.join(
                    random.choice(string.ascii_uppercase + string.digits)
                    for _ in range(size))

                obj, tx = container.write_an_obj(data, len(data), dkey,
                                                 akey, obj, rank)

                saved_data.append((obj, dkey, akey, data, tx))

                # read the data back and make sure its correct
                data2 = container.read_an_obj(size, dkey, akey, obj, tx)
                if data != data2.value:
                    self.fail("Write data 1, read it back, didn't match\n")

        # kill the server at the rank the data was written to
        server = DaosServer(self.Context, server_group, rank)
        server.kill(1)

        # temporarily, the exclude of a failed target must be done
        # manually
        pool.exclude([rank])

        # poll every 2 seconds until the rebuild reports done; there is no
        # upper bound on how long this waits
        while True:
            # get the pool/rebuild status again
            pool.pool_query()
            if pool.pool_info.pi_rebuild_st.rs_done == 1:
                break
            else:
                time.sleep(2)

        # exactly one target disabled, no error, and every object/record
        # written above accounted for by the rebuild
        if pool.pool_info.pi_ndisabled != 1:
            self.fail(
                "Number of disabled targets reporting incorrectly: {}".
                format(pool.pool_info.pi_ndisabled))
        if pool.pool_info.pi_rebuild_st.rs_errno != 0:
            self.fail("Rebuild error reported: {}".format(
                pool.pool_info.pi_rebuild_st.rs_errno))
        if pool.pool_info.pi_rebuild_st.rs_obj_nr != objcount:
            self.fail("Rebuilt objs not as expected: {0} {1}".format(
                pool.pool_info.pi_rebuild_st.rs_obj_nr, objcount))
        if pool.pool_info.pi_rebuild_st.rs_rec_nr != (reccount * objcount):
            self.fail("Rebuilt recs not as expected: {0} {1}".format(
                pool.pool_info.pi_rebuild_st.rs_rec_nr,
                reccount * objcount))

        # now that the rebuild finished verify the records are correct
        for tup in saved_data:
            data2 = container.read_an_obj(len(tup[3]), tup[1], tup[2],
                                          tup[0], tup[4])
            if tup[3] != data2.value:
                self.fail("after rebuild data didn't check out")

    except DaosApiError as e:
        print(e)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")
    finally:
        # cleanup runs whether the test passed or failed; killServer runs
        # even if one of the earlier cleanup steps raises
        try:
            ServerUtils.stopServer(hosts=self.hostlist)
            os.remove(hostfile)
            # really make sure everything is gone
            CheckForPool.CleanupPools(self.hostlist)
        finally:
            ServerUtils.killServer(self.hostlist)
class RebuildNoCap(Test):
    """
    Test Class Description:
    This class contains tests for pool rebuild.

    :avocado: tags=pool,rebuild,nocap
    """

    def setUp(self):
        """Start agents and servers, create a pool and write data into it."""
        # Defined up front so tearDown() can tell which resources exist
        # even when setUp() aborts part way through.
        self.agent_sessions = None
        self.pool = None
        self.hostfile = None

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        self.hostfile = write_host_file.write_host_file(self.hostlist,
                                                        self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                   self.hostlist)
        server_utils.run_server(self.hostfile, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        uuid = self.pool.get_uuid_str()

        time.sleep(2)

        # stuff some bogus data into the pool
        how_many_bytes = long(self.params.get("datasize",
                                              '/run/testparams/datatowrite/'))
        # The script lives in the source tree one level above PREFIX.  The
        # joined component must be relative: os.path.join() throws away
        # everything before an absolute component, which previously dropped
        # PREFIX from the path entirely.
        exepath = os.path.join(self.basepath,
                               "src/tests/ftest/util/write_some_data.py")
        cmd = "export DAOS_POOL={0}; export DAOS_SVCL=1; mpirun"\
              " --np 1 --host {1} {2} {3} testfile".format(
                  uuid, self.hostlist[0], exepath, how_many_bytes)
        subprocess.call(cmd, shell=True)

    def tearDown(self):
        """Remove the hostfile, destroy the pool, stop agents and servers.

        Agents and servers are stopped even when the earlier cleanup steps
        raise.
        """
        try:
            if self.hostfile:
                os.remove(self.hostfile)
            if self.pool:
                self.pool.destroy(1)
        finally:
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist, self.agent_sessions)
            server_utils.stop_server(hosts=self.hostlist)

    def test_rebuild_no_capacity(self):
        """
        Kill one server of a pool that already holds data, exclude it and
        check that the pool reports a rebuild error.

        :avocado: tags=pool,rebuild,nocap
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            d_server = DaosServer(self.context, bytes(self.server_group),
                                  svr_to_kill)

            time.sleep(1)
            d_server.kill(1)

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            # exclude should trigger rebuild, check
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 1:
                self.fail("disabled target count wrong.\n")

            # the pool should be too full to start a rebuild so
            # expecting an error
            # not sure yet specifically what error
            if status.pi_rebuild_st.rs_errno == 0:
                self.fail("expecting rebuild to fail but it didn't.\n")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
class DestroyRebuild(Test):
    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: tags=pool,pooldestroy,rebuild,desreb
    """

    # Class-level defaults; setUp() overrides server_group, CONTEXT, POOL
    # and hostfile on the instance.
    build_paths = []
    server_group = ""
    CONTEXT = None
    POOL = None
    hostfile = ""

    def setUp(self):
        """Start the DAOS servers and create an empty pool."""

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as f:
            build_paths = json.load(f)
        self.CONTEXT = DaosContext(build_paths['PREFIX'] + '/lib/')

        # generate a hostfile
        self.hostlist = self.params.get("test_machines", '/run/hosts/')
        tmp = build_paths['PREFIX'] + '/tmp'
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, tmp)

        # fire up the DAOS servers
        self.server_group = self.params.get("server_group", '/run/server/',
                                            'daos_server')
        ServerUtils.runServer(self.hostfile, self.server_group,
                              build_paths['PREFIX'] + '/../')
        time.sleep(3)

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.POOL = DaosPool(self.CONTEXT)
        self.POOL.create(createmode, createuid, creategid, createsize,
                         createsetid)
        self.POOL.get_uuid_str()

        time.sleep(2)

    def tearDown(self):
        """Remove the hostfile, destroy any remaining pool, stop servers."""
        try:
            os.remove(self.hostfile)
            if self.POOL:
                self.POOL.destroy(1)
        finally:
            ServerUtils.stopServer(hosts=self.hostlist)

    def test_destroy_while_rebuilding(self):
        """
        Kill and exclude one pool target, then destroy the pool right away.

        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            sh = DaosServer(self.CONTEXT, bytes(self.server_group),
                            svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.POOL.connect(1 << 1)
            status = self.POOL.pool_query()
            if not status.pi_ntargets == len(self.hostlist):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            sh.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.POOL.exclude([svr_to_kill])

            print("exclude target ")

            # the rebuild won't take long since there is no data so do
            # the destroy quickly; the pool is still connected at this point
            self.POOL.destroy(1)
            # The pool is gone now: drop the reference so tearDown() does
            # not attempt to destroy it a second time.
            self.POOL = None
            print("destroy ")

        except DaosApiError as e:
            print(e)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
class DestroyRebuild(Test):
    """
    Test Class Description:
    This test verifies destruction of a pool that is rebuilding.

    :avocado: recursive
    """

    # Class-level defaults; setUp() overrides server_group, context, pool
    # and hostfile_servers on the instance.
    build_paths = []
    server_group = ""
    context = None
    pool = None
    hostfile_servers = ""

    def setUp(self):
        """Start agents and servers and create an empty pool."""
        self.agent_sessions = None

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")

        # generate a hostfile
        self.hostlist_servers = self.params.get("test_machines", '/run/hosts/')
        self.hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.workdir)

        # fire up the DAOS servers
        self.server_group = self.params.get("name", '/run/server_config/',
                                            'daos_server')
        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers)
        server_utils.run_server(self.hostfile_servers, self.server_group,
                                build_paths['PREFIX'] + '/../')

        # create a pool to test with
        createmode = self.params.get("mode", '/run/pool/createmode/')
        createuid = self.params.get("uid", '/run/pool/createuid/')
        creategid = self.params.get("gid", '/run/pool/creategid/')
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createsize = self.params.get("size", '/run/pool/createsize/')
        self.pool = DaosPool(self.context)
        self.pool.create(createmode, createuid, creategid, createsize,
                         createsetid)
        self.pool.get_uuid_str()

        time.sleep(2)

    def tearDown(self):
        """Remove the hostfile, destroy any remaining pool, stop daemons.

        Agents and servers are stopped even when the earlier cleanup steps
        raise.
        """
        try:
            # hostfile_servers is still "" when setUp() failed before the
            # hostfile was written; there is nothing to remove then
            if self.hostfile_servers:
                os.remove(self.hostfile_servers)
            if self.pool:
                self.pool.destroy(1)
        finally:
            if self.agent_sessions:
                agent_utils.stop_agent(self.agent_sessions)
            server_utils.stop_server(hosts=self.hostlist_servers)

    def test_destroy_while_rebuilding(self):
        """
        Kill and exclude one pool target, then destroy the pool right away.

        :avocado: tags=pool,pooldestroy,rebuild,desreb
        """
        try:
            print("\nsetup complete, starting test\n")

            # create a server object that references one of our pool target
            # hosts and then kill it
            svr_to_kill = int(self.params.get("rank_to_kill",
                                              '/run/testparams/ranks/'))
            server = DaosServer(self.context, bytes(self.server_group),
                                svr_to_kill)

            print("created server ")

            # BUG if you don't connect the rebuild doesn't start correctly
            self.pool.connect(1 << 1)
            status = self.pool.pool_query()
            if not status.pi_ntargets == len(self.hostlist_servers):
                self.fail("target count wrong.\n")
            if not status.pi_ndisabled == 0:
                self.fail("disabled target count wrong.\n")

            print("connect ")

            time.sleep(1)
            server.kill(1)

            print("killed server ")

            # exclude the target from the dead server
            self.pool.exclude([svr_to_kill])

            print("exclude target ")

            # the rebuild won't take long since there is no data so do
            # the destroy quickly; the pool is still connected at this point
            self.pool.destroy(1)
            # The pool is gone now: drop the reference so tearDown() does
            # not attempt to destroy it a second time.
            self.pool = None
            print("destroy ")

        except DaosApiError as excep:
            print(excep)
            print(traceback.format_exc())
            self.fail("Expecting to pass but test has failed.\n")
class FullPoolContainerCreate(TestWithServers):
    """
    Fill a pool until writes are rejected, then try to write through a
    second container created in that same pool.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Set up the base test and the two container slots."""
        super(FullPoolContainerCreate, self).__init__(*args, **kwargs)
        self.cont = None
        self.cont2 = None

    @skipForTicket("DAOS-3142")
    def test_no_space_cont_create(self):
        """
        Write objects of decreasing size until the pool is full, then verify
        that a write through a second container is rejected as well.

        :avocado: tags=all,container,tiny,full_regression,fullpoolcontcreate
        """
        # Return codes accepted as "storage is full".  -1007 is the expected
        # one; -1009 is tolerated for now (probably should be -1007, revisit
        # later).
        no_space_rcs = ("-1007", "-1009")

        def random_key():
            """Return five random lowercase letters."""
            return ''.join(
                random.choice(string.lowercase) for _ in range(5))

        log = self.d_log

        # create pool: 16 mb, minimum size currently possible
        self.pool = DaosPool(self.context)
        mode = self.params.get("mode", '/conttests/createmode/')
        log.debug("mode is {0}".format(mode))
        uid = os.geteuid()
        gid = os.getegid()
        size = 16777216

        log.debug("creating pool")
        self.pool.create(mode, uid, gid, size, self.server_group, None)
        log.debug("created pool")

        # connect to the pool
        log.debug("connecting to pool")
        self.pool.connect(1 << 1)
        log.debug("connected to pool")

        # query the pool; the result itself is not inspected
        log.debug("querying pool info")
        self.pool.pool_query()
        log.debug("queried pool info")

        # create a container
        try:
            log.debug("creating container")
            self.cont = DaosContainer(self.context)
            self.cont.create(self.pool.handle)
            log.debug("created container")
        except DaosApiError as excep:
            log.error(
                "caught exception creating container: {0}".format(excep))
            self.fail("caught exception creating container: {0}".format(excep))

        log.debug("opening container")
        self.cont.open()
        log.debug("opened container")

        # Fill the pool quickly: keep writing at each size until the write
        # is rejected, then move on to the next smaller size.  Fresh random
        # dkey/akey for every write.
        for obj_sz in (1048576, 10240, 10, 1):
            write_count = 0
            while True:
                log.debug(
                    "writing obj {0}, sz {1} to container".format(
                        write_count, obj_sz))
                my_str = "a" * obj_sz
                my_str_sz = obj_sz
                dkey = random_key()
                akey = random_key()

                try:
                    dummy_oid, dummy_tx = self.cont.write_an_obj(
                        my_str, my_str_sz, dkey, akey, obj_cls="OC_SX")
                    log.debug("wrote obj {0}, sz {1}".format(
                        write_count, obj_sz))
                    write_count += 1
                except DaosApiError as excep:
                    details = repr(excep)
                    if any(code in details for code in no_space_rcs):
                        log.debug(
                            "pool is too full for {0} byte objects".format(
                                obj_sz))
                        break
                    log.error(
                        "caught exception while writing object: {0}".format(
                            details))
                    self.fail(
                        "caught exception while writing object: {0}".format(
                            details))

        log.debug("closing container")
        self.cont.close()
        log.debug("closed container")

        # create a 2nd container now that pool is full and repeat the last
        # write attempted above through it
        try:
            log.debug("creating 2nd container")
            self.cont2 = DaosContainer(self.context)
            self.cont2.create(self.pool.handle)
            log.debug("created 2nd container")

            log.debug("opening container 2")
            self.cont2.open()
            log.debug("opened container 2")

            log.debug("writing one more object, write expected to fail")
            self.cont2.write_an_obj(my_str, my_str_sz, dkey, akey,
                                    obj_cls="OC_SX")
            self.fail(
                "wrote one more object after pool was completely filled,"
                " this should never print")
        except DaosApiError as excep:
            details = repr(excep)
            if any(code in details for code in no_space_rcs):
                log.debug(
                    "correctly caught -1007 while attempting "
                    "to write object in full pool")
            else:
                log.error(
                    "caught unexpected exception while "
                    "writing object: {0}".format(details))
                self.fail(
                    "caught unexpected exception while writing "
                    "object: {0}".format(details))
class FullPoolContainerCreate(Test):
    """
    Class for test to create a container in a pool with no remaining free
    space.
    """

    def setUp(self):
        """Start the DAOS servers and prepare an (uncreated) pool object."""
        # Defined up front so tearDown() works even when setUp() aborts
        # before the hostfile has been written.
        self.hostfile = None
        self.cont = None
        self.cont2 = None

        # get paths from the build_vars generated by build
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               "../../../../.build_vars.json")) as f:
            build_paths = json.load(f)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.tmp = build_paths['PREFIX'] + '/tmp'

        self.server_group = self.params.get("server_group", '/server/',
                                            'daos_default_oops')

        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')
        print("initialized!!!\n")

        self.pool = DaosPool(self.context)
        self.hostlist = self.params.get("test_machines1", '/hosts/')
        self.hostfile = WriteHostFile.WriteHostFile(self.hostlist, self.tmp)
        ServerUtils.runServer(self.hostfile, self.server_group, self.basepath)
        time.sleep(5)

    def tearDown(self):
        """Stop the servers and remove the hostfile.

        The hostfile is removed even when stopping the servers raises.
        """
        try:
            # shut 'er down
            ServerUtils.stopServer()
        finally:
            if self.hostfile is not None:
                os.remove(self.hostfile)

    def test_no_space_cont_create(self):
        """
        Write objects of decreasing size until the pool is full, then verify
        that a write through a second container is rejected with -1007.

        :avocado: tags=pool,container,fullpoolcontcreate,small,vm
        """
        # create pool
        mode = self.params.get("mode", '/conttests/createmode/')
        print("mode is {0}".format(mode))
        uid = os.geteuid()
        gid = os.getegid()

        # 16 mb pool, minimum size currently possible
        size = 16777216

        print("creating pool")
        self.pool.create(mode, uid, gid, size, self.server_group, None)
        print("created pool")

        # connect to the pool
        print("connecting to pool")
        self.pool.connect(1 << 1)
        print("connected to pool")

        # query the pool; the result itself is not inspected
        print("querying pool info")
        self.pool.pool_query()
        print("queried pool info")

        # create a container
        try:
            print("creating container")
            self.cont = DaosContainer(self.context)
            self.cont.create(self.pool.handle)
            print("created container")
        except ValueError as e:
            self.fail("caught exception creating container: {0}".format(e))

        print("opening container")
        self.cont.open()
        print("opened container")

        # generate random dkey, akey each time
        # write 1mb until no space, then 1kb, etc. to fill pool quickly
        for x in [1048576, 1024, 1]:
            write_count = 0
            while True:
                print("writing obj {0}, sz {1} to container".format(
                    write_count, x))
                my_str = "a" * x
                my_str_sz = x
                dkey = ''.join(
                    random.choice(string.lowercase) for _ in range(5))
                akey = ''.join(
                    random.choice(string.lowercase) for _ in range(5))

                try:
                    oid, epoch = self.cont.write_an_obj(my_str, my_str_sz,
                                                        dkey, akey)
                    print("wrote obj {0}, sz {1}".format(write_count, x))
                    write_count += 1
                except ValueError as e:
                    # anything other than "out of space" is a real failure
                    if "RC: -1007" not in repr(e):
                        self.fail("caught exception while writing object: {0}"
                                  .format(repr(e)))
                    else:
                        print("pool is too full for {0} byte objects".format(
                            x))
                        break

        print("closing container")
        self.cont.close()
        print("closed container")

        # create a 2nd container now that pool is full and repeat the last
        # write attempted above through it
        try:
            print("creating 2nd container")
            self.cont2 = DaosContainer(self.context)
            self.cont2.create(self.pool.handle)
            print("created 2nd container")

            # NOTE(review): cont2 is written to without an explicit open();
            # confirm whether write_an_obj requires an opened container.
            print("writing one more object, write expected to fail")
            self.cont2.write_an_obj(my_str, my_str_sz, dkey, akey)
            # Reaching this line means the full pool accepted another
            # write, which is exactly what this test must catch.  fail()
            # raises AssertionError, so the handler below does not swallow
            # it.
            self.fail("wrote one more object--this should never print")
        except ValueError as e:
            if "RC: -1007" not in repr(e):
                self.fail("caught unexpected exception while writing "
                          "object: {0}".format(repr(e)))
            else:
                print("correctly caught -1007 while attempting "
                      "to write object in full pool")