class OSAOfflineExtend(OSAUtils):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: This test runs
    daos_server offline extend test cases.

    :avocado: recursive
    """
    def setUp(self):
        """Set up for test case."""
        super().setUp()
        self.dmg_command = self.get_dmg_command()
        self.daos_command = DaosCommand(self.bin)
        # IOR transfer/block size sequence shared by every extend test.
        self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                 '/run/ior/iorflags/*')
        # Additional servers started mid-test so the pool can be extended
        # onto their ranks.
        self.extra_servers = self.params.get("test_servers",
                                             "/run/extra_servers/*")
        # Ranks (from YAML) that the pool is extended to, one per iteration.
        self.rank = self.params.get("rank_list", '/run/test_ranks/*')
        self.test_oclass = None
        # Raise an exception on non-zero dmg exit status instead of
        # returning it.
        self.dmg_command.exit_status_exception = True

    def run_offline_extend_test(self, num_pool, data=False, oclass=None):
        """Run the offline extend test, optionally writing data first.

        Creates ``num_pool`` pools (optionally filling them via IOR/mdtest),
        starts the extra servers, extends each selected pool to the ranks in
        ``self.rank``, verifies the pool map version increased, and finally
        re-reads the data for verification when ``data`` is True.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            data (bool) : whether pool has no data or to create some
                          data in pool. Defaults to False.
            oclass (list) : list of daos object class (eg: "RP_2G8")
        """
        # Create a pool
        pool = {}
        if oclass is None:
            # Fall back to the object class configured for the IOR command.
            oclass = []
            oclass.append(self.ior_cmd.dfs_oclass.value)
        self.log.info(oclass[0])

        for val in range(0, num_pool):
            # Perform IOR write using the oclass list; pools beyond the
            # list length reuse the first object class.
            if val < len(oclass):
                index = val
            else:
                index = 0
            pool[val] = TestPool(self.context, dmg_command=self.dmg_command)
            pool[val].get_params(self)
            pool[val].create()
            self.pool = pool[val]
            test_seq = self.ior_test_sequence[0]
            # Disable aggregation so rebuild/extend observations are stable.
            self.pool.set_property("reclaim", "disabled")
            if data:
                self.run_ior_thread("Write", oclass[index], test_seq)
                self.run_mdtest_thread()
                if self.test_during_aggregation is True:
                    # Second write populates the extra container whose later
                    # deletion triggers aggregation.
                    self.run_ior_thread("Write", oclass[index], test_seq)

        # Start the additional servers and extend the pool
        self.log.info("Extra Servers = %s", self.extra_servers)
        self.start_additional_servers(self.extra_servers)
        # Give sometime for the additional server to come up.
        for retry in range(0, 10):
            scan_info = self.get_dmg_command().system_query()
            if not check_system_query_status(scan_info):
                if retry == 9:
                    self.fail("One or more servers not in expected status")
            else:
                break

        for rank_index, rank_val in enumerate(self.rank):
            # If total pools less than 3, extend only a single pool.
            # If total pools >= 3 : Extend only 3 pools.
            # NOTE(review): the branch keys off num_pool >= len(self.rank),
            # not the constant 3 — the "3" in the comment presumably matches
            # the YAML rank_list length; confirm against the YAML.
            if num_pool >= len(self.rank):
                val = rank_index
            else:
                val = 0
            self.pool = pool[val]
            scm_size = self.pool.scm_size
            nvme_size = self.pool.nvme_size
            self.pool.display_pool_daos_space("Pool space: Beginning")
            pver_begin = self.get_pool_version()
            self.log.info("Pool Version at the beginning %s", pver_begin)
            # Enable aggregation for multiple pool testing only.
            if self.test_during_aggregation is True and (num_pool > 1):
                self.delete_extra_container(self.pool)
            output = self.dmg_command.pool_extend(self.pool.uuid,
                                                  rank_val, scm_size,
                                                  nvme_size)
            self.print_and_assert_on_rebuild_failure(output)

            pver_extend = self.get_pool_version()
            self.log.info("Pool Version after extend %d", pver_extend)
            # Check pool version incremented after pool extend
            self.assertTrue(pver_extend > pver_begin,
                            "Pool Version Error:  After extend")

            display_string = "Pool{} space at the End".format(val)
            pool[val].display_pool_daos_space(display_string)
            if data:
                # Perform the IOR read using the same
                # daos object class used for write.
                if val < len(oclass):
                    index = val
                else:
                    index = 0
                self.run_ior_thread("Read", oclass[index], test_seq)
                self.run_mdtest_thread()
                # Run the daos container check on the container written by
                # IOR to verify object integrity after the extend.
                self.container = self.pool_cont_dict[self.pool][0]
                kwargs = {"pool": self.pool.uuid,
                          "cont": self.container.uuid}
                output = self.daos_command.container_check(**kwargs)
                self.log.info(output)

    def test_osa_offline_extend(self):
        """
        JIRA ID: DAOS-4751

        Test Description: Validate Offline Extend

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,large
        :avocado: tags=osa,checksum,osa_extend
        :avocado: tags=offline_extend,offline_extend_with_csum
        """
        self.log.info("Offline Extend Testing : With Checksum")
        self.run_offline_extend_test(1, True)

    def test_osa_offline_extend_without_checksum(self):
        """Test ID: DAOS-6924

        Test Description: Validate Offline extend without Checksum.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,large
        :avocado: tags=osa,osa_extend
        :avocado: tags=offline_extend,offline_extend_without_csum
        """
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.log.info("Offline Extend Testing: Without Checksum")
        self.run_offline_extend_test(1, data=True)

    def test_osa_offline_extend_multiple_pools(self):
        """Test ID: DAOS-6924

        Test Description: Validate Offline extend with multiple pools.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,large
        :avocado: tags=osa,osa_extend
        :avocado: tags=offline_extend,offline_extend_multiple_pools
        """
        self.log.info("Offline Extend Testing: Multiple Pools")
        self.run_offline_extend_test(5, data=True)

    @skipForTicket("DAOS-7493")
    def test_osa_offline_extend_oclass(self):
        """Test ID: DAOS-6924

        Test Description: Validate Offline extend with different
        object classes.

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,large
        :avocado: tags=osa,osa_extend
        :avocado: tags=offline_extend,offline_extend_oclass
        """
        self.log.info("Offline Extend Testing: oclass")
        self.test_oclass = self.params.get("oclass",
                                           '/run/test_obj_class/*')
        self.run_offline_extend_test(4, data=True,
                                     oclass=self.test_oclass)

    @skipForTicket("DAOS-7195")
    def test_osa_offline_extend_during_aggregation(self):
        """Test ID: DAOS-6294

        Test Description: Extend rank while aggregation is happening
        in parallel

        :avocado: tags=all,full_regression
        :avocado: tags=hw,large
        :avocado: tags=osa,checksum,osa_extend
        :avocado: tags=offline_extend,offline_extend_during_aggregation
        """
        self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                       '/run/aggregation/*')
        self.test_oclass = self.params.get("oclass",
                                           '/run/test_obj_class/*')
        self.log.info("Offline Extend : Aggregation")
        self.run_offline_extend_test(3, data=True,
                                     oclass=self.test_oclass)
class OSAOnlineReintegration(OSAUtils):
    # pylint: disable=too-many-ancestors
    """Online Server Addition online re-integration test class.

    Test Class Description:
        This test runs the daos_server Online reintegration test cases.

    :avocado: recursive
    """

    def setUp(self):
        """Set up for test case."""
        super().setUp()
        self.dmg_command = self.get_dmg_command()
        self.daos_command = DaosCommand(self.bin)
        self.ior_test_sequence = self.params.get(
            "ior_test_sequence", '/run/ior/iorflags/*')
        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
        # Recreate the client hostfile without slots defined
        self.hostfile_clients = write_host_file(
            self.hostlist_clients, self.workdir, None)
        self.pool = None
        # Queue used by the daos_racer thread to report results.
        self.ds_racer_queue = queue.Queue()
        self.daos_racer = None
        # Raise an exception on non-zero dmg exit status.
        self.dmg_command.exit_status_exception = True

    def daos_racer_thread(self):
        """Run daos_racer on the first client; blocks until it completes."""
        self.daos_racer = DaosRacerCommand(self.bin, self.hostlist_clients[0],
                                           self.dmg_command)
        self.daos_racer.get_params(self)
        self.daos_racer.set_environment(
            self.daos_racer.get_environment(self.server_managers[0]))
        self.daos_racer.run()

    def run_online_reintegration_test(self, num_pool, racer=False,
                                      server_boot=False,
                                      oclass=None):
        """Run the Online reintegration test.

        Excludes one random rank while IOR writes are in flight (optionally
        with daos_racer running in parallel), reintegrates it, verifies the
        pool map version increased at each step, then re-reads the data.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            racer (bool) : whether to run daos_racer in parallel with the
                OSA operations. Defaults to False.
            server_boot (bool) : Perform system stop/start on a rank.
                Defaults to False.
            oclass (str) : daos object class string (eg: "RP_2G8").
                Defaults to None.
        """
        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value
        test_seq = self.ior_test_sequence[0]
        # Create a pool
        pool = {}
        # Highest usable rank number; assumes 2 engine ranks per server
        # host — TODO confirm against the server YAML.
        exclude_servers = (len(self.hostlist_servers) * 2) - 1

        # Exclude one rank : other than rank 0.
        rank = random.randint(1, exclude_servers)  # nosec

        # Start the daos_racer thread
        if racer is True:
            daos_racer_thread = threading.Thread(target=self.daos_racer_thread)
            daos_racer_thread.start()
            time.sleep(30)

        for val in range(0, num_pool):
            pool[val] = add_pool(self, connect=False)
            # Disable aggregation so space/rebuild observations are stable.
            pool[val].set_property("reclaim", "disabled")

        # Exclude and reintegrate the pool_uuid, rank and targets
        for val in range(0, num_pool):
            threads = []
            self.pool = pool[val]
            # Instantiate aggregation
            if self.test_during_aggregation is True:
                for _ in range(0, 2):
                    self.run_ior_thread("Write", oclass, test_seq)
                self.delete_extra_container(self.pool)
            # The following thread runs while performing osa operations.
            threads.append(threading.Thread(target=self.run_ior_thread,
                                            kwargs={"action": "Write",
                                                    "oclass": oclass,
                                                    "test": test_seq}))

            # Launch the IOR threads
            for thrd in threads:
                self.log.info("Thread : %s", thrd)
                thrd.start()
                time.sleep(1)
            self.pool.display_pool_daos_space("Pool space: Beginning")
            pver_begin = self.get_pool_version()
            self.log.info("Pool Version at the beginning %s", pver_begin)
            if server_boot is False:
                output = self.dmg_command.pool_exclude(
                    self.pool.uuid, rank)
            else:
                # Exclude the rank by stopping and restarting it instead
                # of issuing a pool exclude.
                output = self.dmg_command.system_stop(ranks=rank, force=True)
                self.pool.wait_for_rebuild(False)
                self.log.info(output)
                output = self.dmg_command.system_start(ranks=rank)

            self.print_and_assert_on_rebuild_failure(output)
            pver_exclude = self.get_pool_version()

            self.log.info("Pool Version after exclude %s", pver_exclude)
            # Check pool version incremented after pool exclude
            # pver_exclude should be greater than
            # pver_begin + 8 targets.
            self.assertTrue(pver_exclude > (pver_begin + 8),
                            "Pool Version Error:  After exclude")
            output = self.dmg_command.pool_reintegrate(self.pool.uuid,
                                                       rank)
            self.print_and_assert_on_rebuild_failure(output)

            pver_reint = self.get_pool_version()
            self.log.info("Pool Version after reintegrate %d", pver_reint)
            # Check pool version incremented after pool reintegrate
            self.assertTrue(pver_reint > (pver_exclude + 1),
                            "Pool Version Error:  After reintegrate")

            # Wait to finish the threads
            for thrd in threads:
                thrd.join()
                if not self.out_queue.empty():
                    self.assert_on_exception()

        # Check data consistency for IOR in future
        # Presently, we are running daos_racer in parallel
        # to IOR and checking the data consistency only
        # for the daos_racer objects after exclude
        # and reintegration.
        if racer is True:
            daos_racer_thread.join()

        for val in range(0, num_pool):
            display_string = "Pool{} space at the End".format(val)
            self.pool = pool[val]
            self.pool.display_pool_daos_space(display_string)
            self.run_ior_thread("Read", oclass, test_seq)
            # Verify object integrity with daos container check.
            self.container = self.pool_cont_dict[self.pool][0]
            kwargs = {"pool": self.pool.uuid,
                      "cont": self.container.uuid}
            output = self.daos_command.container_check(**kwargs)
            self.log.info(output)

    @skipForTicket("DAOS-7420")
    def test_osa_online_reintegration(self):
        """Test ID: DAOS-5075.

        Test Description: Validate Online Reintegration

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=online_reintegration,online_reintegration_basic
        """
        self.log.info("Online Reintegration : Basic test")
        self.run_online_reintegration_test(1)

    @skipForTicket("DAOS-7195")
    def test_osa_online_reintegration_server_stop(self):
        """Test ID: DAOS-5920.

        Test Description: Validate Online Reintegration with server stop

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=online_reintegration,online_reintegration_srv_stop
        """
        self.log.info("Online Reintegration : System stop/start")
        self.run_online_reintegration_test(1, server_boot=True)

    @skipForTicket("DAOS-7420")
    def test_osa_online_reintegration_without_csum(self):
        """Test ID: DAOS-5075.

        Test Description: Validate Online Reintegration without checksum

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=online_reintegration,online_reintegration_without_csum
        """
        self.log.info("Online Reintegration : No Checksum")
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.run_online_reintegration_test(1)

    @skipForTicket("DAOS-7996")
    def test_osa_online_reintegration_with_aggregation(self):
        """Test ID: DAOS-6715

        Test Description: Reintegrate rank while aggregation
        is happening in parallel

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=online_reintegration,online_reintegration_aggregation
        """
        self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                       '/run/aggregation/*')
        self.log.info("Online Reintegration : Aggregation")
        self.run_online_reintegration_test(1)

    @skipForTicket("DAOS-7996")
    def test_osa_online_reintegration_oclass(self):
        """Test ID: DAOS-6715

        Test Description: Reintegrate rank with different object class

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=online_reintegration,online_reintegration_oclass
        """
        self.log.info("Online Reintegration : Object Class")
        for oclass in self.test_oclass:
            self.run_online_reintegration_test(1, oclass=oclass)
class OSAOfflineReintegration(OSAUtils, ServerFillUp):
    # pylint: disable=too-many-ancestors
    """OSA offline Reintegration test cases.

    Test Class Description:
        This test runs daos_server offline reintegration test cases.

    :avocado: recursive
    """

    def setUp(self):
        """Set up for test case."""
        super().setUp()
        self.dmg_command = self.get_dmg_command()
        self.daos_command = DaosCommand(self.bin)
        self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                 '/run/ior/iorflags/*')
        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
        self.ior_test_repetitions = self.params.get("pool_test_repetitions",
                                                    '/run/pool_capacity/*')
        # Number of times the full exclude/reintegrate loop runs; tests may
        # raise this (e.g. rebuild test).
        self.loop_test_cnt = 1
        # Recreate the client hostfile without slots defined
        self.hostfile_clients = write_host_file(self.hostlist_clients,
                                                self.workdir, None)
        # Raise an exception on non-zero dmg exit status.
        self.dmg_command.exit_status_exception = True

    def run_offline_reintegration_test(self, num_pool, data=False,
                                       server_boot=False, oclass=None,
                                       pool_fillup=0):
        """Run the offline reintegration test, optionally with data.

        Creates ``num_pool`` pools (optionally filled via IOR/mdtest or
        start_ior_load), excludes the ranks in ``rank`` (or stops/starts them
        when ``server_boot``), reintegrates them, validates the pool map
        version at each step, then re-reads the data for verification.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            data (bool) : whether pool has no data or to create some
                          data in pool. Defaults to False.
            server_boot (bool) : Perform system stop/start on a rank.
                                 Defaults to False.
            oclass (str) : daos object class string (eg: "RP_2G8")
            pool_fillup (int) : Percentage of pool filled up with data
                                before performing OSA operations.
        """
        # Create a pool
        pool = {}
        random_pool = 0
        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value

        # Exclude ranks [0, 3, 4]
        rank = [0, 3, 4]
        for val in range(0, num_pool):
            pool[val] = TestPool(
                context=self.context, dmg_command=self.get_dmg_command(),
                label_generator=self.label_generator)
            pool[val].get_params(self)
            pool[val].create()
            self.pool = pool[val]
            # Disable aggregation so rebuild observations are stable.
            self.pool.set_property("reclaim", "disabled")
            test_seq = self.ior_test_sequence[0]
            if data:
                # if pool_fillup is greater than 0, then
                # use start_ior_load method from nvme_utils.py.
                # Otherwise, use the osa_utils.py run_ior_thread
                # method.
                if pool_fillup > 0:
                    self.ior_cmd.dfs_oclass.update(oclass)
                    self.ior_cmd.dfs_dir_oclass.update(oclass)
                    self.ior_default_flags = self.ior_w_flags
                    self.ior_cmd.repetitions.update(self.ior_test_repetitions)
                    self.log.info(self.pool.pool_percentage_used())
                    self.start_ior_load(storage='NVMe',
                                        operation="Auto_Write",
                                        percent=pool_fillup)
                    self.log.info(self.pool.pool_percentage_used())
                else:
                    self.run_ior_thread("Write", oclass, test_seq)
                    self.run_mdtest_thread(oclass)
                if self.test_with_snapshot is True:
                    # Create a snapshot of the container
                    # after IOR job completes.
                    self.container.create_snap()
                    self.log.info("Created container snapshot: %s",
                                  self.container.epoch)
                if self.test_during_aggregation is True:
                    # Second write fills the extra container whose deletion
                    # later triggers aggregation.
                    self.run_ior_thread("Write", oclass, test_seq)

        # Exclude all the ranks
        random_pool = random.randint(0, (num_pool - 1))  # nosec
        for _ in range(0, self.loop_test_cnt):
            for val, _ in enumerate(rank):
                self.pool = pool[random_pool]
                self.pool.display_pool_daos_space("Pool space: Beginning")
                pver_begin = self.get_pool_version()
                self.log.info("Pool Version at the beginning %s", pver_begin)
                if server_boot is False:
                    if (self.test_during_rebuild is True and val == 0):
                        # Exclude rank 5
                        output = self.dmg_command.pool_exclude(
                            self.pool.uuid, "5")
                        self.print_and_assert_on_rebuild_failure(output)
                    if self.test_during_aggregation is True:
                        self.delete_extra_container(self.pool)
                        self.simple_osa_reintegrate_loop(rank[val])
                    # For redundancy factor testing, just exclude only
                    # one target on a rank. Don't exclude a rank(s).
                    if (self.test_with_rf is True and val == 0):
                        output = self.dmg_command.pool_exclude(
                            self.pool.uuid, rank[val])
                    elif (self.test_with_rf is True and val > 0):
                        continue
                    else:
                        if pool_fillup > 0 and val > 0:
                            continue
                        output = self.dmg_command.pool_exclude(
                            self.pool.uuid, rank[val])
                else:
                    # Exclude the rank via system stop/start instead of a
                    # pool exclude command.
                    output = self.dmg_command.system_stop(ranks=rank[val],
                                                          force=True)
                    self.print_and_assert_on_rebuild_failure(output)
                    output = self.dmg_command.system_start(ranks=rank[val])
                    # Just try to reintegrate rank 5
                    if (self.test_during_rebuild is True and val == 2):
                        # Reintegrate rank 5
                        output = self.dmg_command.pool_reintegrate(
                            self.pool.uuid, "5")
                self.print_and_assert_on_rebuild_failure(output)

                pver_exclude = self.get_pool_version()
                self.log.info("Pool Version after exclude %s", pver_exclude)
                # Check pool version incremented after pool exclude
                # pver_exclude should be greater than
                # pver_begin + 1 (1 target + exclude)
                self.assertTrue(pver_exclude > (pver_begin + 1),
                                "Pool Version Error:  After exclude")

            # Reintegrate the ranks which was excluded
            for val, _ in enumerate(rank):
                if self.test_with_blank_node is True:
                    # Wipe the pool directory on the rank so it comes back
                    # as a blank node before reintegration.
                    ip_addr, p_num = self.get_ipaddr_for_rank(rank[val])
                    self.remove_pool_dir(ip_addr, p_num)
                if (val == 2 and "RP_2G" in oclass):
                    # Reintegrate only targets 0 and 2 on the last rank.
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, rank[val], "0,2")
                elif (self.test_with_rf is True and val == 0):
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, rank[val])
                elif (self.test_with_rf is True and val > 0):
                    continue
                else:
                    if pool_fillup > 0 and val > 0:
                        continue
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, rank[val])
                self.print_and_assert_on_rebuild_failure(output)

                pver_reint = self.get_pool_version()
                self.log.info("Pool Version after reintegrate %d", pver_reint)
                # Check pool version incremented after pool reintegrate
                self.assertTrue(pver_reint > pver_exclude,
                                "Pool Version Error:  After reintegrate")

            display_string = "Pool{} space at the End".format(random_pool)
            self.pool = pool[random_pool]
            self.pool.display_pool_daos_space(display_string)

        # Finally check whether the written data can be accessed.
        # Also, run the daos cont check (for object integrity)
        for val in range(0, num_pool):
            self.pool = pool[val]
            if data:
                if pool_fillup > 0:
                    self.start_ior_load(storage='NVMe',
                                        operation='Auto_Read',
                                        percent=pool_fillup)
                else:
                    self.run_ior_thread("Read", oclass, test_seq)
                    self.run_mdtest_thread(oclass)
                    self.container = self.pool_cont_dict[self.pool][0]
                    kwargs = {"pool": self.pool.uuid,
                              "cont": self.container.uuid}
                    output = self.daos_command.container_check(**kwargs)
                    self.log.info(output)

    def test_osa_offline_reintegration_without_checksum(self):
        """Test ID: DAOS-6923

        Test Description: Validate Offline Reintegration
        without enabling checksum in container properties.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_daily,ior
        :avocado: tags=offline_reintegration_without_csum
        """
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.log.info("Offline Reintegration : Without Checksum")
        self.run_offline_reintegration_test(1, data=True)

    def test_osa_offline_reintegration_multiple_pools(self):
        """Test ID: DAOS-6923

        Test Description: Validate Offline Reintegration
        with multiple pools

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=offline_reintegration_daily,ior
        :avocado: tags=offline_reintegration_multiple_pools
        """
        self.log.info("Offline Reintegration : Multiple Pools")
        self.run_offline_reintegration_test(5, data=True)

    def test_osa_offline_reintegration_server_stop(self):
        """Test ID: DAOS-6748.

        Test Description: Validate Offline Reintegration with server stop

        :avocado: tags=all,pr,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=offline_reintegration_full,ior
        :avocado: tags=offline_reintegration_srv_stop
        """
        self.log.info("Offline Reintegration : System Start/Stop")
        self.run_offline_reintegration_test(1, data=True, server_boot=True)

    def test_osa_offline_reintegrate_during_rebuild(self):
        """Test ID: DAOS-6923

        Test Description: Reintegrate rank while rebuild
        is happening in parallel

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full,ior
        :avocado: tags=offline_reintegrate_during_rebuild
        """
        self.loop_test_cnt = self.params.get("iterations",
                                             '/run/loop_test/*')
        self.test_during_rebuild = self.params.get("test_with_rebuild",
                                                   '/run/rebuild/*')
        self.log.info("Offline Reintegration : Rebuild")
        self.run_offline_reintegration_test(1, data=True)

    def test_osa_offline_reintegration_oclass(self):
        """Test ID: DAOS-6923

        Test Description: Validate Offline Reintegration
        with different object class

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full,ior
        :avocado: tags=offline_reintegration_oclass
        """
        self.log.info("Offline Reintegration : Object Class")
        for oclass in self.test_oclass:
            self.run_offline_reintegration_test(1, data=True,
                                                server_boot=False,
                                                oclass=oclass)

    def test_osa_offline_reintegrate_during_aggregation(self):
        """Test ID: DAOS-6923

        Test Description: Reintegrate rank while aggregation
        is happening in parallel

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full,ior
        :avocado: tags=offline_reintegrate_during_aggregation
        """
        self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                       '/run/aggregation/*')
        self.log.info("Offline Reintegration : Aggregation")
        self.run_offline_reintegration_test(1, data=True)

    def test_osa_offline_reintegration_with_rf(self):
        """Test ID: DAOS-6923

        Test Description: Validate Offline Reintegration
        with just redundancy factor setting.
        Don't set the oclass during ior run.

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=offline_reintegration_full,mpich
        :avocado: tags=offline_reintegration_with_rf
        """
        self.log.info("Offline Reintegration : RF")
        self.test_with_rf = self.params.get("test_with_rf",
                                            '/run/test_rf/*')
        self.run_offline_reintegration_test(1, data=True)

    def test_osa_offline_reintegrate_with_blank_node(self):
        """Test ID: DAOS-6923

        Test Description: Reintegrate rank with no data.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full
        :avocado: tags=offline_reintegrate_with_blank_node
        """
        self.test_with_blank_node = self.params.get("test_with_blank_node",
                                                    '/run/blank_node/*')
        self.log.info("Offline Reintegration : Test with blank node")
        self.run_offline_reintegration_test(1, data=True)

    def test_osa_offline_reintegrate_after_snapshot(self):
        """Test ID: DAOS-8057

        Test Description: Reintegrate rank after taking snapshot.

        :avocado: tags=all,daily_regression,hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full
        :avocado: tags=offline_reintegrate_after_snapshot
        """
        self.test_with_snapshot = self.params.get("test_with_snapshot",
                                                  '/run/snapshot/*')
        self.log.info("Offline Reintegration : Test with snapshot")
        self.run_offline_reintegration_test(1, data=True)

    def test_osa_offline_reintegrate_with_less_pool_space(self):
        """Test ID: DAOS-7160

        Test Description: Reintegrate rank will less pool space.

        :avocado: tags=all,full_regression,hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full
        :avocado: tags=offline_reintegrate_with_less_pool_space
        """
        self.log.info("Offline Reintegration : Test with less pool space")
        oclass = self.params.get("pool_test_oclass", '/run/pool_capacity/*')
        pool_fillup = self.params.get("pool_fillup", '/run/pool_capacity/*')
        self.run_offline_reintegration_test(1, data=True, oclass=oclass,
                                            pool_fillup=pool_fillup)
class OSAOfflineParallelTest(OSAUtils): # pylint: disable=too-many-ancestors """ Test Class Description: This test runs daos_server offline drain,reintegration, extend test cases in parallel. :avocado: recursive """ def setUp(self): """Set up for test case.""" super().setUp() self.dmg_command = self.get_dmg_command() self.daos_command = DaosCommand(self.bin) self.ior_test_sequence = self.params.get("ior_test_sequence", '/run/ior/iorflags/*') # Start an additional server. self.extra_servers = self.params.get("test_servers", "/run/extra_servers/*") self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*') self.out_queue = queue.Queue() self.dmg_command.exit_status_exception = True self.server_boot = None def dmg_thread(self, action, action_args, results): """Generate different dmg command related to OSA. Args: action_args(dict) : {action: {"puuid": pool[val].uuid, "rank": rank, "target": t_string, "action": action,} results (queue) : dmg command output queue. """ dmg = copy.copy(self.dmg_command) try: if action == "reintegrate": text = "Waiting for rebuild to complete" time.sleep(3) self.print_and_assert_on_rebuild_failure(text) # For each action, read the values from the # dictionary. # example {"exclude" : {"puuid": self.pool, "rank": rank # "target": t_string, "action": exclude}} # getattr is used to obtain the method in dmg object. # eg: dmg -> pool_exclude method, then pass arguments like # puuid, rank, target to the pool_exclude method. 
if action == "exclude" and self.server_boot is True: ranks = action_args[action][1] getattr(dmg, "system stop --ranks={}".format(ranks)) output = "Stopping the rank : {}".format(ranks) self.print_and_assert_on_rebuild_failure(output) getattr(dmg, "system start --ranks={}".format(ranks)) self.print_and_assert_on_rebuild_failure(output) else: getattr(dmg, "pool_{}".format(action))(**action_args[action]) except CommandFailure as _error: results.put("{} failed".format(action)) def run_offline_parallel_test(self, num_pool, data=False, oclass=None): """Run multiple OSA commands in parallel with or without data. Args: num_pool (int) : total pools to create for testing purposes. data (bool) : whether pool has no data or to create some data in pool. Defaults to False. oclass (str) : Daos object class (RP_2G1,etc) """ # Create a pool pool = {} pool_uuid = [] target_list = [] if oclass is None: oclass = self.ior_cmd.dfs_oclass.value # Exclude target : random two targets (target idx : 0-7) n = random.randint(0, 6) target_list.append(n) target_list.append(n + 1) t_string = "{},{}".format(target_list[0], target_list[1]) # Exclude rank 2. rank = 2 test_seq = self.ior_test_sequence[0] for val in range(0, num_pool): pool[val] = TestPool(self.context, dmg_command=self.get_dmg_command()) pool[val].get_params(self) pool[val].create() pool_uuid.append(pool[val].uuid) self.pool = pool[val] self.pool.set_property("reclaim", "disabled") if data: self.run_ior_thread("Write", oclass, test_seq) if oclass != "S1": self.run_mdtest_thread() # if self.test_during_aggregation is set, # Create another container and run the IOR # command using the second container. if self.test_during_aggregation is True: self.run_ior_thread("Write", oclass, test_seq) # Start the additional servers and extend the pool self.log.info("Extra Servers = %s", self.extra_servers) self.start_additional_servers(self.extra_servers) # Give sometime for the additional server to come up. 
for retry in range(0, 10): scan_info = self.get_dmg_command().system_query() if not check_system_query_status(scan_info): if retry == 9: self.fail("One or more servers not in expected status") else: break # Exclude and reintegrate the pool_uuid, rank and targets for val in range(0, num_pool): self.pool = pool[val] self.pool.display_pool_daos_space("Pool space: Beginning") pver_begin = self.get_pool_version() self.log.info("Pool Version at the beginning %s", pver_begin) # If we need to trigger aggregation on pool 1, delete # the second container which has IOR data. if self.test_during_aggregation is True and val == 0: self.delete_extra_container(self.pool) # Create the threads here threads = [] # Action dictionary with OSA dmg command parameters action_args = { "drain": { "pool": self.pool.uuid, "rank": rank, "tgt_idx": None }, "exclude": { "pool": self.pool.uuid, "rank": (rank + 1), "tgt_idx": t_string }, "reintegrate": { "pool": self.pool.uuid, "rank": (rank + 1), "tgt_idx": t_string }, "extend": { "pool": self.pool.uuid, "ranks": (rank + 2), "scm_size": self.pool.scm_size, "nvme_size": self.pool.nvme_size } } for action in sorted(action_args): # Add a dmg thread process = threading.Thread(target=self.dmg_thread, kwargs={ "action": action, "action_args": action_args, "results": self.out_queue }) process.start() threads.append(process) # Wait to finish the threads for thrd in threads: thrd.join() time.sleep(5) # Check the queue for any failure. 
tmp_list = list(self.out_queue.queue) for failure in tmp_list: if "FAIL" in failure: self.fail("Test failed : {0}".format(failure)) for val in range(0, num_pool): display_string = "Pool{} space at the End".format(val) pool[val].display_pool_daos_space(display_string) self.is_rebuild_done(3) self.assert_on_rebuild_failure() pver_end = self.get_pool_version() self.log.info("Pool Version at the End %s", pver_end) self.assertTrue(pver_end >= 26, "Pool Version Error: at the end") if data: self.run_ior_thread("Read", oclass, test_seq) if oclass != "S1": self.run_mdtest_thread() self.container = self.pool_cont_dict[self.pool][0] kwargs = {"pool": self.pool.uuid, "cont": self.container.uuid} output = self.daos_command.container_check(**kwargs) self.log.info(output) @skipForTicket("DAOS-7247") def test_osa_offline_parallel_test(self): """ JIRA ID: DAOS-4752 Test Description: Runs multiple OSA commands in parallel. :avocado: tags=all,daily_regression :avocado: tags=hw,medium,ib2 :avocado: tags=osa,checksum :avocado: tags=offline_parallel,offline_parallel_basic_test """ self.log.info("Offline Parallel Test: Basic Test") self.run_offline_parallel_test(1, data=True) @skipForTicket("DAOS-7247") def test_osa_offline_parallel_test_without_csum(self): """ JIRA ID: DAOS-7161 Test Description: Runs multiple OSA commands in parallel without enabling checksum. :avocado: tags=all,full_regression :avocado: tags=hw,medium,ib2 :avocado: tags=osa :avocado: tags=offline_parallel,offline_parallel_without_csum """ self.test_with_checksum = self.params.get("test_with_checksum", '/run/checksum/*') self.log.info("Offline Parallel Test: Without Checksum") self.run_offline_parallel_test(1, data=True) @skipForTicket("DAOS-7247") def test_osa_offline_parallel_test_rank_boot(self): """ JIRA ID: DAOS-7161 Test Description: Runs multiple OSA commands in parallel with a rank rebooted using system stop/start. 
:avocado: tags=all,full_regression :avocado: tags=hw,medium,ib2 :avocado: tags=osa :avocado: tags=offline_parallel,offline_parallel_srv_rank_boot """ self.test_with_checksum = self.params.get("test_with_checksum", '/run/checksum/*') self.server_boot = self.params.get("flags", '/run/system_stop_start/*') self.log.info("Offline Parallel Test: Restart a rank") self.run_offline_parallel_test(1, data=True) @skipForTicket("DAOS-7195,DAOS-7247") def test_osa_offline_parallel_test_with_aggregation(self): """ JIRA ID: DAOS-7161 Test Description: Runs multiple OSA commands in parallel with aggregation turned on. :avocado: tags=all,full_regression :avocado: tags=hw,medium,ib2 :avocado: tags=osa :avocado: tags=offline_parallel,offline_parallel_with_aggregation """ self.test_during_aggregation = self.params.get("test_with_aggregation", '/run/aggregation/*') self.log.info("Offline Parallel Test : Aggregation") self.run_offline_parallel_test(1, data=True) @skipForTicket("DAOS-7247") def test_osa_offline_parallel_test_oclass(self): """ JIRA ID: DAOS-7161 Test Description: Runs multiple OSA commands in parallel with different object class. :avocado: tags=all,full_regression :avocado: tags=hw,medium,ib2 :avocado: tags=osa :avocado: tags=offline_parallel,offline_parallel_oclass """ self.log.info("Offline Parallel Test : OClass") # Presently, the script is limited and supports only one extra # object class testing. We are testing S1 apart from RP_2G1. self.run_offline_parallel_test(1, data=True, oclass=self.test_oclass[0])
class OSAOfflineDrain(OSAUtils):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: This test runs
    daos_server offline drain test cases.

    :avocado: recursive
    """

    def setUp(self):
        """Set up for test case."""
        super().setUp()
        self.dmg_command = self.get_dmg_command()
        self.daos_command = DaosCommand(self.bin)
        # Server ranks to drain, supplied by the test YAML.
        self.ranks = self.params.get("rank_list", '/run/test_ranks/*')
        # Object classes exercised by the oclass test variant.
        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
        self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                 '/run/ior/iorflags/*')
        # Recreate the client hostfile without slots defined
        self.hostfile_clients = write_host_file(self.hostlist_clients,
                                                self.workdir,
                                                None)

    def simple_drain_reintegrate_loop(self, rank, loop_time=100):
        """This method performs drain and reintegration on a rank,
        for a certain amount of time.

        Args:
            rank (int): daos server rank.
            loop_time: Total time to perform drain/reintegrate
                operation in a loop. (Default : 100 secs)
        """
        start_time = 0
        finish_time = 0
        # NOTE(review): start_time is reset each pass, so this condition
        # bounds the duration of a single drain+reintegrate iteration rather
        # than the cumulative loop time -- confirm intent.
        while int(finish_time - start_time) < loop_time:
            start_time = time.time()
            output = self.dmg_command.pool_drain(self.pool.uuid, rank)
            self.print_and_assert_on_rebuild_failure(output)
            output = self.dmg_command.pool_reintegrate(self.pool.uuid, rank)
            self.print_and_assert_on_rebuild_failure(output)
            finish_time = time.time()

    def run_offline_drain_test(self, num_pool, data=False, oclass=None):
        """Run the offline drain without data.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            data (bool) : whether pool has no data or to create
                some data in pool. Defaults to False.
            oclass (str): DAOS object class (eg: RP_2G1,etc)
        """
        # Create a pool
        pool = {}
        target_list = []

        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value

        # Exclude target : random two targets (target idx : 0-7)
        n = random.randint(0, 6)
        target_list.append(n)
        target_list.append(n + 1)
        # Comma-separated target index pair passed to dmg pool drain.
        t_string = "{},{}".format(target_list[0], target_list[1])

        # Create the pools, disable reclaim so space accounting is stable,
        # and optionally seed each pool with IOR/mdtest data.
        for val in range(0, num_pool):
            pool[val] = TestPool(self.context,
                                 dmg_command=self.dmg_command)
            pool[val].get_params(self)
            pool[val].create()
            self.pool = pool[val]
            self.pool.set_property("reclaim", "disabled")
            test_seq = self.ior_test_sequence[0]
            if data:
                self.run_ior_thread("Write", oclass, test_seq)
                self.run_mdtest_thread()
                if self.test_during_aggregation is True:
                    # Extra write so aggregation has something to reclaim.
                    self.run_ior_thread("Write", oclass, test_seq)

        # Drain ranks and targets
        for val in range(0, num_pool):
            # Drain ranks provided in YAML file
            for index, rank in enumerate(self.ranks):
                self.pool = pool[val]
                # If we are testing using multiple pools, reintegrate
                # the rank back and then drain.
                self.pool.display_pool_daos_space("Pool space: Beginning")
                pver_begin = self.get_pool_version()
                self.log.info("Pool Version at the beginning %s", pver_begin)
                if self.test_during_aggregation is True and index == 0:
                    # Re-enable time-based reclaim so aggregation runs while
                    # the drain/reintegrate loop churns the first rank.
                    self.pool.set_property("reclaim", "time")
                    self.delete_extra_container(self.pool)
                    self.simple_drain_reintegrate_loop(rank)
                if (self.test_during_rebuild is True and val == 0):
                    # Exclude rank 3
                    output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                           "3")
                    self.pool.wait_for_rebuild(True)
                output = self.dmg_command.pool_drain(self.pool.uuid,
                                                     rank, t_string)
                self.print_and_assert_on_rebuild_failure(output)

                pver_drain = self.get_pool_version()
                self.log.info("Pool Version after drain %d", pver_drain)
                # Check pool version incremented after pool drain
                self.assertTrue(pver_drain > (pver_begin + 1),
                                "Pool Version Error: After drain")
                if num_pool > 1:
                    # Put the rank back so the next pool can drain it too.
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, rank, t_string)
                    self.print_and_assert_on_rebuild_failure(output)
                if (self.test_during_rebuild is True and val == 0):
                    # Reintegrate rank 3
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, "3")
                    self.print_and_assert_on_rebuild_failure(output)

        for val in range(0, num_pool):
            display_string = "Pool{} space at the End".format(val)
            pool[val].display_pool_daos_space(display_string)

        if data:
            # Verify the data written before the drain is still readable,
            # then run the daos container object-integrity check.
            # NOTE(review): self.pool here is the last pool touched by the
            # drain loop, so only that pool's container is checked when
            # num_pool > 1 -- confirm intent.
            self.run_ior_thread("Read", oclass, test_seq)
            self.run_mdtest_thread()
            self.container = self.pool_cont_dict[self.pool][0]
            kwargs = {"pool": self.pool.uuid,
                      "cont": self.container.uuid}
            output = self.daos_command.container_check(**kwargs)
            self.log.info(output)

    def test_osa_offline_drain(self):
        """JIRA ID: DAOS-4750

        Test Description: Validate Offline Drain

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain,checksum,mpich
        :avocado: tags=offline_drain,offline_drain_with_csum
        """
        self.log.info("Offline Drain : Basic Drain")
        self.run_offline_drain_test(1, True)

    def test_osa_offline_drain_without_checksum(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain
        without enabling checksum in container properties.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain
        :avocado: tags=offline_drain,offline_drain_without_csum
        """
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.log.info("Offline Drain : Without Checksum")
        self.run_offline_drain_test(1, data=True)

    def test_osa_offline_drain_during_aggregation(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain during aggregation

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain,checksum
        :avocado: tags=offline_drain,offline_drain_during_aggregation
        """
        self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                       '/run/aggregation/*')
        self.log.info("Offline Drain : During Aggregation")
        self.run_offline_drain_test(1, data=True)

    def test_osa_offline_drain_oclass(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain with different
        object class

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain
        :avocado: tags=offline_drain,offline_drain_oclass
        """
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.log.info("Offline Drain : Oclass")
        # One full drain pass per object class from the YAML list.
        for oclass in self.test_oclass:
            self.run_offline_drain_test(1, data=True, oclass=oclass)

    def test_osa_offline_drain_multiple_pools(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain with multiple pools

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain
        :avocado: tags=offline_drain,offline_drain_multiple_pools
        """
        self.log.info("Offline Drain : Multiple Pools")
        self.run_offline_drain_test(2, data=True)

    @skipForTicket("DAOS-7186")
    def test_osa_offline_drain_during_rebuild(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain during rebuild

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain
        :avocado: tags=offline_drain,offline_drain_during_rebuild
        """
        self.test_during_rebuild = self.params.get("test_with_rebuild",
                                                   '/run/rebuild/*')
        self.log.info("Offline Drain : During Rebuild")
        self.run_offline_drain_test(1, data=True)
class OSAOfflineDrain(OSAUtils, ServerFillUp):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: This test runs
    daos_server offline drain test cases.

    :avocado: recursive
    """

    def setUp(self):
        """Set up for test case."""
        super().setUp()
        self.dmg_command = self.get_dmg_command()
        self.daos_command = DaosCommand(self.bin)
        # Server ranks to drain, supplied by the test YAML.
        self.ranks = self.params.get("rank_list", '/run/test_ranks/*')
        # Object classes exercised by the oclass test variant.
        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
        self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                 '/run/ior/iorflags/*')
        # Recreate the client hostfile without slots defined
        self.hostfile_clients = write_host_file(self.hostlist_clients,
                                                self.workdir,
                                                None)

    def run_offline_drain_test(self, num_pool, data=False, oclass=None,
                               pool_fillup=0):
        """Run the offline drain without data.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            data (bool) : whether pool has no data or to create
                some data in pool. Defaults to False.
            oclass (str): DAOS object class (eg: RP_2G1,etc)
            pool_fillup (int): percentage of NVMe capacity to fill via
                start_ior_load; 0 uses the regular IOR/mdtest threads.
                Defaults to 0.
        """
        # Create a pool
        pool = {}
        target_list = []

        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value

        # Exclude target : random two targets (target idx : 0-7)
        n = random.randint(0, 6)  #nosec
        target_list.append(n)
        target_list.append(n + 1)
        # Comma-separated target index pair passed to dmg pool drain.
        t_string = "{},{}".format(target_list[0], target_list[1])

        for val in range(0, num_pool):
            pool[val] = TestPool(context=self.context,
                                 dmg_command=self.get_dmg_command(),
                                 label_generator=self.label_generator)
            pool[val].get_params(self)
            pool[val].create()
            self.pool = pool[val]
            self.pool.set_property("reclaim", "disabled")
            test_seq = self.ior_test_sequence[0]
            if data:
                # if pool_fillup is greater than 0, then
                # use start_ior_load method from nvme_utils.py.
                # Otherwise, use the osa_utils.py run_ior_thread
                # method.
                if pool_fillup > 0:
                    self.ior_cmd.dfs_oclass.update(oclass)
                    self.ior_cmd.dfs_dir_oclass.update(oclass)
                    self.ior_default_flags = self.ior_w_flags
                    self.log.info(self.pool.pool_percentage_used())
                    self.start_ior_load(storage='NVMe',
                                        operation="Auto_Write",
                                        percent=pool_fillup)
                    self.log.info(self.pool.pool_percentage_used())
                else:
                    self.run_ior_thread("Write", oclass, test_seq)
                    self.run_mdtest_thread(oclass)
                if self.test_with_snapshot is True:
                    # Create a snapshot of the container
                    # after IOR job completes.
                    self.container.create_snap()
                    self.log.info("Created container snapshot: %s",
                                  self.container.epoch)
                if self.test_during_aggregation is True:
                    # Extra write so aggregation has something to reclaim.
                    self.run_ior_thread("Write", oclass, test_seq)

        # Drain ranks and targets
        for val in range(0, num_pool):
            # Drain ranks provided in YAML file
            for index, rank in enumerate(self.ranks):
                self.pool = pool[val]
                # If we are testing using multiple pools, reintegrate
                # the rank back and then drain.
                self.pool.display_pool_daos_space("Pool space: Beginning")
                pver_begin = self.get_pool_version()
                self.log.info("Pool Version at the beginning %s", pver_begin)
                if self.test_during_aggregation is True and index == 0:
                    # Re-enable time-based reclaim so aggregation runs while
                    # the drain/reintegrate loop churns the first rank.
                    self.pool.set_property("reclaim", "time")
                    self.delete_extra_container(self.pool)
                    self.simple_osa_reintegrate_loop(rank=rank,
                                                     action="drain")
                if (self.test_during_rebuild is True and val == 0):
                    # Exclude rank 3
                    output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                           "3")
                    self.pool.wait_for_rebuild(True)
                # If the pool is filled up just drain only a single rank.
                if pool_fillup > 0 and index > 0:
                    continue
                output = self.dmg_command.pool_drain(self.pool.uuid,
                                                     rank, t_string)
                self.print_and_assert_on_rebuild_failure(output)

                pver_drain = self.get_pool_version()
                self.log.info("Pool Version after drain %d", pver_drain)
                # Check pool version incremented after pool drain
                self.assertTrue(pver_drain > (pver_begin + 1),
                                "Pool Version Error: After drain")
                if num_pool > 1:
                    # Put the rank back so the next pool can drain it too.
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, rank, t_string)
                    self.print_and_assert_on_rebuild_failure(output)
                if (self.test_during_rebuild is True and val == 0):
                    # Reintegrate rank 3
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, "3")
                    self.print_and_assert_on_rebuild_failure(output)

        for val in range(0, num_pool):
            display_string = "Pool{} space at the End".format(val)
            pool[val].display_pool_daos_space(display_string)

        if data:
            if pool_fillup > 0:
                # Read back the fill-up data set through ServerFillUp.
                self.start_ior_load(storage='NVMe', operation='Auto_Read',
                                    percent=pool_fillup)
            else:
                # Verify the pre-drain data, then run the daos container
                # object-integrity check on the tracked container.
                self.run_ior_thread("Read", oclass, test_seq)
                self.run_mdtest_thread(oclass)
                self.container = self.pool_cont_dict[self.pool][0]
                kwargs = {"pool": self.pool.uuid,
                          "cont": self.container.uuid}
                output = self.daos_command.container_check(**kwargs)
                self.log.info(output)

    def test_osa_offline_drain(self):
        """JIRA ID: DAOS-4750

        Test Description: Validate Offline Drain

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain,checksum,ior
        :avocado: tags=offline_drain,offline_drain_with_csum
        """
        self.log.info("Offline Drain : Basic Drain")
        self.run_offline_drain_test(1, True)

    def test_osa_offline_drain_without_checksum(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain
        without enabling checksum in container properties.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain
        :avocado: tags=offline_drain,offline_drain_without_csum
        """
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.log.info("Offline Drain : Without Checksum")
        self.run_offline_drain_test(1, data=True)

    def test_osa_offline_drain_during_aggregation(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain during aggregation

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain,checksum
        :avocado: tags=offline_drain,offline_drain_during_aggregation
        """
        self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                       '/run/aggregation/*')
        self.log.info("Offline Drain : During Aggregation")
        self.run_offline_drain_test(1, data=True)

    def test_osa_offline_drain_oclass(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain with different
        object class

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain
        :avocado: tags=offline_drain,offline_drain_oclass
        """
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.log.info("Offline Drain : Oclass")
        # One full drain pass per object class from the YAML list.
        for oclass in self.test_oclass:
            self.run_offline_drain_test(1, data=True, oclass=oclass)

    def test_osa_offline_drain_multiple_pools(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain with multiple pools

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain
        :avocado: tags=offline_drain,offline_drain_multiple_pools
        """
        self.log.info("Offline Drain : Multiple Pools")
        self.run_offline_drain_test(2, data=True)

    def test_osa_offline_drain_during_rebuild(self):
        """Test ID: DAOS-7159

        Test Description: Validate Offline Drain during rebuild

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain
        :avocado: tags=offline_drain,offline_drain_during_rebuild
        """
        self.test_during_rebuild = self.params.get("test_with_rebuild",
                                                   '/run/rebuild/*')
        self.log.info("Offline Drain : During Rebuild")
        self.run_offline_drain_test(1, data=True)

    # NOTE(review): "snapsot" in the method name looks like a typo for
    # "snapshot"; left unchanged because the name is the avocado test ID.
    def test_osa_offline_drain_after_snapsot(self):
        """Test ID: DAOS-8057

        Test Description: Validate Offline Drain after taking snapshot.

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,osa_drain,checksum
        :avocado: tags=offline_drain,offline_drain_after_snapshot
        """
        self.test_with_snapshot = self.params.get("test_with_snapshot",
                                                  '/run/snapshot/*')
        self.log.info("Offline Drain : After taking snapshot")
        self.run_offline_drain_test(1, data=True)

    def test_osa_offline_drain_with_less_pool_space(self):
        """Test ID: DAOS-7160

        Test Description: Drain rank after with less pool space.

        :avocado: tags=all,full_regression,hw,medium,ib2
        :avocado: tags=osa,offline_drain_full
        :avocado: tags=offline_drain_with_less_pool_space
        """
        self.log.info("Offline Drain : Test with less pool space")
        oclass = self.params.get("pool_test_oclass", '/run/pool_capacity/*')
        pool_fillup = self.params.get("pool_fillup", '/run/pool_capacity/*')
        self.run_offline_drain_test(1, data=True, oclass=oclass,
                                    pool_fillup=pool_fillup)
class OSAOfflineReintegration(OSAUtils):
    # pylint: disable=too-many-ancestors
    """OSA offline Reintegration test cases.

    Test Class Description: This test runs
    daos_server offline reintegration test cases.

    :avocado: recursive
    """

    def setUp(self):
        """Set up for test case."""
        super().setUp()
        self.dmg_command = self.get_dmg_command()
        self.daos_command = DaosCommand(self.bin)
        self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                 '/run/ior/iorflags/*')
        # Object classes exercised by the oclass test variant.
        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
        # Number of exclude/reintegrate passes; overridden by the
        # during-rebuild test from /run/loop_test/*.
        self.loop_test_cnt = 1
        # Recreate the client hostfile without slots defined
        self.hostfile_clients = write_host_file(self.hostlist_clients,
                                                self.workdir,
                                                None)
        self.dmg_command.exit_status_exception = True

    def run_offline_reintegration_test(self, num_pool, data=False,
                                       server_boot=False, oclass=None):
        """Run the offline reintegration without data.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            data (bool) : whether pool has no data or to create
                some data in pool. Defaults to False.
            server_boot (bool) : Perform system stop/start on a rank.
                Defaults to False.
            oclass (str) : daos object class string (eg: "RP_2G8")
        """
        # Create a pool
        pool = {}
        random_pool = 0
        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value

        # Exclude ranks [0, 3, 4]
        rank = [0, 3, 4]
        for val in range(0, num_pool):
            pool[val] = TestPool(self.context,
                                 dmg_command=self.get_dmg_command())
            pool[val].get_params(self)
            pool[val].create()
            self.pool = pool[val]
            self.pool.set_property("reclaim", "disabled")
            test_seq = self.ior_test_sequence[0]
            if data:
                self.run_ior_thread("Write", oclass, test_seq)
                self.run_mdtest_thread()
                if self.test_during_aggregation is True:
                    # Extra write so aggregation has something to reclaim.
                    self.run_ior_thread("Write", oclass, test_seq)

        # Exclude all the ranks
        # Pick one pool at random to run the exclude/reintegrate cycle on.
        random_pool = random.randint(0, (num_pool - 1))
        for _ in range(0, self.loop_test_cnt):
            for val, _ in enumerate(rank):
                self.pool = pool[random_pool]
                self.pool.display_pool_daos_space("Pool space: Beginning")
                pver_begin = self.get_pool_version()
                self.log.info("Pool Version at the beginning %s", pver_begin)
                if server_boot is False:
                    if (self.test_during_rebuild is True and val == 0):
                        # Exclude rank 5
                        output = self.dmg_command.pool_exclude(
                            self.pool.uuid, "5")
                        self.print_and_assert_on_rebuild_failure(output)
                    if self.test_during_aggregation is True:
                        self.delete_extra_container(self.pool)
                        self.simple_exclude_reintegrate_loop(rank[val])
                    output = self.dmg_command.pool_exclude(
                        self.pool.uuid, rank[val])
                    # Check the IOR data after exclude
                    if data:
                        self.run_ior_thread("Read", oclass, test_seq)
                else:
                    # Take the rank down and back up via dmg system
                    # stop/start instead of a pool exclude.
                    output = self.dmg_command.system_stop(ranks=rank[val],
                                                          force=True)
                    self.print_and_assert_on_rebuild_failure(output)
                    # Check the IOR data after system stop
                    if data and (val == 0):
                        self.run_ior_thread("Read", oclass, test_seq)
                    output = self.dmg_command.system_start(ranks=rank[val])
                # Just try to reintegrate rank 5
                if (self.test_during_rebuild is True and val == 2):
                    # Reintegrate rank 5
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, "5")
                    self.print_and_assert_on_rebuild_failure(output)
                pver_exclude = self.get_pool_version()
                self.log.info("Pool Version after exclude %s", pver_exclude)
                # Check pool version incremented after pool exclude
                # pver_exclude should be greater than
                # pver_begin + 3 (2 targets + exclude)
                self.assertTrue(pver_exclude > (pver_begin + 3),
                                "Pool Version Error: After exclude")

            # Reintegrate the ranks which was excluded
            for val, _ in enumerate(rank):
                if (val == 2 and "RP_2G" in oclass):
                    # Reintegrate only targets 0 and 2 of the last rank for
                    # replicated object classes.
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, rank[val], "0,2")
                else:
                    output = self.dmg_command.pool_reintegrate(
                        self.pool.uuid, rank[val])
                self.print_and_assert_on_rebuild_failure(output)

                pver_reint = self.get_pool_version()
                self.log.info("Pool Version after reintegrate %d", pver_reint)
                # Check pool version incremented after pool reintegrate
                self.assertTrue(pver_reint > (pver_exclude + 1),
                                "Pool Version Error: After reintegrate")

            display_string = "Pool{} space at the End".format(random_pool)
            self.pool = pool[random_pool]
            self.pool.display_pool_daos_space(display_string)

        # Finally check whether the written data can be accessed.
        # Also, run the daos cont check (for object integrity)
        for val in range(0, num_pool):
            self.pool = pool[val]
            if data:
                self.run_ior_thread("Read", oclass, test_seq)
                self.run_mdtest_thread()
                self.container = self.pool_cont_dict[self.pool][0]
                kwargs = {"pool": self.pool.uuid,
                          "cont": self.container.uuid}
                output = self.daos_command.container_check(**kwargs)
                self.log.info(output)

    def test_osa_offline_reintegration_without_checksum(self):
        """Test ID: DAOS-6923

        Test Description: Validate Offline Reintegration
        without enabling checksum in container properties.

        :avocado: tags=all,pr,daily_regression,hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_daily
        :avocado: tags=offline_reintegration_without_csum
        """
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.log.info("Offline Reintegration : Without Checksum")
        self.run_offline_reintegration_test(1, data=True)

    def test_osa_offline_reintegration_multiple_pools(self):
        """Test ID: DAOS-6923

        Test Description: Validate Offline Reintegration
        with multiple pools

        :avocado: tags=all,daily_regression,hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_daily
        :avocado: tags=offline_reintegration_multiple_pools
        """
        self.log.info("Offline Reintegration : Multiple Pools")
        self.run_offline_reintegration_test(5, data=True)

    @skipForTicket("DAOS-6807")
    def test_osa_offline_reintegration_server_stop(self):
        """Test ID: DAOS-6748.

        Test Description: Validate Offline Reintegration with server stop

        :avocado: tags=all,pr,daily_regression,hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_daily
        :avocado: tags=offline_reintegration_srv_stop
        """
        self.log.info("Offline Reintegration : System Start/Stop")
        self.run_offline_reintegration_test(1, data=True, server_boot=True)

    def test_osa_offline_reintegrate_during_rebuild(self):
        """Test ID: DAOS-6923

        Test Description: Reintegrate rank while rebuild
        is happening in parallel

        :avocado: tags=all,full_regression,hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full
        :avocado: tags=offline_reintegrate_during_rebuild
        """
        self.loop_test_cnt = self.params.get("iterations",
                                             '/run/loop_test/*')
        self.test_during_rebuild = self.params.get("test_with_rebuild",
                                                   '/run/rebuild/*')
        self.log.info("Offline Reintegration : Rebuild")
        self.run_offline_reintegration_test(1, data=True)

    def test_osa_offline_reintegration_oclass(self):
        """Test ID: DAOS-6923

        Test Description: Validate Offline Reintegration
        with different object class

        :avocado: tags=all,full_regression,hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full
        :avocado: tags=offline_reintegration_oclass
        """
        self.log.info("Offline Reintegration : Object Class")
        # One full exclude/reintegrate pass per object class from the YAML.
        for oclass in self.test_oclass:
            self.run_offline_reintegration_test(1, data=True,
                                                server_boot=False,
                                                oclass=oclass)

    def test_osa_offline_reintegrate_during_aggregation(self):
        """Test ID: DAOS-6923

        Test Description: Reintegrate rank while aggregation
        is happening in parallel

        :avocado: tags=all,full_regression,hw,medium,ib2
        :avocado: tags=osa,offline_reintegration_full
        :avocado: tags=offline_reintegrate_during_aggregation
        """
        self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                       '/run/aggregation/*')
        self.log.info("Offline Reintegration : Aggregation")
        self.run_offline_reintegration_test(1, data=True)
class OSAOnlineExtend(OSAUtils):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: This test runs
    daos_server Online Extend test cases.

    :avocado: recursive
    """

    def setUp(self):
        """Set up for test case."""
        super().setUp()
        self.dmg_command = self.get_dmg_command()
        self.daos_command = DaosCommand(self.bin)
        self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                 '/run/ior/iorflags/*')
        # Object classes exercised by the oclass test variant.
        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
        # Ranks passed to dmg pool extend.
        self.ranks = self.params.get("rank_list", '/run/test_ranks/*')
        # Start an additional server.
        self.extra_servers = self.params.get("test_servers",
                                             "/run/extra_servers/*")
        # Recreate the client hostfile without slots defined
        self.hostfile_clients = write_host_file(self.hostlist_clients,
                                                self.workdir,
                                                None)
        self.pool = None
        self.dmg_command.exit_status_exception = True
        self.daos_racer = None

    def daos_racer_thread(self):
        """Start the daos_racer thread."""
        self.daos_racer = DaosRacerCommand(self.bin,
                                           self.hostlist_clients[0],
                                           self.dmg_command)
        self.daos_racer.get_params(self)
        self.daos_racer.set_environment(
            self.daos_racer.get_environment(self.server_managers[0]))
        self.daos_racer.run()

    def run_online_extend_test(self, num_pool, racer=False,
                               oclass=None, app_name="ior"):
        """Run the Online extend without data.

        Args:
            num_pool(int) : total pools to create for testing purposes.
            racer(bool) : Run the testing along with daos_racer.
                Defaults to False.
            oclass(str) : Object Class (eg: RP_2G1, etc). Default to None.
            app_name(str) : App (ior or mdtest) to run during the testing.
                Defaults to ior.
        """
        # Pool dictionary
        pool = {}

        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value
        test_seq = self.ior_test_sequence[0]

        # Start the daos_racer thread
        if racer is True:
            daos_racer_thread = threading.Thread(
                target=self.daos_racer_thread)
            daos_racer_thread.start()
            # Let daos_racer generate some load before the extend starts.
            time.sleep(30)

        for val in range(0, num_pool):
            pool[val] = TestPool(context=self.context,
                                 dmg_command=self.get_dmg_command(),
                                 label_generator=self.label_generator)
            pool[val].get_params(self)
            pool[val].create()
            pool[val].set_property("reclaim", "disabled")

        # Extend the pool_uuid, rank and targets
        for val in range(0, num_pool):
            threads = []
            self.pool = pool[val]

            # Start the additional servers and extend the pool
            self.log.info("Extra Servers = %s", self.extra_servers)
            self.start_additional_servers(self.extra_servers)
            if self.test_during_aggregation is True:
                # Write twice so aggregation has something to reclaim.
                for _ in range(0, 2):
                    self.run_ior_thread("Write", oclass, test_seq)
                self.delete_extra_container(self.pool)
            # The following thread runs while performing osa operations.
            if app_name == "ior":
                threads.append(threading.Thread(target=self.run_ior_thread,
                                                kwargs={"action": "Write",
                                                        "oclass": oclass,
                                                        "test": test_seq}))
            else:
                threads.append(threading.Thread(
                    target=self.run_mdtest_thread))

            # Make sure system map has all ranks in joined state.
            for retry in range(0, 10):
                scan_info = self.get_dmg_command().system_query()
                if not check_system_query_status(scan_info):
                    if retry == 9:
                        self.fail("One or more servers not in expected "
                                  "status")
                else:
                    break

            # Launch the IOR or mdtest thread
            for thrd in threads:
                self.log.info("Thread : %s", thrd)
                thrd.start()
                time.sleep(1)

            self.pool.display_pool_daos_space("Pool space: Beginning")
            pver_begin = self.get_pool_version()
            self.log.info("Pool Version at the beginning %s", pver_begin)

            # Extend the pool onto the newly started ranks while the I/O
            # thread is still running (the "online" part of the test).
            output = self.dmg_command.pool_extend(self.pool.uuid,
                                                  self.ranks)
            self.print_and_assert_on_rebuild_failure(output)

            pver_extend = self.get_pool_version()
            self.log.info("Pool Version after extend %s", pver_extend)
            # Check pool version incremented after pool exclude
            self.assertTrue(pver_extend > pver_begin,
                            "Pool Version Error: After extend")

            # Wait to finish the threads
            for thrd in threads:
                thrd.join()
                if not self.out_queue.empty():
                    self.assert_on_exception()

        # Check data consistency for IOR in future
        # Presently, we are running daos_racer in parallel
        # to IOR and checking the data consistency only
        # for the daos_racer objects after exclude
        # and reintegration.
        if racer is True:
            daos_racer_thread.join()

        for val in range(0, num_pool):
            display_string = "Pool{} space at the End".format(val)
            self.pool = pool[val]
            self.pool.display_pool_daos_space(display_string)
            self.run_ior_thread("Read", oclass, test_seq)
            self.container = self.pool_cont_dict[self.pool][0]
            kwargs = {"pool": self.pool.uuid,
                      "cont": self.container.uuid}
            output = self.daos_command.container_check(**kwargs)
            self.log.info(output)

    @skipForTicket("DAOS-7195,DAOS-7955")
    def test_osa_online_extend(self):
        """Test ID: DAOS-4751

        Test Description: Validate Online extend with checksum
        enabled.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_extend,online_extend,online_extend_with_csum
        """
        self.log.info("Online Extend : With Checksum")
        self.run_online_extend_test(1)

    @skipForTicket("DAOS-7195,DAOS-7955")
    def test_osa_online_extend_without_checksum(self):
        """Test ID: DAOS-6645

        Test Description: Validate Online extend without checksum
        enabled.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_extend,online_extend,online_extend_without_csum
        """
        self.log.info("Online Extend : Without Checksum")
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.run_online_extend_test(1)

    @skipForTicket("DAOS-7195,DAOS-7955")
    def test_osa_online_extend_oclass(self):
        """Test ID: DAOS-6645

        Test Description: Validate Online extend with different
        object class.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_extend,online_extend,online_extend_oclass
        """
        self.log.info("Online Extend : Oclass")
        self.run_online_extend_test(1, oclass=self.test_oclass[0])

    @skipForTicket("DAOS-7195,DAOS-7955")
    def test_osa_online_extend_mdtest(self):
        """Test ID: DAOS-6645

        Test Description: Validate Online extend with mdtest
        application.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_extend,online_extend,online_extend_mdtest
        """
        self.log.info("Online Extend : Mdtest")
        self.run_online_extend_test(1, app_name="mdtest")

    @skipForTicket("DAOS-7195,DAOS-7955")
    def test_osa_online_extend_with_aggregation(self):
        """Test ID: DAOS-6645

        Test Description: Validate Online extend with aggregation
        on.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_extend,online_extend,online_extend_with_aggregation
        """
        self.log.info("Online Extend : Aggregation")
        self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                       '/run/aggregation/*')
        self.run_online_extend_test(1)
class OSAOnlineDrain(OSAUtils):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: This test runs
    daos_server Online Drain test cases.

    :avocado: recursive
    """
    def setUp(self):
        """Set up for test case."""
        super().setUp()
        self.dmg_command = self.get_dmg_command()
        self.daos_command = DaosCommand(self.bin)
        # IOR transfer/block-size sequences from the test yaml.
        self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                 '/run/ior/iorflags/*')
        # Object classes to iterate over in the oclass variant test.
        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
        # Recreate the client hostfile without slots defined
        self.hostfile_clients = write_host_file(self.hostlist_clients,
                                                self.workdir, None)
        self.pool = None
        # Raise on non-zero dmg exit status instead of returning it.
        self.dmg_command.exit_status_exception = True

    def run_online_drain_test(self, num_pool, oclass=None, app_name="ior"):
        """Run the Online drain without data.

        Drains one random rank plus two adjacent targets from each pool while
        an I/O workload (ior or mdtest) is running in a background thread,
        then verifies the pool map version advanced and the data is readable.

        Args:
            num_pool(int) : total pools to create for testing purposes.
            oclass(str) : Object class type (RP_2G1, etc)
            app_name(str) : application to run on parallel (ior or mdtest)
                            Defaults to ior.
        """
        # Create a pool
        self.pool = []
        target_list = []
        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value
        test_seq = self.ior_test_sequence[0]
        # Highest drainable rank index.
        # NOTE(review): the *2 assumes two engine ranks per server host —
        # confirm against the server yaml config.
        drain_servers = (len(self.hostlist_servers) * 2) - 1
        # Exclude target : random two targets (target idx : 0-7)
        # Pick n in 0-6 so that both n and n+1 stay within 0-7.
        n = random.randint(0, 6)
        target_list.append(n)
        target_list.append(n + 1)
        t_string = "{},{}".format(target_list[0], target_list[1])
        # Drain one of the ranks (or server); rank 0 is never drained.
        rank = random.randint(1, drain_servers)
        for val in range(0, num_pool):
            self.pool.append(self.get_pool())
            # Disable reclaim so aggregation does not interfere unless the
            # aggregation variant explicitly re-enables it.
            self.pool[-1].set_property("reclaim", "disabled")
        # Drain the rank and targets
        for val in range(0, num_pool):
            threads = []
            # Instantiate aggregation: pre-write twice so there is data for
            # aggregation to reclaim, then drop the extra container.
            if self.test_during_aggregation is True:
                for _ in range(0, 2):
                    self.run_ior_thread("Write", oclass, test_seq)
                self.delete_extra_container(self.pool[val])
            # The following thread runs while performing osa operations.
            if app_name == "ior":
                threads.append(threading.Thread(target=self.run_ior_thread,
                                                kwargs={"action": "Write",
                                                        "oclass": oclass,
                                                        "test": test_seq}))
            else:
                threads.append(threading.Thread(target=self.run_mdtest_thread))
            # Launch the IOR threads
            for thrd in threads:
                self.log.info("Thread : %s", thrd)
                thrd.start()
                time.sleep(1)
            # Wait the threads to write some data before drain.
            time.sleep(5)
            self.pool[val].display_pool_daos_space("Pool space: Beginning")
            pver_begin = self.get_pool_version()
            self.log.info("Pool Version at the beginning %s", pver_begin)
            # Drain the chosen rank and target pair; helper asserts that the
            # resulting rebuild completes successfully.
            output = self.dmg_command.pool_drain(self.pool[val].uuid,
                                                 rank, t_string)
            self.print_and_assert_on_rebuild_failure(output)
            pver_drain = self.get_pool_version()
            self.log.info("Pool Version after drain %s", pver_drain)
            # Check pool version incremented after pool exclude
            self.assertTrue(pver_drain > pver_begin,
                            "Pool Version Error:  After drain")
            # Wait to finish the threads; surface any exception the I/O
            # thread pushed onto the shared queue.
            for thrd in threads:
                thrd.join()
                if not self.out_queue.empty():
                    self.assert_on_exception()
        for val in range(0, num_pool):
            display_string = "Pool{} space at the End".format(val)
            self.pool[val].display_pool_daos_space(display_string)
            # NOTE(review): the IOR read pass runs even when app_name is
            # "mdtest" (which wrote no ior data) — verify this is intended.
            self.run_ior_thread("Read", oclass, test_seq)
            self.container = self.pool_cont_dict[self.pool[val]][0]
            kwargs = {"pool": self.pool[val].uuid,
                      "cont": self.container.uuid}
            output = self.daos_command.container_check(**kwargs)
            self.log.info(output)

    @skipForTicket("DAOS-7289")
    def test_osa_online_drain(self):
        """Test ID: DAOS-4750
        Test Description: Validate Online drain with checksum enabled.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_drain,online_drain,online_drain_with_csum
        """
        self.log.info("Online Drain : With Checksum")
        self.run_online_drain_test(1)

    @skipForTicket("DAOS-7289")
    def test_osa_online_drain_no_csum(self):
        """Test ID: DAOS-6909
        Test Description: Validate Online drain without enabling checksum.

        :avocado: tags=all,pr,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa
        :avocado: tags=osa_drain,online_drain,online_drain_without_csum
        """
        self.log.info("Online Drain : No Checksum")
        # Pull the checksum-disabled container properties from the test yaml.
        self.test_with_checksum = self.params.get("test_with_checksum",
                                                  '/run/checksum/*')
        self.run_online_drain_test(1)

    @skipForTicket("DAOS-7289")
    def test_osa_online_drain_oclass(self):
        """Test ID: DAOS-6909
        Test Description: Validate Online drain with different object class.

        :avocado: tags=all,pr,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_drain,online_drain,online_drain_oclass
        """
        self.log.info("Online Drain : Oclass")
        # Repeat the drain once per yaml-provided object class.
        for oclass in self.test_oclass:
            self.run_online_drain_test(1, oclass=oclass)

    @skipForTicket("DAOS-7289")
    def test_osa_online_drain_with_aggregation(self):
        """Test ID: DAOS-6909
        Test Description: Validate Online drain with aggregation enabled.

        :avocado: tags=all,pr,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_drain,online_drain,online_drain_with_aggregation
        """
        self.log.info("Online Drain : Aggregation")
        # Enable aggregation during the drain via the yaml flag.
        self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                       '/run/aggregation/*')
        self.run_online_drain_test(1)

    @skipForTicket("DAOS-7289")
    def test_osa_online_drain_mdtest(self):
        """Test ID: DAOS-4750
        Test Description: Validate Online drain with mdtest
        running during the testing.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=osa,checksum
        :avocado: tags=osa_drain,online_drain,online_drain_mdtest
        """
        self.log.info("Online Drain : With Mdtest")
        self.run_online_drain_test(1, app_name="mdtest")