def run_offline_extend_test(self, num_pool, data=False, oclass=None):
    """Run the offline extend test with or without data.

    Args:
        num_pool (int) : total pools to create for testing purposes.
        data (bool) : whether to create data in the pool.
            Defaults to False.
        oclass (list) : list of DAOS object classes (eg: "RP_2G8").
            Defaults to None.
    """
    # Create the pools
    label_generator = LabelGenerator()
    pool = {}
    if oclass is None:
        oclass = []
        oclass.append(self.ior_cmd.dfs_oclass.value)
    self.log.info(oclass[0])
    for val in range(0, num_pool):
        # Perform the IOR write using the oclass list
        if val < len(oclass):
            index = val
        else:
            index = 0
        pool[val] = TestPool(context=self.context,
                             dmg_command=self.get_dmg_command(),
                             label_generator=label_generator)
        pool[val].get_params(self)
        pool[val].create()
        self.pool = pool[val]
        test_seq = self.ior_test_sequence[0]
        self.pool.set_property("reclaim", "disabled")
        if data:
            self.run_ior_thread("Write", oclass[index], test_seq)
            self.run_mdtest_thread(oclass[index])
            if self.test_during_aggregation is True:
                self.run_ior_thread("Write", oclass[index], test_seq)
            if self.test_with_snapshot is True:
                # Create a snapshot of the container after the
                # IOR job completes.
                self.container.create_snap()
                self.log.info("Created container snapshot: %s",
                              self.container.epoch)

    # Start the additional servers and extend the pool
    self.log.info("Extra Servers = %s", self.extra_servers)
    self.start_additional_servers(self.extra_servers)
    # Give some time for the additional servers to come up.
    for retry in range(0, 10):
        scan_info = self.get_dmg_command().system_query()
        if not check_system_query_status(scan_info):
            if retry == 9:
                self.fail("One or more servers not in expected status")
        else:
            break

    for rank_index, rank_val in enumerate(self.rank):
        # If the pool count is less than the rank count, extend only
        # a single pool; otherwise extend one pool per rank.
        if num_pool >= len(self.rank):
            val = rank_index
        else:
            val = 0
        self.pool = pool[val]
        self.pool.display_pool_daos_space("Pool space: Beginning")
        pver_begin = self.get_pool_version()
        self.log.info("Pool Version at the beginning %s", pver_begin)
        # Enable aggregation for multiple pool testing only.
        if self.test_during_aggregation is True and (num_pool > 1):
            self.delete_extra_container(self.pool)
        output = self.dmg_command.pool_extend(self.pool.uuid, rank_val)
        self.print_and_assert_on_rebuild_failure(output)
        pver_extend = self.get_pool_version()
        self.log.info("Pool Version after extend %d", pver_extend)
        # Check that the pool version incremented after the pool extend
        self.assertTrue(pver_extend > pver_begin,
                        "Pool Version Error: After extend")
        display_string = "Pool{} space at the End".format(val)
        pool[val].display_pool_daos_space(display_string)
        if data:
            # Perform the IOR read using the same DAOS object class
            # used for the write.
            if val < len(oclass):
                index = val
            else:
                index = 0
            self.run_ior_thread("Read", oclass[index], test_seq)
            self.run_mdtest_thread(oclass[index])
            self.container = self.pool_cont_dict[self.pool][0]
            kwargs = {"pool": self.pool.uuid,
                      "cont": self.container.uuid}
            output = self.daos_command.container_check(**kwargs)
            self.log.info(output)
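
# Usage sketch (hypothetical caller; the real avocado test methods and their
# yaml-driven parameters live in the test classes that mix in this utility):
#
#     def test_osa_offline_extend(self):
#         # Write IOR/mdtest data, extend the pool onto the new ranks,
#         # then read back and run a container check.
#         self.run_offline_extend_test(num_pool=1, data=True,
#                                      oclass=["RP_2G8"])
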
def run_offline_parallel_test(self, num_pool, data=False, oclass=None):
    """Run multiple OSA commands in parallel with or without data.

    Args:
        num_pool (int) : total pools to create for testing purposes.
        data (bool) : whether to create data in the pool.
            Defaults to False.
        oclass (str) : DAOS object class (eg: "RP_2G1").
            Defaults to None.
    """
    # Create the pools
    label_generator = LabelGenerator()
    pool = {}
    pool_uuid = []
    target_list = []
    if oclass is None:
        oclass = self.ior_cmd.dfs_oclass.value

    # Exclude two random, adjacent targets (target idx : 0-7).
    n = random.randint(0, 6)  #nosec
    target_list.append(n)
    target_list.append(n + 1)
    t_string = "{},{}".format(target_list[0], target_list[1])

    # Drain rank 2; exclude/reintegrate and extend use the ranks above it.
    rank = 2
    test_seq = self.ior_test_sequence[0]
    for val in range(0, num_pool):
        pool[val] = TestPool(context=self.context,
                             dmg_command=self.get_dmg_command(),
                             label_generator=label_generator)
        pool[val].get_params(self)
        pool[val].create()
        self.pool = pool[val]
        pool_uuid.append(self.pool.uuid)
        # Use only the pool UUID while running the test.
        self.pool.use_label = False
        self.pool.set_property("reclaim", "disabled")
        if data:
            self.run_ior_thread("Write", oclass, test_seq)
            if oclass != "S1":
                self.run_mdtest_thread()
            # If self.test_during_aggregation is set, create another
            # container and run the IOR command using the second
            # container.
            if self.test_during_aggregation is True:
                self.run_ior_thread("Write", oclass, test_seq)

    # Start the additional servers and extend the pool
    self.log.info("Extra Servers = %s", self.extra_servers)
    self.start_additional_servers(self.extra_servers)
    # Give some time for the additional servers to come up.
    for retry in range(0, 10):
        scan_info = self.get_dmg_command().system_query()
        if not check_system_query_status(scan_info):
            if retry == 9:
                self.fail("One or more servers not in expected status")
        else:
            break

    # Drain, exclude, reintegrate, and extend each pool in parallel.
    for val in range(0, num_pool):
        self.pool = pool[val]
        self.pool.display_pool_daos_space("Pool space: Beginning")
        pver_begin = self.get_pool_version()
        self.log.info("Pool Version at the beginning %s", pver_begin)
        # If we need to trigger aggregation on pool 1, delete the
        # second container which has IOR data.
        if self.test_during_aggregation is True and val == 0:
            self.delete_extra_container(self.pool)
        # Create the threads here
        threads = []
        # Action dictionary with OSA dmg command parameters
        action_args = {
            "drain": {"pool": self.pool.uuid, "rank": rank,
                      "tgt_idx": None},
            "exclude": {"pool": self.pool.uuid, "rank": (rank + 1),
                        "tgt_idx": t_string},
            "reintegrate": {"pool": self.pool.uuid, "rank": (rank + 1),
                            "tgt_idx": t_string},
            "extend": {"pool": self.pool.uuid, "ranks": (rank + 2),
                       "scm_size": self.pool.scm_size,
                       "nvme_size": self.pool.nvme_size}
        }
        for action in sorted(action_args):
            # Add a dmg thread
            process = threading.Thread(target=self.dmg_thread,
                                       kwargs={"action": action,
                                               "action_args": action_args,
                                               "results": self.out_queue})
            process.start()
            threads.append(process)

        # Wait for the threads to finish
        for thrd in threads:
            thrd.join()
            time.sleep(5)

    # Check the queue for any failure.
    tmp_list = list(self.out_queue.queue)
    for failure in tmp_list:
        if "FAIL" in failure:
            self.fail("Test failed : {0}".format(failure))

    for val in range(0, num_pool):
        self.pool = pool[val]
        display_string = "Pool{} space at the End".format(val)
        self.pool.display_pool_daos_space(display_string)
        self.is_rebuild_done(3)
        self.assert_on_rebuild_failure()
        pver_end = self.get_pool_version()
        self.log.info("Pool Version at the End %s", pver_end)
        if self.server_boot is True:
            self.assertTrue(pver_end >= 17,
                            "Pool Version Error: at the end")
        else:
            self.assertTrue(pver_end >= 25,
                            "Pool Version Error: at the end")

    # Finally, run IOR to read the data and perform a container check.
    for val in range(0, num_pool):
        self.pool = pool[val]
        if data:
            self.run_ior_thread("Read", oclass, test_seq)
            if oclass != "S1":
                self.run_mdtest_thread()
            self.container = self.pool_cont_dict[self.pool][0]
            kwargs = {"pool": self.pool.uuid,
                      "cont": self.container.uuid}
            output = self.daos_command.container_check(**kwargs)
            self.log.info(output)
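
# Usage sketch (hypothetical caller): drain, exclude, extend, and
# reintegrate are launched concurrently against each pool, so a typical
# invocation exercises multiple pools with data in place:
#
#     def test_osa_offline_parallel(self):
#         self.run_offline_parallel_test(num_pool=2, data=True,
#                                        oclass="RP_2G1")
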
def run_online_reintegration_test(self, num_pool, racer=False,
                                  server_boot=False, oclass=None):
    """Run the online reintegration test while IOR data is in flight.

    Args:
        num_pool (int) : total pools to create for testing purposes.
        racer (bool) : whether to run daos_racer in parallel.
            Defaults to False.
        server_boot (bool) : Perform system stop/start on a rank.
            Defaults to False.
        oclass (str) : DAOS object class string (eg: "RP_2G8").
            Defaults to None.
    """
    if oclass is None:
        oclass = self.ior_cmd.dfs_oclass.value
    test_seq = self.ior_test_sequence[0]
    # Create the pools
    label_generator = LabelGenerator()
    pool = {}
    exclude_servers = (len(self.hostlist_servers) * 2) - 1
    # Exclude a random rank other than rank 0.
    rank = random.randint(1, exclude_servers)  #nosec

    # Start the daos_racer thread
    if racer is True:
        daos_racer_thread = threading.Thread(target=self.daos_racer_thread)
        daos_racer_thread.start()
        time.sleep(30)

    for val in range(0, num_pool):
        pool[val] = TestPool(context=self.context,
                             dmg_command=self.get_dmg_command(),
                             label_generator=label_generator)
        pool[val].get_params(self)
        pool[val].create()
        pool[val].set_property("reclaim", "disabled")

    # Exclude and reintegrate the rank on each pool.
    for val in range(0, num_pool):
        threads = []
        self.pool = pool[val]
        # Trigger aggregation.
        if self.test_during_aggregation is True:
            for _ in range(0, 2):
                self.run_ior_thread("Write", oclass, test_seq)
            self.delete_extra_container(self.pool)
        # The following thread runs while performing the OSA operations.
        threads.append(threading.Thread(target=self.run_ior_thread,
                                        kwargs={"action": "Write",
                                                "oclass": oclass,
                                                "test": test_seq}))

        # Launch the IOR threads
        for thrd in threads:
            self.log.info("Thread : %s", thrd)
            thrd.start()
            time.sleep(1)
        self.pool.display_pool_daos_space("Pool space: Beginning")
        pver_begin = self.get_pool_version()
        self.log.info("Pool Version at the beginning %s", pver_begin)
        if server_boot is False:
            output = self.dmg_command.pool_exclude(self.pool.uuid, rank)
        else:
            output = self.dmg_command.system_stop(ranks=rank, force=True)
            self.pool.wait_for_rebuild(False)
            self.log.info(output)
            output = self.dmg_command.system_start(ranks=rank)
        self.print_and_assert_on_rebuild_failure(output)

        pver_exclude = self.get_pool_version()
        self.log.info("Pool Version after exclude %s", pver_exclude)
        # Check that the pool version incremented after the exclude:
        # pver_exclude should exceed pver_begin by more than the 8
        # excluded targets.
        self.assertTrue(pver_exclude > (pver_begin + 8),
                        "Pool Version Error: After exclude")
        output = self.dmg_command.pool_reintegrate(self.pool.uuid, rank)
        self.print_and_assert_on_rebuild_failure(output)
        pver_reint = self.get_pool_version()
        self.log.info("Pool Version after reintegrate %d", pver_reint)
        # Check that the pool version incremented after the reintegrate
        self.assertTrue(pver_reint > (pver_exclude + 1),
                        "Pool Version Error: After reintegrate")

        # Wait for the IOR threads to finish
        for thrd in threads:
            thrd.join()
            if not self.out_queue.empty():
                self.assert_on_exception()

    # Check data consistency for IOR in the future. Presently, we run
    # daos_racer in parallel to IOR and check the data consistency only
    # for the daos_racer objects after exclude and reintegration.
    if racer is True:
        daos_racer_thread.join()

    for val in range(0, num_pool):
        display_string = "Pool{} space at the End".format(val)
        self.pool = pool[val]
        self.pool.display_pool_daos_space(display_string)
        self.run_ior_thread("Read", oclass, test_seq)
        self.container = self.pool_cont_dict[self.pool][0]
        kwargs = {"pool": self.pool.uuid,
                  "cont": self.container.uuid}
        output = self.daos_command.container_check(**kwargs)
        self.log.info(output)
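
# Usage sketch (hypothetical caller): server_boot=True replaces the dmg
# pool exclude with a system stop/start of the chosen rank, and racer=True
# runs daos_racer alongside the IOR write threads:
#
#     def test_osa_online_reintegration(self):
#         self.run_online_reintegration_test(num_pool=1, racer=True,
#                                            server_boot=False)
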
def run_nvme_pool_exclude(self, num_pool, oclass=None):
    """Perform the actual NVMe pool exclude testing.

    It does the following jobs:
        - Create a number of TestPools.
        - Start the IOR threads running on each pool.
        - On each pool do the following:
            - Perform an IOR write (using a container)
            - Exclude a daos_server
            - Perform an IOR read/verify (same container used for write)

    Args:
        num_pool (int) : total pools to create for testing purposes.
        oclass (str) : object class (eg: "RP_2G8", "S1").
            Defaults to None.
    """
    # Create the pools
    label_generator = LabelGenerator()
    pool = {}
    if oclass is None:
        oclass = self.ior_cmd.dfs_oclass.value

    # Exclude rank : ranks other than rank 0.
    exclude_servers = len(self.hostlist_servers) * 2
    rank_list = list(range(1, exclude_servers))

    for val in range(0, num_pool):
        pool[val] = TestPool(context=self.context,
                             dmg_command=self.dmg_command,
                             label_generator=label_generator)
        pool[val].get_params(self)
        pool[val].create()
        pool[val].set_property("reclaim", "disabled")

    for val in range(0, num_pool):
        self.pool = pool[val]
        self.add_container(self.pool)
        self.cont_list.append(self.container)
        # Parse the redundancy factor (rf) from the container properties.
        rf = ''.join(self.container.properties.value.split(":"))
        rf_num = int(re.search(r"rf([0-9]+)", rf).group(1))
        for test in range(0, rf_num):
            threads = []
            threads.append(threading.Thread(target=self.run_ior_thread,
                                            kwargs={"action": "Write",
                                                    "oclass": oclass,
                                                    "test": test}))
            # Launch the IOR threads
            for thrd in threads:
                self.log.info("Thread : %s", thrd)
                thrd.start()
                time.sleep(1)
            self.pool.display_pool_daos_space("Pool space: Before Exclude")
            pver_begin = self.get_pool_version()
            # Pick a random rank and target to exclude.
            index = random.randint(1, len(rank_list))  #nosec
            rank = rank_list.pop(index - 1)
            tgt_exclude = random.randint(1, 6)  #nosec
            self.log.info("Removing rank %d, target %d", rank, tgt_exclude)
            self.log.info("Pool Version at the beginning %s", pver_begin)
            output = self.dmg_command.pool_exclude(self.pool.uuid, rank,
                                                   tgt_exclude)
            self.print_and_assert_on_rebuild_failure(output)
            pver_exclude = self.get_pool_version()
            self.log.info("Pool Version after exclude %s", pver_exclude)
            # Check that the pool version incremented after the exclude
            self.assertTrue(pver_exclude > pver_begin,
                            "Pool Version Error: After exclude")
            # Wait for the IOR threads to finish
            for thrd in threads:
                thrd.join()
                if not self.out_queue.empty():
                    self.assert_on_exception()
            # Verify the data after the pool exclude
            self.run_ior_thread("Read", oclass, test)
            display_string = "Pool{} space at the End".format(val)
            self.pool.display_pool_daos_space(display_string)
            kwargs = {"pool": self.pool.uuid,
                      "cont": self.container.uuid}
            output = self.daos_command.container_check(**kwargs)
            self.log.info(output)
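
# Usage sketch (hypothetical caller): one exclude iteration is performed
# per redundancy-factor level, with the rank and target picked at random
# inside the method:
#
#     def test_nvme_pool_exclude(self):
#         self.run_nvme_pool_exclude(num_pool=1, oclass="RP_2G8")
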