Example #1
0
    def run_offline_extend_test(self, num_pool, data=False, oclass=None):
        """Run the offline extend test, optionally writing data first.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            data (bool) : whether pool has no data or to create
                          some data in pool. Defaults to False.
            oclass (list) : list of daos object class (eg: "RP_2G8")
        """
        # Default to the object class configured for the IOR command.
        if oclass is None:
            oclass = [self.ior_cmd.dfs_oclass.value]
        self.log.info(oclass[0])

        label_generator = LabelGenerator()
        pool = {}
        test_seq = self.ior_test_sequence[0]

        for count in range(num_pool):
            # Reuse the first object class when there are more pools
            # than entries in the oclass list.
            index = count if count < len(oclass) else 0
            new_pool = TestPool(context=self.context,
                                dmg_command=self.get_dmg_command(),
                                label_generator=label_generator)
            new_pool.get_params(self)
            new_pool.create()
            pool[count] = new_pool
            self.pool = new_pool
            self.pool.set_property("reclaim", "disabled")
            if data:
                self.run_ior_thread("Write", oclass[index], test_seq)
                self.run_mdtest_thread(oclass[index])
                if self.test_during_aggregation is True:
                    self.run_ior_thread("Write", oclass[index], test_seq)
                if self.test_with_snapshot is True:
                    # Snapshot the container once the IOR job completes.
                    self.container.create_snap()
                    self.log.info("Created container snapshot: %s",
                                  self.container.epoch)

        # Bring up the extra servers the pool will be extended onto.
        self.log.info("Extra Servers = %s", self.extra_servers)
        self.start_additional_servers(self.extra_servers)
        # Poll up to 10 times while the additional servers come up.
        for retry in range(10):
            scan_info = self.get_dmg_command().system_query()
            if check_system_query_status(scan_info):
                break
            if retry == 9:
                self.fail("One or more servers not in expected status")

        for rank_index, rank_val in enumerate(self.rank):
            # Extend a distinct pool per rank when enough pools exist;
            # otherwise always extend the first pool.
            val = rank_index if num_pool >= len(self.rank) else 0
            self.pool = pool[val]
            self.pool.display_pool_daos_space("Pool space: Beginning")
            pver_begin = self.get_pool_version()
            self.log.info("Pool Version at the beginning %s", pver_begin)
            # Enable aggregation for multiple pool testing only.
            if self.test_during_aggregation is True and (num_pool > 1):
                self.delete_extra_container(self.pool)
            output = self.dmg_command.pool_extend(self.pool.uuid, rank_val)
            self.print_and_assert_on_rebuild_failure(output)

            pver_extend = self.get_pool_version()
            self.log.info("Pool Version after extend %d", pver_extend)
            # The pool map version must grow after a successful extend.
            self.assertTrue(pver_extend > pver_begin,
                            "Pool Version Error:  After extend")

            self.pool.display_pool_daos_space(
                "Pool{} space at the End".format(val))

            if data:
                # Read back with the same object class used for write.
                index = val if val < len(oclass) else 0
                self.run_ior_thread("Read", oclass[index], test_seq)
                self.run_mdtest_thread(oclass[index])
                self.container = self.pool_cont_dict[self.pool][0]
                output = self.daos_command.container_check(
                    pool=self.pool.uuid, cont=self.container.uuid)
                self.log.info(output)
Example #2
0
    def run_offline_parallel_test(self, num_pool, data=False, oclass=None):
        """Run multiple OSA commands in parallel with or without data.

        Drain, exclude, reintegrate and extend are launched as parallel
        dmg threads against each pool, then the pool version and data
        integrity are verified.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            data (bool) : whether pool has no data or to create
                          some data in pool. Defaults to False.
            oclass (str) : Daos object class (RP_2G1,etc)
        """
        # Create a pool
        label_generator = LabelGenerator()
        pool = {}
        pool_uuid = []
        target_list = []
        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value

        # Exclude target : random two targets (target idx : 0-7)
        n = random.randint(0, 6) #nosec
        target_list.append(n)
        target_list.append(n+1)
        t_string = "{},{}".format(target_list[0], target_list[1])

        # Exclude rank 2.
        rank = 2

        test_seq = self.ior_test_sequence[0]
        for val in range(0, num_pool):
            pool[val] = TestPool(
                context=self.context, dmg_command=self.get_dmg_command(),
                label_generator=label_generator)
            pool[val].get_params(self)
            pool[val].create()
            self.pool = pool[val]
            pool_uuid.append(self.pool.uuid)
            # Use only pool UUID while running the test.
            self.pool.use_label = False
            self.pool.set_property("reclaim", "disabled")

            if data:
                self.run_ior_thread("Write", oclass, test_seq)
                if oclass != "S1":
                    self.run_mdtest_thread()
                # if self.test_during_aggregation is set,
                # Create another container and run the IOR
                # command using the second container.
                if self.test_during_aggregation is True:
                    self.run_ior_thread("Write", oclass, test_seq)

        # Start the additional servers and extend the pool
        self.log.info("Extra Servers = %s", self.extra_servers)
        self.start_additional_servers(self.extra_servers)
        # Give sometime for the additional server to come up.
        for retry in range(0, 10):
            scan_info = self.get_dmg_command().system_query()
            if not check_system_query_status(scan_info):
                if retry == 9:
                    self.fail("One or more servers not in expected status")
            else:
                break

        # Exclude and reintegrate the pool_uuid, rank and targets.
        # BUG FIX: collect the dmg threads of EVERY pool in one list.
        # Previously the list was reset on each pool iteration, so only
        # the last pool's threads were joined and earlier pools' OSA
        # commands could still be running during the failure-queue
        # check below.
        threads = []
        for val in range(0, num_pool):
            self.pool = pool[val]
            self.pool.display_pool_daos_space("Pool space: Beginning")
            pver_begin = self.get_pool_version()
            self.log.info("Pool Version at the beginning %s", pver_begin)
            # If we need to trigger aggregation on pool 1, delete
            # the second container which has IOR data.
            if self.test_during_aggregation is True and val == 0:
                self.delete_extra_container(self.pool)
            # Action dictionary with OSA dmg command parameters
            action_args = {
                "drain": {"pool": self.pool.uuid, "rank": rank,
                          "tgt_idx": None},
                "exclude": {"pool": self.pool.uuid, "rank": (rank + 1),
                            "tgt_idx": t_string},
                "reintegrate": {"pool": self.pool.uuid, "rank": (rank + 1),
                                "tgt_idx": t_string},
                "extend": {"pool": self.pool.uuid, "ranks": (rank + 2),
                           "scm_size": self.pool.scm_size,
                           "nvme_size": self.pool.nvme_size}
            }
            for action in sorted(action_args):
                # Add a dmg thread
                process = threading.Thread(target=self.dmg_thread,
                                           kwargs={"action": action,
                                                   "action_args": action_args,
                                                   "results": self.out_queue})
                process.start()
                threads.append(process)

        # Wait to finish the threads
        for thrd in threads:
            thrd.join()
            time.sleep(5)

        # Check the queue for any failure.
        tmp_list = list(self.out_queue.queue)
        for failure in tmp_list:
            if "FAIL" in failure:
                self.fail("Test failed : {0}".format(failure))

        for val in range(0, num_pool):
            self.pool = pool[val]
            display_string = "Pool{} space at the End".format(val)
            self.pool.display_pool_daos_space(display_string)
            self.is_rebuild_done(3)
            self.assert_on_rebuild_failure()
            pver_end = self.get_pool_version()
            self.log.info("Pool Version at the End %s", pver_end)
            # Expected minimum pool map version differs when ranks were
            # stopped/restarted (server_boot) versus dmg-excluded.
            if self.server_boot is True:
                self.assertTrue(pver_end >= 17,
                                "Pool Version Error:  at the end")
            else:
                self.assertTrue(pver_end >= 25,
                                "Pool Version Error:  at the end")

        # Finally run IOR to read the data and perform daos_container_check
        for val in range(0, num_pool):
            self.pool = pool[val]
            if data:
                self.run_ior_thread("Read", oclass, test_seq)
                if oclass != "S1":
                    self.run_mdtest_thread()
                self.container = self.pool_cont_dict[self.pool][0]
                kwargs = {"pool": self.pool.uuid,
                          "cont": self.container.uuid}
                output = self.daos_command.container_check(**kwargs)
                self.log.info(output)
Example #3
0
    def run_online_reintegration_test(self,
                                      num_pool,
                                      racer=False,
                                      server_boot=False,
                                      oclass=None):
        """Run online reintegration while IOR runs in the background.

        Excludes one random rank from each pool (or stops/starts it when
        server_boot is set), then reintegrates it, asserting that the
        pool map version advances after each step.

        Args:
            num_pool (int) : total pools to create for testing purposes.
            racer (bool) : whether to run daos_racer in a parallel thread
                           while the OSA operations execute.
                           Defaults to False.
            server_boot (bool) : Perform system stop/start on a rank.
                                 Defaults to False.
            oclass (str) : daos object class string (eg: "RP_2G8").
                           Defaults to None.
        """
        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value
        test_seq = self.ior_test_sequence[0]
        # Create a pool
        label_generator = LabelGenerator()
        pool = {}
        # NOTE(review): assumes two engine ranks per server host — confirm.
        exclude_servers = (len(self.hostlist_servers) * 2) - 1

        # Exclude one rank : other than rank 0.
        rank = random.randint(1, exclude_servers)  #nosec

        # Start the daos_racer thread
        if racer is True:
            daos_racer_thread = threading.Thread(target=self.daos_racer_thread)
            daos_racer_thread.start()
            time.sleep(30)

        for val in range(0, num_pool):
            pool[val] = TestPool(context=self.context,
                                 dmg_command=self.get_dmg_command(),
                                 label_generator=label_generator)
            pool[val].get_params(self)
            pool[val].create()
            pool[val].set_property("reclaim", "disabled")

        # Exclude and reintegrate the pool_uuid, rank and targets
        for val in range(0, num_pool):
            threads = []
            self.pool = pool[val]
            # Instantiate aggregation
            if self.test_during_aggregation is True:
                for _ in range(0, 2):
                    self.run_ior_thread("Write", oclass, test_seq)
                self.delete_extra_container(self.pool)
            # The following thread runs while performing osa operations.
            threads.append(
                threading.Thread(target=self.run_ior_thread,
                                 kwargs={
                                     "action": "Write",
                                     "oclass": oclass,
                                     "test": test_seq
                                 }))

            # Launch the IOR threads
            for thrd in threads:
                self.log.info("Thread : %s", thrd)
                thrd.start()
                time.sleep(1)
            self.pool.display_pool_daos_space("Pool space: Beginning")
            pver_begin = self.get_pool_version()
            self.log.info("Pool Version at the beginning %s", pver_begin)
            # Either exclude the rank via dmg, or simulate a failure by
            # stopping and restarting that rank's server.
            if server_boot is False:
                output = self.dmg_command.pool_exclude(self.pool.uuid, rank)
            else:
                output = self.dmg_command.system_stop(ranks=rank, force=True)
                self.pool.wait_for_rebuild(False)
                self.log.info(output)
                output = self.dmg_command.system_start(ranks=rank)

            self.print_and_assert_on_rebuild_failure(output)
            pver_exclude = self.get_pool_version()

            self.log.info("Pool Version after exclude %s", pver_exclude)
            # Check pool version incremented after pool exclude
            # pver_exclude should be greater than
            # pver_begin + 8 targets.
            self.assertTrue(pver_exclude > (pver_begin + 8),
                            "Pool Version Error:  After exclude")
            output = self.dmg_command.pool_reintegrate(self.pool.uuid, rank)
            self.print_and_assert_on_rebuild_failure(output)

            pver_reint = self.get_pool_version()
            self.log.info("Pool Version after reintegrate %d", pver_reint)
            # Check pool version incremented after pool reintegrate
            self.assertTrue(pver_reint > (pver_exclude + 1),
                            "Pool Version Error:  After reintegrate")
            # Wait to finish the threads
            for thrd in threads:
                thrd.join()
                if not self.out_queue.empty():
                    self.assert_on_exception()

        # Check data consistency for IOR in future
        # Presently, we are running daos_racer in parallel
        # to IOR and checking the data consistency only
        # for the daos_racer objects after exclude
        # and reintegration.
        if racer is True:
            daos_racer_thread.join()

        # Read back the data and run a container check on every pool.
        for val in range(0, num_pool):
            display_string = "Pool{} space at the End".format(val)
            self.pool = pool[val]
            self.pool.display_pool_daos_space(display_string)
            self.run_ior_thread("Read", oclass, test_seq)
            self.container = self.pool_cont_dict[self.pool][0]
            kwargs = {"pool": self.pool.uuid, "cont": self.container.uuid}
            output = self.daos_command.container_check(**kwargs)
            self.log.info(output)
Example #4
0
    def run_nvme_pool_exclude(self, num_pool, oclass=None):
        """This is the main method which performs the actual
        testing. It does the following jobs:
        - Create number of TestPools
        - Start the IOR threads for running on each pools.
        - On each pool do the following:
            - Perform an IOR write (using a container)
            - Exclude a daos_server
            - Perform an IOR read/verify (same container used for write)
        Args:
            num_pool (int) : total pools to create for testing purposes.
            oclass (str) : object class (eg: RP_2G8, S1,etc).
                           Defaults to None
        """
        # Create a pool
        label_generator = LabelGenerator()
        pool = {}

        if oclass is None:
            oclass = self.ior_cmd.dfs_oclass.value

        # Candidate ranks for exclusion: every rank except rank 0.
        exclude_servers = len(self.hostlist_servers) * 2
        rank_list = list(range(1, exclude_servers))

        for count in range(num_pool):
            pool[count] = TestPool(
                context=self.context, dmg_command=self.dmg_command,
                label_generator=label_generator)
            pool[count].get_params(self)
            pool[count].create()
            pool[count].set_property("reclaim", "disabled")

        for count in range(num_pool):
            self.pool = pool[count]
            self.add_container(self.pool)
            self.cont_list.append(self.container)
            # Extract the container "rf" property value from the
            # properties string, e.g. "rf:2" -> 2.
            props = self.container.properties.value.replace(":", "")
            rf_num = int(re.search(r"rf([0-9]+)", props).group(1))
            for seq in range(rf_num):
                # IOR write runs in the background while a rank/target
                # pair is excluded.
                writer = threading.Thread(target=self.run_ior_thread,
                                          kwargs={"action": "Write",
                                                  "oclass": oclass,
                                                  "test": seq})
                self.log.info("Thread : %s", writer)
                writer.start()
                time.sleep(1)

                self.pool.display_pool_daos_space("Pool space: Before Exclude")
                pver_begin = self.get_pool_version()

                # Pick (and remove) a random rank, plus a random target.
                index = random.randint(1, len(rank_list)) #nosec
                rank = rank_list.pop(index - 1)
                tgt_exclude = random.randint(1, 6) #nosec
                self.log.info("Removing rank %d, target %d", rank, tgt_exclude)

                self.log.info("Pool Version at the beginning %s", pver_begin)
                output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                       rank, tgt_exclude)
                self.print_and_assert_on_rebuild_failure(output)

                pver_exclude = self.get_pool_version()
                self.log.info("Pool Version after exclude %s", pver_exclude)
                # The pool map version must grow after the exclude.
                self.assertTrue(pver_exclude > pver_begin,
                                "Pool Version Error:  After exclude")
                # Wait for the background IOR write to finish.
                writer.join()
                if not self.out_queue.empty():
                    self.assert_on_exception()
                # Verify the data after pool exclude
                self.run_ior_thread("Read", oclass, seq)
                self.pool.display_pool_daos_space(
                    "Pool{} space at the End".format(count))
                output = self.daos_command.container_check(
                    pool=self.pool.uuid, cont=self.container.uuid)
                self.log.info(output)