def test_ior_intercept_verify_data(self):
        """Jira ID: DAOS-3502.

        Test Description:
            Purpose of this test is to run ior through dfuse with
            interception library on 5 clients and without interception
            library on 1 client for at least 30 minutes and verify the
            data integrity using ior's Read Verify and Write Verify
            options.

        Use case:
            Run ior with read, write, fpp, read verify, and
            write verify for 30 minutes
            Run ior with read, write, read verify, and
            write verify for 30 minutes

        :avocado: tags=all,full_regression,hw,large
        :avocado: tags=daosio,iorinterceptverifydata
        """
        intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
        with_intercept = dict()
        self.run_multiple_ior_with_pool(with_intercept, intercept)

        IorCommand.log_metrics(self.log,
                               "5 clients - with interception library",
                               with_intercept[1])
        IorCommand.log_metrics(self.log,
                               "1 client - without interception library",
                               with_intercept[2])

    def test_ior_intercept_multi_client(self):
        """Jira ID: DAOS-3499.

        Test Description:
            Purpose of this test is to run ior through dfuse on multiple
            clients for 5 minutes and capture the metrics, then use the
            interception library by exporting LD_PRELOAD to the libioil.so
            path, rerun the above ior, capture the metrics, and compare
            the performance difference to check that using the
            interception library makes a significant performance
            improvement.

        Use case:
            Run ior with read, write for 5 minutes
            Run ior with read, write for 5 minutes with interception library
            Compare the results and check whether using interception
                library provides better performance.

        :avocado: tags=all,full_regression,hw,large
        :avocado: tags=daosio,iorinterceptmulticlient
        """
        suffix = self.ior_cmd.transfer_size.value
        out = self.run_ior_with_pool(test_file_suffix=suffix)
        without_intercept = IorCommand.get_ior_metrics(out)
        intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
        suffix = suffix + "intercept"
        out = self.run_ior_with_pool(intercept, test_file_suffix=suffix)
        with_intercept = IorCommand.get_ior_metrics(out)
        max_mib = int(IorMetrics.Max_MiB)
        min_mib = int(IorMetrics.Min_MiB)
        mean_mib = int(IorMetrics.Mean_MiB)

        write_x = self.params.get("write_x", "/run/ior/iorflags/ssf/*", 1)

        # Verifying write performance
        self.assertTrue(
            float(with_intercept[0][max_mib]) > write_x *
            float(without_intercept[0][max_mib]))
        self.assertTrue(
            float(with_intercept[0][min_mib]) > write_x *
            float(without_intercept[0][min_mib]))
        self.assertTrue(
            float(with_intercept[0][mean_mib]) > write_x *
            float(without_intercept[0][mean_mib]))

        # Verifying read performance
        # The read performance is almost the same with or without the
        # interception library, but it can occasionally be slightly lower
        # with the library. Verify that it is not drastically lower by
        # checking it is at least 60% of the baseline.
        read_x = 0.6
        self.assertTrue(
            float(with_intercept[1][max_mib]) > read_x *
            float(without_intercept[1][max_mib]))
        self.assertTrue(
            float(with_intercept[1][min_mib]) > read_x *
            float(without_intercept[1][min_mib]))
        self.assertTrue(
            float(with_intercept[1][mean_mib]) > read_x *
            float(without_intercept[1][mean_mib]))
Example #3
    def test_ior_intercept(self):
        """Jira ID: DAOS-3498.

        Test Description:
            Purpose of this test is to run ior using dfuse for 5 minutes
            and capture the metrics, then use the interception library by
            exporting LD_PRELOAD to the libioil.so path, rerun the above
            ior, capture the metrics, and compare the performance
            difference to check that using the interception library makes
            a significant performance improvement.

        Use case:
            Run ior with read, write, CheckWrite, CheckRead
                for 5 minutes
            Run ior with read, write, CheckWrite, CheckRead
                for 5 minutes with interception library
            Compare the results and check whether using interception
                library provides better performance.

        :avocado: tags=all,full_regression,hw,small,daosio,iorinterceptbasic
        """
        apis = self.params.get("ior_api", '/run/ior/iorflags/ssf/*')
        for api in apis:
            self.ior_cmd.api.update(api)
            out = self.run_ior_with_pool(fail_on_warning=False)
            without_intercept = IorCommand.get_ior_metrics(out)
            if api == "POSIX":
                intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
                out = self.run_ior_with_pool(intercept, fail_on_warning=False)
                with_intercept = IorCommand.get_ior_metrics(out)
                max_mib = int(IorMetrics.Max_MiB)
                min_mib = int(IorMetrics.Min_MiB)
                mean_mib = int(IorMetrics.Mean_MiB)
                write_x = self.params.get("write_x", "/run/ior/iorflags/ssf/*",
                                          1)
                read_x = self.params.get("read_x", "/run/ior/iorflags/ssf/*",
                                         1)

                # Verifying write performance
                self.assertTrue(
                    float(with_intercept[0][max_mib]) > write_x *
                    float(without_intercept[0][max_mib]))
                self.assertTrue(
                    float(with_intercept[0][min_mib]) > write_x *
                    float(without_intercept[0][min_mib]))
                self.assertTrue(
                    float(with_intercept[0][mean_mib]) > write_x *
                    float(without_intercept[0][mean_mib]))

                # Verifying read performance
                self.assertTrue(
                    float(with_intercept[1][max_mib]) > read_x *
                    float(without_intercept[1][max_mib]))
                self.assertTrue(
                    float(with_intercept[1][min_mib]) > read_x *
                    float(without_intercept[1][min_mib]))
                self.assertTrue(
                    float(with_intercept[1][mean_mib]) > read_x *
                    float(without_intercept[1][mean_mib]))
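    # Note on the metrics indexing above: IorCommand.get_ior_metrics() is
    # assumed to return one row of IOR summary fields per access type
    # (row 0 = write, row 1 = read, inferred from the assertions), with
    # int(IorMetrics.<field>) giving that field's column index, e.g.:
    #     max_mib = int(IorMetrics.Max_MiB)
    #     max_write = float(with_intercept[0][max_mib])
    #     max_read = float(with_intercept[1][max_mib])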
Example #4
    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')
Example #5
    def run_il_perf_check(self):
        """Verify IOR performance with DFUSE + IL is similar to DFS.

        Steps:
            Run IOR with DFS.
            Run IOR with DFUSE + IL.
            Verify performance with DFUSE + IL is similar to DFS.

        """
        # Write and read performance thresholds
        write_x = self.params.get("write_x", self.ior_cmd.namespace, None)
        read_x = self.params.get("read_x", self.ior_cmd.namespace, None)
        if write_x is None or read_x is None:
            self.fail("Failed to get write_x and read_x from config")

        # Run IOR with DFS
        self.ior_cmd.api.update("DFS")
        dfs_out = self.run_ior_with_pool(fail_on_warning=self.log.info)
        dfs_perf = IorCommand.get_ior_metrics(dfs_out)

        # Destroy and use a new pool and container
        self.container.destroy()
        self.container = None
        self.pool.destroy()
        self.pool = None

        # Run IOR with dfuse + IL
        self.ior_cmd.api.update("POSIX")
        dfuse_out = self.run_ior_with_pool(intercept=os.path.join(
            self.prefix, 'lib64', 'libioil.so'),
                                           fail_on_warning=self.log.info)
        dfuse_perf = IorCommand.get_ior_metrics(dfuse_out)

        # Verify write and read performance are within the thresholds.
        # Since Min can have a lot of variance, don't check Min or Mean.
        # Ideally, we might want to look at the Std Dev to ensure the results are admissible.
        dfs_max_write = float(dfs_perf[0][IorMetrics.Max_MiB])
        dfuse_max_write = float(dfuse_perf[0][IorMetrics.Max_MiB])
        actual_write_x = percent_change(dfs_max_write, dfuse_max_write)
        self.log.info("DFS Max Write:      %.2f", dfs_max_write)
        self.log.info("DFUSE IL Max Write: %.2f", dfuse_max_write)
        self.log.info("Percent Diff:       %.2f%%", actual_write_x * 100)
        self.assertLessEqual(abs(actual_write_x), write_x,
                             "Max Write Diff too large")

        dfs_max_read = float(dfs_perf[1][IorMetrics.Max_MiB])
        dfuse_max_read = float(dfuse_perf[1][IorMetrics.Max_MiB])
        actual_read_x = percent_change(dfs_max_read, dfuse_max_read)
        self.log.info("DFS Max Read:      %.2f", dfs_max_read)
        self.log.info("DFUSE IL Max Read: %.2f", dfuse_max_read)
        self.log.info("Percent Diff:      %.2f%%", actual_read_x * 100)
        self.assertLessEqual(abs(actual_read_x), read_x,
                             "Max Read Diff too large")
Example #6
    def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob):
        """Create an IOR cmdline to run in slurm batch.

        Args:
            job_spec (str): ior job in yaml to run
            pool (obj): TestPool obj
            ppn (int): number of tasks to run on each node
            nodesperjob (int): number of nodes per job

        Returns:
            list: a list of [cmdline string, log name] pairs

        """
        commands = []

        iteration = self.test_iteration
        ior_params = "/run/" + job_spec + "/*"
        # IOR job specs with a list of parameters; update each value
        api_list = self.params.get("api", ior_params + "*")
        tsize_list = self.params.get("transfer_size", ior_params + "*")
        bsize_list = self.params.get("block_size", ior_params + "*")
        oclass_list = self.params.get("daos_oclass", ior_params + "*")
        # check if capable of doing rebuild; if yes then daos_oclass = RP_*GX
        if self.is_harasser("rebuild"):
            oclass_list = self.params.get("daos_oclass", "/run/rebuild/*")
        # update IOR cmdline for each additional IOR obj
        for api in api_list:
            for b_size in bsize_list:
                for t_size in tsize_list:
                    for o_type in oclass_list:
                        ior_cmd = IorCommand()
                        ior_cmd.namespace = ior_params
                        ior_cmd.get_params(self)
                        if iteration is not None and iteration < 0:
                            ior_cmd.repetitions.update(1000000)
                        if self.job_timeout is not None:
                            ior_cmd.max_duration.update(self.job_timeout)
                        else:
                            ior_cmd.max_duration.update(10)
                        ior_cmd.api.update(api)
                        ior_cmd.block_size.update(b_size)
                        ior_cmd.transfer_size.update(t_size)
                        ior_cmd.daos_oclass.update(o_type)
                        ior_cmd.set_daos_params(self.server_group, pool)
                        # srun cmdline
                        nprocs = nodesperjob * ppn
                        env = ior_cmd.get_default_env("srun")
                        if ior_cmd.api.value == "MPIIO":
                            env["DAOS_CONT"] = ior_cmd.daos_cont.value
                        cmd = Srun(ior_cmd)
                        cmd.assign_processes(nprocs)
                        cmd.assign_environment(env, True)
                        cmd.ntasks_per_node.update(ppn)
                        log_name = "{}_{}_{}_{}".format(
                            api, b_size, t_size, o_type)
                        commands.append([str(cmd), log_name])
                        self.log.info("<<IOR cmdline>>: %s \n",
                                      commands[-1][0])
        return commands
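
    # Hypothetical consumer sketch (not part of the original code): each
    # entry returned by create_ior_cmdline is a [cmdline, log_name] pair
    # meant to be placed into a slurm batch script.
    def _submit_ior_cmdlines_sketch(self, job_spec, pool, ppn, nodesperjob):
        """Log each generated IOR command with its associated log name."""
        for cmdline, log_name in self.create_ior_cmdline(
                job_spec, pool, ppn, nodesperjob):
            self.log.info("sbatch entry: %s (log: %s.log)", cmdline, log_name)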
Example #7
    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super().setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')
        self.ppn = self.params.get("ppn", '/run/ior/client_processes/*')
        self.subprocess = self.params.get("subprocess", '/run/ior/*', False)
        self.ior_timeout = self.params.get("ior_timeout", '/run/ior/*', None)
Example #8
    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')
        self.subprocess = self.params.get("subprocess", '/run/ior/*', False)

        # lock is needed for run_multiple_ior method.
        self.lock = threading.Lock()
Example #9
    def start_ior_thread(self, results, create_cont, operation):
        """Start IOR write/read threads and wait until all threads are finished.

        Args:
            results (queue): queue for returning thread results
            create_cont (bool): whether to create a new container.
            operation (str):
                Write/WriteRead: Write or Write/Read based on the IOR
                    parameters in the yaml file.
                Auto_Write/Auto_Read: Calculate the IOR block size based on
                    the requested storage % to be filled.
        """
        # IOR flags can be Write only or Write/Read based on the test yaml
        self.ior_cmd.flags.value = self.ior_default_flags

        # Calculate the block size based on server % to fill up.
        if 'Auto' in operation:
            block_size = self.calculate_ior_block_size()
            self.ior_cmd.block_size.update('{}'.format(block_size))

        # For an IOR read operation, update the read flags from the yaml file.
        if 'Auto_Read' in operation or operation == "Read":
            create_cont = False
            self.ior_cmd.flags.value = self.ior_read_flags

        # run IOR Command
        try:
            out = self.run_ior_with_pool(create_cont=create_cont,
                                         fail_on_warning=self.fail_on_warning)
            self.ior_matrix = IorCommand.get_ior_metrics(out)
            results.put("PASS")
        except (CommandFailure, TestFail) as _error:
            results.put("FAIL")
Example #10
    def start_ior_thread(self, results, create_cont, operation='WriteRead'):
        """Start IOR write/read threads and wait until all threads are finished.

        Args:
            results (queue): queue for returning thread results
            create_cont (bool): whether to create a new container.
            operation (str): IOR operation for read/write. By default it
                will do whatever is specified in the ior_flags setting.
        """
        self.ior_cmd.flags.value = self.ior_default_flags

        # For an IOR write operation, calculate the block size based on the
        # server % to fill up. Store the container UUID for a future read
        # operation.
        if operation == 'Write':
            block_size = self.calculate_ior_block_size()
            self.ior_cmd.block_size.update('{}'.format(block_size))
        # For an IOR read-only operation, retrieve the stored container UUID
        elif operation == 'Read':
            create_cont = False
            self.ior_cmd.flags.value = self.ior_read_flags

        # run IOR Command
        try:
            out = self.run_ior_with_pool(create_cont=create_cont,
                                         fail_on_warning=self.fail_on_warning)
            self.ior_matrix = IorCommand.get_ior_metrics(out)
            results.put("PASS")
        except (CommandFailure, TestFail) as _error:
            results.put("FAIL")
Example #11
    def setUp(self):
        """Set up each test case."""
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')
        self.mpiio_oclass = self.params.get("mpiio_oclass", '/run/ior/*')

        # Get the test params
        self.pool = TestPool(self.context, self.log)
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()
Example #12
    def run_ior_collect_error(self, results, job_num, file_name, clients):
        """Run IOR command and store error in results.

        Args:
            results (dict): A dictionary object to store the ior metrics.
            job_num (int): Assigned job number.
            file_name (str): File name used for self.ior_cmd.test_file.
            clients (list): Client hostnames to run IOR from.
        """
        ior_cmd = IorCommand()
        ior_cmd.get_params(self)
        ior_cmd.set_daos_params(
            group=self.server_group, pool=self.pool, cont_uuid=self.container.uuid)
        testfile = os.path.join("/", file_name)
        ior_cmd.test_file.update(testfile)

        manager = get_job_manager(
            test=self, class_name="Mpirun", job=ior_cmd, subprocess=self.subprocess,
            mpi_type="mpich")
        manager.assign_hosts(clients, self.workdir, self.hostfile_clients_slots)
        ppn = self.params.get("ppn", '/run/ior/client_processes/*')
        manager.ppn.update(ppn, 'mpirun.ppn')
        manager.processes.update(None, 'mpirun.np')

        try:
            ior_output = manager.run()
            results[job_num] = [True]
            # For debugging.
            results[job_num].extend(IorCommand.get_ior_metrics(ior_output))
            # We'll verify the error message.
            results[job_num].append(ior_output.stderr_text)
        except CommandFailure as error:
            results[job_num] = [False, "IOR failed: {}".format(error)]
Example #13
    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super().setUp()
        self.hostfile_clients = None
        self.ior_local_cmd = IorCommand()
        self.ior_local_cmd.get_params(self)
        self.ior_default_flags = self.ior_local_cmd.flags.value
        self.ior_scm_xfersize = self.params.get(
            "transfer_size", '/run/ior/transfersize_blocksize/*', '2048')
        self.ior_read_flags = self.params.get(
            "read_flags", '/run/ior/iorflags/*', '-r -R -k -G 1')
        self.ior_nvme_xfersize = self.params.get(
            "nvme_transfer_size", '/run/ior/transfersize_blocksize/*',
            '16777216')
        # Get the number of daos_engine
        self.engines = self.server_managers[0].manager.job.yaml.engine_params
        self.dmg_command = self.get_dmg_command()
Example #14
    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')
        # Until DAOS-3320 is resolved run IOR for POSIX
        # with single client node
        if self.ior_cmd.api.value == "POSIX":
            self.hostlist_clients = [self.hostlist_clients[0]]
            self.hostfile_clients = write_host_file.write_host_file(
                self.hostlist_clients, self.workdir,
                self.hostfile_clients_slots)
Example #15
    def run_custom_ior_cmd(self,
                           ior_command,
                           clients,
                           results,
                           job_num,
                           intercept=None):
        """Run customized IOR command, not self.ior_cmd.

        Expected to be used with threaded code where multiple IOR commands
        are executed in parallel.

        Displays the pool space before and after the run for reference.

        Args:
            ior_command (IorCommand): Custom IOR command instance.
            clients (list): hosts on which to run ior
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            intercept (str, optional): path to interception library. Defaults to
                None.
        """
        self.log.info("--- IOR Thread %d: Start ---", job_num)
        tsize = ior_command.transfer_size.value
        testfile = os.path.join(self.dfuse.mount_dir.value,
                                "testfile{}{}".format(tsize, job_num))
        if intercept:
            testfile += "intercept"
        ior_command.test_file.update(testfile)

        # Get the custom job manager that's associated with this thread.
        manager = get_job_manager(self, "Mpirun", ior_command, self.subprocess,
                                  "mpich")

        procs = (self.processes // len(self.hostlist_clients)) * len(clients)
        env = ior_command.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.assign_hosts(clients, self.workdir,
                             self.hostfile_clients_slots)
        manager.assign_processes(procs)
        manager.assign_environment(env)

        self.log.info("--- IOR Thread %d: Starting IOR ---", job_num)
        self.display_pool_space()
        try:
            ior_output = manager.run()
            results[job_num] = [True]
            results[job_num].extend(IorCommand.get_ior_metrics(ior_output))
        except CommandFailure as error:
            results[job_num] = [False, "IOR failed: {}".format(error)]
        finally:
            self.display_pool_space()

        self.log.info("--- IOR Thread %d: End ---", job_num)
Example #16
    def create_ior_cmdline(self, job_params, job_spec, pool):
        """Create an IOR cmdline to run in slurm batch.

        Args:
            job_params (str): job params from yaml file
            job_spec (str): specific ior job to run
            pool (obj):   TestPool obj

        Returns:
            list: list of IOR cmdline strings

        """
        command = []
        iteration = self.test_iteration
        ior_params = "/run/" + job_spec + "/"

        ior_cmd = IorCommand()
        ior_cmd.namespace = ior_params
        ior_cmd.get_params(self)
        if iteration is not None and iteration < 0:
            ior_cmd.repetitions.update(1000000)
        ior_cmd.max_duration.update(self.params.get("time", job_params + '*'))
        # IOR job specs with a list of parameters; update each value
        #   transfer_size
        #   block_size
        #   daos object class
        tsize_list = ior_cmd.transfer_size.value
        bsize_list = ior_cmd.block_size.value
        oclass_list = ior_cmd.daos_oclass.value
        for b_size in bsize_list:
            ior_cmd.block_size.update(b_size)
            for o_type in oclass_list:
                ior_cmd.daos_oclass.update(o_type)
                for t_size in tsize_list:
                    ior_cmd.transfer_size.update(t_size)
                    ior_cmd.set_daos_params(self.server_group, pool)
                    # export the user environment to test node
                    exports = ["ALL"]
                    if ior_cmd.api.value == "MPIIO":
                        env = {
                            "CRT_ATTACH_INFO_PATH": os.path.join(
                                self.basepath, "install/tmp"),
                            "DAOS_POOL": str(ior_cmd.daos_pool.value),
                            "MPI_LIB": "\"\"",
                            "DAOS_SVCL": str(ior_cmd.daos_svcl.value),
                            "DAOS_SINGLETON_CLI": 1,
                            "FI_PSM2_DISCONNECT": 1
                        }
                        exports.extend(
                            ["{}={}".format(
                                key, val) for key, val in env.items()])
                    cmd = "srun -l --mpi=pmi2 --export={} {}".format(
                        ",".join(exports), ior_cmd)
                    command.append(cmd)
                    self.log.debug("<<IOR cmdline >>: %s \n", cmd)
        return command
Example #17
    def test_ior_intercept_verify_data(self):
        """Jira ID: DAOS-3502.

        Test Description:
            Purpose of this test is to run ior through dfuse with
            interception library on 5 clients and without interception
            library on 1 client for at least 30 minutes and verify the
            data integrity using ior's Read Verify and Write Verify
            options.

        Use case:
            Run ior with read, write, fpp, read verify, and
            write verify for 30 minutes
            Run ior with read, write, read verify, and
            write verify for 30 minutes

        :avocado: tags=all,full_regression
        :avocado: tags=hw,large
        :avocado: tags=daosio,ior_intercept_verify_data
        """
        self.add_pool()
        self.add_container(self.pool)

        intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
        results = dict()
        client_count = len(self.hostlist_clients)
        w_clients = self.hostlist_clients[0:client_count - 1]
        wo_clients = [self.hostlist_clients[-1]]

        self.run_ior_threads_il(results=results,
                                intercept=intercept,
                                with_clients=w_clients,
                                without_clients=wo_clients)

        IorCommand.log_metrics(self.log,
                               "5 clients - with interception library",
                               results[1])
        IorCommand.log_metrics(self.log,
                               "1 client - without interception library",
                               results[2])
Example #18
    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')
        self.co_prop = self.params.get("container_properties",
                                       "/run/container/*")
        # Until DAOS-3320 is resolved run IOR for POSIX
        # with single client node
        if self.ior_cmd.api.value == "POSIX":
            self.hostlist_clients = [self.hostlist_clients[0]]
            self.hostfile_clients = write_host_file.write_host_file(
                self.hostlist_clients, self.workdir,
                self.hostfile_clients_slots)
        # lock is needed for run_multiple_ior method.
        self.lock = threading.Lock()
Example #19
    def ior_thread(self, pool, oclass, api, test, flags, results):
        """Start threads and wait until all threads are finished.
        Args:
            pool (object): pool handle
            oclass (str): IOR object class
            api (str): IOR api
            test (list): IOR test sequence
            flags (str): IOR flags
            results (queue): queue for returning thread results

        Returns:
            None
        """
        processes = self.params.get("slots", "/run/ior/clientslots/*")
        container_info = {}
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test : Mpich not installed on :"
                      " {}".format(self.hostfile_clients[0]))
        self.pool = pool
        # Define the arguments for the ior_runner_thread method
        ior_cmd = IorCommand()
        ior_cmd.get_params(self)
        ior_cmd.set_daos_params(self.server_group, self.pool)
        ior_cmd.daos_oclass.update(oclass)
        ior_cmd.api.update(api)
        ior_cmd.transfer_size.update(test[2])
        ior_cmd.block_size.update(test[3])
        ior_cmd.flags.update(flags)

        container_info["{}{}{}"
                       .format(oclass,
                               api,
                               test[2])] = str(uuid.uuid4())

        # Define the job manager for the IOR command
        manager = Mpirun(ior_cmd, mpitype="mpich")
        manager.job.daos_cont.update(container_info[key])
        env = ior_cmd.get_default_env(str(manager))
        manager.assign_hosts(self.hostlist_clients, self.workdir, None)
        manager.assign_processes(processes)
        manager.assign_environment(env, True)

        # run IOR Command
        try:
            manager.run()
        except CommandFailure as _error:
            results.put("FAIL")
Example #20
    def start_ior_thread(self, create_cont, operation):
        """Start IOR write/read threads and wait until all threads are finished.

        Args:
            create_cont (bool): whether to create a new container.
            operation (str):
                Write/WriteRead: Write or Write/Read based on the IOR
                    parameters in the yaml file.
                Auto_Write/Auto_Read: Calculate the IOR block size based on
                    the requested storage % to be filled.
        """
        # IOR flags can be Write or Write/Read based on the test yaml
        self.ior_local_cmd.flags.value = self.ior_default_flags

        # Calculate the block size based on server % to fill up.
        if 'Auto' in operation:
            block_size = self.calculate_ior_block_size()
            self.ior_local_cmd.block_size.update('{}'.format(block_size))

        # For an IOR read operation, update the read-only flags from the
        # yaml file.
        if 'Auto_Read' in operation or operation == "Read":
            create_cont = False
            self.ior_local_cmd.flags.value = self.ior_read_flags

        self.ior_local_cmd.set_daos_params(self.server_group, self.pool)
        self.ior_local_cmd.test_file.update('/testfile')

        # Created new container or use the existing container for reading
        if create_cont:
            self.create_container()
        self.ior_local_cmd.dfs_cont.update(self.nvme_local_cont.uuid)

        # Define the job manager for the IOR command
        job_manager_main = get_job_manager(self, "Mpirun", self.ior_local_cmd, mpi_type="mpich")
        env = self.ior_local_cmd.get_default_env(str(job_manager_main))
        job_manager_main.assign_hosts(self.hostlist_clients, self.workdir, None)
        job_manager_main.assign_environment(env, True)
        job_manager_main.assign_processes(self.params.get("np", '/run/ior/client_processes/*'))

        # run IOR Command
        try:
            output = job_manager_main.run()
            self.ior_matrix = IorCommand.get_ior_metrics(output)

            for line in output.stdout_text.splitlines():
                if 'WARNING' in line and self.fail_on_warning:
                    self.result.append("FAIL-IOR command issued warnings.")
        except (CommandFailure, TestFail) as error:
            self.result.append("FAIL - {}".format(error))
Example #21
    def log_metrics(self, without_intercept, with_intercept):
        """Log the ior metrics because the stdout from ior can be mixed
           because of multithreading.

           Args:
               without_intercept (dict): IOR Metrics without using
                                         interception library.
               with_intercept (dict): IOR Metrics using interception
                                      library.
        """
        IorCommand.log_metrics(self.log, "3 clients - without "
                               "interception library", without_intercept[1])
        IorCommand.log_metrics(self.log, "3 clients - with "
                               "interception library", with_intercept[1])
        IorCommand.log_metrics(self.log, "1 client - without "
                               "interception library", without_intercept[2])
        IorCommand.log_metrics(self.log, "1 client - with "
                               "interception library", with_intercept[2])
Example #22
    def run_ior_report_error(self, results, job_num, file_name, pool,
                             container, namespace):
        """Run IOR command and store the results to results dictionary.

        Create a new IorCommand object instead of using the one in IorTestBase because
        we'll run a test that runs multiple IOR processes at the same time.

        Args:
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            file_name (str): File name used for self.ior_cmd.test_file.
            pool (TestPool): Pool to run IOR.
            container (TestContainer): Container to run IOR.
            namespace (str): Namespace (yaml path) for the IorCommand
                parameters.
        # Update the object class depending on the test case.
        ior_cmd = IorCommand(namespace=namespace)
        ior_cmd.get_params(self)

        # Standard IOR prep sequence.
        ior_cmd.set_daos_params(self.server_group, pool, container.uuid)
        testfile = os.path.join("/", file_name)
        ior_cmd.test_file.update(testfile)

        manager = get_job_manager(test=self,
                                  class_name="Mpirun",
                                  job=ior_cmd,
                                  subprocess=self.subprocess,
                                  mpi_type="mpich")
        manager.assign_hosts(self.hostlist_clients, self.workdir,
                             self.hostfile_clients_slots)
        ppn = self.params.get("ppn", '/run/ior/client_processes/*')
        manager.ppn.update(ppn, 'mpirun.ppn')
        manager.processes.update(None, 'mpirun.np')

        # Run the command.
        try:
            self.log.info("--- IOR command %d start ---", job_num)
            ior_output = manager.run()
            results[job_num] = [True]
            # For debugging.
            results[job_num].extend(IorCommand.get_ior_metrics(ior_output))
            # Command worked, but append the error message if any.
            results[job_num].append(ior_output.stderr_text)
            self.log.info("--- IOR command %d end ---", job_num)
        except CommandFailure as error:
            self.log.info("--- IOR command %d failed ---", job_num)
            results[job_num] = [False, "IOR failed: {}".format(error)]
Example #23
    def ior_bg_thread(self, results):
        """Start IOR Background thread, This will write small data set and
        keep reading it in loop until it fails or main program exit.

        Args:
            results (queue): queue for returning thread results
        """
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")

        # Define the IOR Command and use the parameter from yaml file.
        ior_bg_cmd = IorCommand()
        ior_bg_cmd.get_params(self)
        ior_bg_cmd.set_daos_params(self.server_group, self.pool)
        ior_bg_cmd.dfs_oclass.update(self.ior_cmd.dfs_oclass.value)
        ior_bg_cmd.api.update(self.ior_cmd.api.value)
        ior_bg_cmd.transfer_size.update(self.ior_scm_xfersize)
        ior_bg_cmd.block_size.update(self.ior_cmd.block_size.value)
        ior_bg_cmd.flags.update(self.ior_cmd.flags.value)
        ior_bg_cmd.test_file.update('/testfile_background')

        # Define the job manager for the IOR command
        self.job_manager = Mpirun(ior_bg_cmd, mpitype="mpich")
        self.create_cont()
        self.job_manager.job.dfs_cont.update(self.container.uuid)
        env = ior_bg_cmd.get_default_env(str(self.job_manager))
        self.job_manager.assign_hosts(self.hostlist_clients, self.workdir,
                                      None)
        self.job_manager.assign_processes(1)
        self.job_manager.assign_environment(env, True)
        self.log.info('----Run IOR in Background-------')
        # run IOR Write Command
        try:
            self.job_manager.run()
        except (CommandFailure, TestFail) as _error:
            results.put("FAIL")
            return

        # run IOR Read Command in loop
        ior_bg_cmd.flags.update(self.ior_read_flags)
        while True:
            try:
                self.job_manager.run()
            except (CommandFailure, TestFail) as _error:
                results.put("FAIL")
                break
Example #24
    def ior_thread(self, pool, oclass, api, test, flags, results):
        """Start threads and wait until all threads are finished.

        Args:
            pool (TestPool): Pool to run IOR command on.
            oclass (str): IOR object class
            api (str): IOR API
            test (list): IOR test sequence
            flags (str): IOR flags
            results (queue): queue for returning thread results

        Returns:
            None

        """
        processes = self.params.get("slots", "/run/ior/clientslots/*")
        container_info = {}

        # Define the arguments for the ior_runner_thread method
        ior_cmd = IorCommand()
        ior_cmd.get_params(self)
        ior_cmd.set_daos_params(self.server_group, pool)
        ior_cmd.dfs_oclass.update(oclass)
        ior_cmd.api.update(api)
        ior_cmd.transfer_size.update(test[2])
        ior_cmd.block_size.update(test[3])
        ior_cmd.flags.update(flags)

        container_info["{}{}{}"
                       .format(oclass,
                               api,
                               test[2])] = str(uuid.uuid4())

        # Define the job manager for the IOR command
        job_manager = get_job_manager(self, "Mpirun", ior_cmd, mpi_type="mpich")
        job_manager.job.dfs_cont.update(container_info[key])
        env = ior_cmd.get_default_env(str(job_manager))
        job_manager.assign_hosts(self.hostlist_clients, self.workdir, None)
        job_manager.assign_processes(processes)
        job_manager.assign_environment(env, True)

        # run IOR Command
        try:
            job_manager.run()
        except CommandFailure as _error:
            results.put("FAIL")
Example #25
    def ior_bg_thread(self):
        """Start IOR Background thread, This will write small data set and
        keep reading it in loop until it fails or main program exit.

        """

        # Define the IOR Command and use the parameter from yaml file.
        ior_bg_cmd = IorCommand()
        ior_bg_cmd.get_params(self)
        ior_bg_cmd.set_daos_params(self.server_group, self.pool)
        ior_bg_cmd.dfs_oclass.update(self.ior_cmd.dfs_oclass.value)
        ior_bg_cmd.api.update(self.ior_cmd.api.value)
        ior_bg_cmd.transfer_size.update(self.ior_scm_xfersize)
        ior_bg_cmd.block_size.update(self.ior_cmd.block_size.value)
        ior_bg_cmd.flags.update(self.ior_cmd.flags.value)
        ior_bg_cmd.test_file.update('/testfile_background')

        # Define the job manager for the IOR command
        job_manager = get_job_manager(self,
                                      "Mpirun",
                                      ior_bg_cmd,
                                      mpi_type="mpich")

        # create container
        container = self.get_container(self.pool)

        job_manager.job.dfs_cont.update(container.uuid)
        env = ior_bg_cmd.get_default_env(str(job_manager))
        job_manager.assign_hosts(self.hostlist_clients, self.workdir, None)
        job_manager.assign_processes(1)
        job_manager.assign_environment(env, True)
        self.log.info('----Run IOR in Background-------')
        # run IOR Write Command
        try:
            job_manager.run()
        except (CommandFailure, TestFail) as _error:
            self.test_result.append("FAIL")
            return

        # run IOR Read Command in loop
        ior_bg_cmd.flags.update(self.ior_read_flags)
        while True:
            try:
                job_manager.run()
            except (CommandFailure, TestFail) as _error:
                break
Example #26
    def ior_thread(self, pool, oclass, api, test, flags, results):
        """This method calls job manager for IOR command
        invocation.
        Args:
            pool (object): pool handle
            oclass (str): IOR object class
            API (str): IOR API
            test (list): IOR test sequence
            flags (str): IOR flags
            results (queue): queue for returning thread results
        """
        processes = self.params.get("slots", "/run/ior/clientslots/*")
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")
        self.pool = pool
        # Define the arguments for the ior_runner_thread method
        ior_cmd = IorCommand()
        ior_cmd.get_params(self)
        ior_cmd.set_daos_params(self.server_group, self.pool)
        ior_cmd.dfs_oclass.update(oclass)
        ior_cmd.api.update(api)
        ior_cmd.transfer_size.update(test[0])
        ior_cmd.block_size.update(test[1])
        ior_cmd.flags.update(flags)
        if "-w" in flags:
            self.container_info["{}{}{}"
                                .format(oclass,
                                        api,
                                        test[0])] = str(uuid.uuid4())

        # Define the job manager for the IOR command
        manager = Mpirun(ior_cmd, mpitype="mpich")
        key = "".join([oclass, api, str(test[0])])
        manager.job.dfs_cont.update(self.container_info[key])
        env = ior_cmd.get_default_env(str(manager))
        manager.assign_hosts(self.hostlist_clients, self.workdir, None)
        manager.assign_processes(processes)
        manager.assign_environment(env, True)

        # run IOR Command
        try:
            manager.run()
        except CommandFailure as _error:
            results.put("FAIL")
Example #27
    def run_multiple_ior(self,
                         hostfile,
                         num_clients,
                         results,
                         job_num,
                         intercept=None):
        # pylint: disable=too-many-arguments
        """Run the IOR command.

        Args:
            hostfile (str): path to the hostfile listing the client hosts
            num_clients (int): number of client hosts to run on
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            intercept (str, optional): path to interception library.
                Defaults to None.
        """
        self.lock.acquire(True)
        tsize = self.ior_cmd.transfer_size.value
        testfile = os.path.join(self.dfuse.mount_dir.value,
                                "testfile{}{}".format(tsize, job_num))
        if intercept:
            testfile += "intercept"
        self.ior_cmd.test_file.update(testfile)
        manager = self.get_ior_job_manager_command()
        procs = (self.processes // len(self.hostlist_clients)) * num_clients
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.setup_command(env, hostfile, procs)
        self.lock.release()
        try:
            self.pool.display_pool_daos_space()
            out = manager.run()
            self.lock.acquire(True)
            results[job_num] = IorCommand.get_ior_metrics(out)
            self.lock.release()
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()
Example #28
    def run_multiple_ior(self, clients, results, job_num, intercept=None):
        """Run the IOR command.

        Args:
            clients (list): hosts on which to run ior
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            intercept (str, optional): path to interception library. Defaults to
                None.
        """
        self.lock.acquire(True)
        tsize = self.ior_cmd.transfer_size.value
        testfile = os.path.join(self.dfuse.mount_dir.value,
                                "testfile{}{}".format(tsize, job_num))
        if intercept:
            testfile += "intercept"
        self.ior_cmd.test_file.update(testfile)
        manager = self.get_ior_job_manager_command()
        procs = (self.processes // len(self.hostlist_clients)) * len(clients)
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.assign_hosts(clients, self.workdir,
                             self.hostfile_clients_slots)
        manager.assign_processes(procs)
        manager.assign_environment(env)
        self.lock.release()
        try:
            self.pool.display_pool_daos_space()
            out = manager.run()
            self.lock.acquire(True)
            results[job_num] = IorCommand.get_ior_metrics(out)
            self.lock.release()
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()
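
    # Hedged sketch (not part of the original code): run run_multiple_ior
    # concurrently for several client groups; the self.lock acquired inside
    # run_multiple_ior keeps the shared ior_cmd and results updates thread
    # safe, per the setUp comment above.
    def _run_multiple_ior_sketch(self, client_groups, intercept=None):
        """Run IOR for each client group in its own thread."""
        results = {}
        threads = [
            threading.Thread(target=self.run_multiple_ior,
                             args=(clients, results, job_num, intercept))
            for job_num, clients in enumerate(client_groups, start=1)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        return results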
Example #29
    def test_metadata_server_restart(self):
        """JIRA ID: DAOS-1512.

        Test Description:
            This test will verify 2000 small-size IOR containers after a
            server restart. The test will write IOR in 5 different threads
            for faster execution time. Each thread will create 400 (8 byte)
            containers in the same pool. Restart the servers, read the IOR
            container files written previously, and validate data integrity
            by using the IOR options "-R -G 1".

        Use Cases:
            ?

        :avocado: tags=metadata,metadata_ior,nvme,large
        """
        files_per_thread = 400
        total_ior_threads = 5
        self.out_queue = queue.Queue()

        processes = self.params.get("slots", "/run/ior/clientslots/*")

        list_of_uuid_lists = [
            [str(uuid.uuid4()) for _ in range(files_per_thread)]
            for _ in range(total_ior_threads)]

        # Launch threads to run IOR to write data, restart the agents and
        # servers, and then run IOR to read the data
        for operation in ("write", "read"):
            # Create the IOR threads
            threads = []
            for index in range(total_ior_threads):
                # Define the arguments for the ior_runner_thread method
                ior_cmd = IorCommand()
                ior_cmd.get_params(self)
                ior_cmd.set_daos_params(self.server_group, self.pool)
                ior_cmd.flags.value = self.params.get(
                    "F", "/run/ior/ior{}flags/".format(operation))

                # Define the job manager for the IOR command
                manager = Orterun(ior_cmd)
                env = ior_cmd.get_default_env(str(manager))
                manager.assign_hosts(self.hostlist_clients, self.workdir, None)
                manager.assign_processes(processes)
                manager.assign_environment(env)

                # Add a thread for these IOR arguments
                threads.append(
                    threading.Thread(
                        target=ior_runner_thread,
                        kwargs={
                            "manager": manager,
                            "uuids": list_of_uuid_lists[index],
                            "results": self.out_queue}))

                self.log.info(
                    "Creatied %s thread %s with container uuids %s", operation,
                    index, list_of_uuid_lists[index])

            # Launch the IOR threads
            if self.thread_control(threads, operation) == "FAIL":
                self.d_log.error("IOR {} Thread FAIL".format(operation))
                self.fail("IOR {} Thread FAIL".format(operation))

            # Restart the agents and servers after the write / before the read
            if operation == "write":
                # Stop the agents
                errors = self.stop_agents()
                self.assertEqual(
                    len(errors), 0,
                    "Error stopping agents:\n  {}".format("\n  ".join(errors)))

                # Stop the servers
                errors = self.stop_servers()
                self.assertEqual(
                    len(errors), 0,
                    "Error stopping servers:\n  {}".format("\n  ".join(errors)))

                # Start the agents
                self.start_agent_managers()

                # Start the servers
                self.start_server_managers()
Example #30
    def test_metadata_server_restart(self):
        """JIRA ID: DAOS-1512.

        Test Description:
            This test will verify 2000 small-size IOR containers after a
            server restart. The test will write IOR in 5 different threads
            for faster execution time. Each thread will create 400 (8 byte)
            containers in the same pool. Restart the servers, read the IOR
            container files written previously, and validate data integrity
            by using the IOR options "-R -G 1".

        Use Cases:
            ?

        :avocado: tags=metadata,metadata_ior,nvme,small
        """
        files_per_thread = 400
        total_ior_threads = 5
        self.out_queue = Queue.Queue()

        processes = self.params.get("slots", "/run/ior/clientslots/*")

        list_of_uuid_lists = [[
            str(uuid.uuid4()) for _ in range(files_per_thread)
        ] for _ in range(total_ior_threads)]

        # Launch threads to run IOR to write data, restart the agents and
        # servers, and then run IOR to read the data
        for operation in ("write", "read"):
            # Create the IOR threads
            threads = []
            for index in range(total_ior_threads):
                # Define the arguments for the ior_runner_thread method
                ior_cmd = IorCommand()
                ior_cmd.get_params(self)
                ior_cmd.set_daos_params(self.server_group, self.pool)
                ior_cmd.flags.value = self.params.get(
                    "F", "/run/ior/ior{}flags/".format(operation))

                # Add a thread for these IOR arguments
                threads.append(
                    threading.Thread(target=ior_runner_thread,
                                     kwargs={
                                         "ior_cmd": ior_cmd,
                                         "uuids": list_of_uuid_lists[index],
                                         "mgr": self.orterun,
                                         "attach": self.tmp,
                                         "hostfile": self.hostfile_clients,
                                         "procs": processes,
                                         "results": self.out_queue
                                     }))

                self.log.info("Creatied %s thread %s with container uuids %s",
                              operation, index, list_of_uuid_lists[index])

            # Launch the IOR threads
            if self.thread_control(threads, operation) == "FAIL":
                self.d_log.error("IOR {} Thread FAIL".format(operation))
                self.fail("IOR {} Thread FAIL".format(operation))

            # Restart the agents and servers after the write / before the read
            if operation == "write":
                # Stop the agents and servers
                if self.agent_sessions:
                    stop_agent(self.agent_sessions, self.hostlist_clients)
                stop_server(hosts=self.hostlist_servers)

                # Start the agents
                self.agent_sessions = run_agent(self.basepath,
                                                self.hostlist_clients,
                                                self.hostlist_servers)

                # Start the servers
                run_server(self.hostfile_servers,
                           self.server_group,
                           self.basepath,
                           clean=False)