Example #1
0
    def __init__(self, daosbinpath, runnerpath, timeout=300):
        """Create a ServerManager object.

        Args:
            daosbinpath (str): Path to daos bin
            runnerpath (str): Path to Orterun binary.
            timeout (int, optional): Time for the server to start.
                Defaults to 300.
        """
        super(ServerManager, self).__init__("/run/server_manager/*", "", "")

        self.daosbinpath = daosbinpath
        # Hosts running daos_server; presumably assigned later through a
        # hosts setter -- confirm against the rest of the class.
        self._hosts = None

        # Setup orterun command defaults; the job run by orterun is the
        # daos_server command built from the daos bin path.
        self.runner = Orterun(DaosServer(self.daosbinpath), runnerpath, True)

        # Setup server command defaults
        self.runner.job.action.value = "start"
        self.runner.job.get_action_command()

        # Parameters that user can specify in the test yaml to modify behavior.
        self.debug = BasicParameter(None, True)  # ServerCommand param
        self.insecure = BasicParameter(None, True)  # ServerCommand param
        self.recreate = BasicParameter(None, False)  # ServerCommand param
        self.sudo = BasicParameter(None, False)  # ServerCommand param
        self.srv_timeout = BasicParameter(None, timeout)  # ServerCommand param
        self.report_uri = BasicParameter(None)  # Orterun param
        self.enable_recovery = BasicParameter(None, True)  # Orterun param
        self.export = BasicParameter(None)  # Orterun param
Example #2
0
    def get_job_manager_command(self, manager):
        """Get the MPI job manager command for Mdtest.

        Args:
            manager (str): MPI framework to use; "MPICH" selects mpirun,
                any other value selects orterun (OpenMPI).

        Returns:
            JobManager: the object for the mpi job manager command

        """
        # Initialize MpioUtils if mdtest needs to be run using mpich
        if manager == "MPICH":
            mpio_util = MpioUtils()
            # NOTE(review): the original compared "is False"; mpich_installed()
            # is assumed to return a bool, so a plain truthiness check is used
            # per PEP 8 -- confirm the helper never returns None for "unknown".
            if not mpio_util.mpich_installed(self.hostlist_clients):
                self.fail("Exiting Test: Mpich not installed")
            path = os.path.join(mpio_util.mpichinstall, "bin")
            return Mpirun(self.mdtest_cmd, path)

        # Default: use orterun from the OpenMPI installation prefix
        path = os.path.join(self.ompi_prefix, "bin")
        return Orterun(self.mdtest_cmd, path)
Example #3
0
class ServerManager(ExecutableCommand):
    """Defines object to manage server functions and launch server command."""
    def __init__(self, daosbinpath, runnerpath, timeout=300):
        """Create a ServerManager object.

        Args:
            daosbinpath (str): Path to daos bin
            runnerpath (str): Path to Orterun binary.
            timeout (int, optional): Time for the server to start.
                Defaults to 300.
        """
        super(ServerManager, self).__init__("/run/server_manager/*", "", "")

        self.daosbinpath = daosbinpath
        # Hosts running daos_server; set via the hosts property setter.
        self._hosts = None

        # Setup orterun command defaults; the job run by orterun is the
        # daos_server command built from the daos bin path.
        self.runner = Orterun(DaosServer(self.daosbinpath), runnerpath, True)

        # Setup server command defaults
        self.runner.job.action.value = "start"
        self.runner.job.get_action_command()

        # Parameters that user can specify in the test yaml to modify behavior.
        self.debug = BasicParameter(None, True)  # ServerCommand param
        self.insecure = BasicParameter(None, True)  # ServerCommand param
        self.recreate = BasicParameter(None, False)  # ServerCommand param
        self.sudo = BasicParameter(None, False)  # ServerCommand param
        self.srv_timeout = BasicParameter(None, timeout)  # ServerCommand param
        self.report_uri = BasicParameter(None)  # Orterun param
        self.enable_recovery = BasicParameter(None, True)  # Orterun param
        self.export = BasicParameter(None)  # Orterun param

    @property
    def hosts(self):
        """Hosts attribute getter.

        Returns:
            list: hosts assigned through the setter, or None if unset
        """
        return self._hosts

    @hosts.setter
    def hosts(self, value):
        """Hosts attribute setter.

        Also updates the orterun process count, host file, and the job's
        server list to match the new host list.

        Args:
            value (tuple): (list of hosts, workdir, slots)
        """
        self._hosts, workdir, slots = value
        self.runner.processes.value = len(self._hosts)
        self.runner.hostfile.value = write_host_file(self._hosts, workdir,
                                                     slots)
        self.runner.job.server_list = self._hosts

    def get_params(self, test):
        """Get values from the yaml file.

        Assign the ServerManager parameters to their respective ServerCommand
        and Orterun class parameters.

        Args:
            test (Test): avocado Test object
        """
        # Routing tables: which manager parameter feeds which sub-command.
        server_params = ["debug", "sudo", "srv_timeout"]
        server_start_params = ["insecure", "recreate"]
        runner_params = ["enable_recovery", "export", "report_uri"]
        super(ServerManager, self).get_params(test)
        self.runner.job.yaml_params.get_params(test)
        self.runner.get_params(test)
        for name in self.get_param_names():
            if name in server_params:
                if name == "sudo":
                    # "sudo" is assigned as a plain attribute on the job
                    setattr(self.runner.job, name, getattr(self, name).value)
                elif name == "srv_timeout":
                    # the manager's "srv_timeout" maps to the job's "timeout"
                    setattr(self.runner.job, "timeout",
                            getattr(self, name).value)
                else:
                    getattr(self.runner.job, name).value = getattr(self,
                                                                   name).value
            if name in server_start_params:
                getattr(self.runner.job.action_command, name).value = \
                    getattr(self, name).value
            if name in runner_params:
                getattr(self.runner, name).value = getattr(self, name).value

        # Run daos_server with test variant specific log file names if specified
        self.runner.job.yaml_params.update_log_files(
            getattr(test, "control_log"), getattr(test, "helper_log"),
            getattr(test, "server_log"))

    def run(self):
        """Execute the runner subprocess.

        Returns:
            the result of running the orterun command

        """
        self.log.info("Start CMD>>> %s", str(self.runner))

        # Temporary display debug mount information
        self.log.info("%s", "=" * 80)
        pcmd(self._hosts, "df -h -t tmpfs", True, None, None)
        self.log.info("%s", "=" * 80)

        return self.runner.run()

    def start(self, yamlfile):
        """Start the server through the runner.

        Prepares the SCM/NVMe storage indicated by the server yaml
        configuration, starts the servers in "format" mode, formats the
        storage, and waits for the servers to reach "normal" mode.

        Args:
            yamlfile (str): daos_server configuration yaml file

        Returns:
            bool: True if the servers were started

        Raises:
            ServerFailed: if the servers fail to start before or after the
                storage format

        """
        storage_prep_flag = ""
        self.runner.job.set_config(yamlfile)
        self.server_clean()

        # Prepare SCM storage in servers
        if self.runner.job.yaml_params.is_scm():
            storage_prep_flag = "dcpm"
            self.log.info("Performing SCM storage prepare in <format> mode")
        else:
            storage_prep_flag = "ram"

        # Prepare nvme storage in servers
        if self.runner.job.yaml_params.is_nvme():
            if storage_prep_flag == "dcpm":
                storage_prep_flag = "dcpm_nvme"
            elif storage_prep_flag == "ram":
                storage_prep_flag = "ram_nvme"
            else:
                storage_prep_flag = "nvme"
            self.log.info("Performing NVMe storage prepare in <format> mode")
            # Make sure log file has been created for ownership change
            lfile = self.runner.job.yaml_params.server_params[
                -1].log_file.value
            if lfile is not None:
                self.log.info("Creating log file")
                cmd_touch_log = "touch {}".format(lfile)
                pcmd(self._hosts, cmd_touch_log, False)
        if storage_prep_flag != "ram":
            # Real devices need prepare; run the servers over ssh as root
            self.storage_prepare(getpass.getuser(), storage_prep_flag)
            self.runner.mca.update({"plm_rsh_args": "-l root"}, "orterun.mca",
                                   True)

        # Start the server and wait for each host to require a SCM format
        self.runner.job.mode = "format"
        try:
            self.run()
        except CommandFailure as error:
            raise ServerFailed(
                "Failed to start servers before format: {}".format(error))

        # Format storage and wait for server to change ownership
        self.log.info("Formatting hosts: <%s>", self._hosts)
        servers_with_ports = [
            "{}:{}".format(host, self.runner.job.yaml_params.port)
            for host in self._hosts
        ]
        storage_format(self.daosbinpath, ",".join(servers_with_ports))

        # Wait for all the daos_io_servers to start
        self.runner.job.mode = "normal"
        if not self.runner.job.check_subprocess_status(self.runner.process):
            raise ServerFailed("Failed to start servers after format")

        return True

    def stop(self):
        """Stop the server through the runner.

        Raises:
            ServerFailed: if the servers fail to stop

        """
        self.log.info("Stopping servers")
        if self.runner.job.yaml_params.is_nvme():
            # NVMe-backed servers are killed and their storage reset rather
            # than stopped through the runner.
            self.kill()
            self.storage_reset()
            # Make sure the mount directory belongs to non-root user
            self.log.info("Changing ownership of mount to non-root user")
            cmd = "sudo chown -R {0}:{0} /mnt/daos*".format(getpass.getuser())
            pcmd(self._hosts, cmd, False)
        else:
            try:
                self.runner.stop()
            except CommandFailure as error:
                raise ServerFailed("Failed to stop servers:{}".format(error))

    def server_clean(self):
        """Prepare the hosts before starting daos server."""
        # Kill any daos servers running on the hosts
        self.kill()
        # Clean up any files that exist on the hosts
        self.clean_files()

    def kill(self):
        """Forcably kill any daos server processes running on hosts.

        Sometimes stop doesn't get everything.  Really whack everything
        with this.

        """
        # SIGINT first for a graceful shutdown, then SIGKILL for stragglers
        kill_cmds = [
            "sudo pkill '(daos_server|daos_io_server)' --signal INT",
            "sleep 5",
            "pkill '(daos_server|daos_io_server)' --signal KILL",
        ]
        self.log.info("Killing any server processes")
        pcmd(self._hosts, "; ".join(kill_cmds), False, None, None)

    def clean_files(self):
        """Clean the tmpfs on the servers."""
        # Build a de-duplicated command list across all server instances
        clean_cmds = []
        for server_params in self.runner.job.yaml_params.server_params:
            scm_mount = server_params.scm_mount.value
            self.log.info("Cleaning up the %s directory.", str(scm_mount))

            # Remove the superblocks
            cmd = "rm -fr {}/*".format(scm_mount)
            if cmd not in clean_cmds:
                clean_cmds.append(cmd)

            # Dismount the scm mount point
            cmd = "while sudo umount {}; do continue; done".format(scm_mount)
            if cmd not in clean_cmds:
                clean_cmds.append(cmd)

            if self.runner.job.yaml_params.is_scm():
                scm_list = server_params.scm_list.value
                if isinstance(scm_list, list):
                    self.log.info("Cleaning up the following device(s): %s.",
                                  ", ".join(scm_list))
                    # Umount and wipefs the dcpm device
                    cmd_list = [
                        "for dev in {}".format(" ".join(scm_list)),
                        "do mount=$(lsblk $dev -n -o MOUNTPOINT)",
                        "if [ ! -z $mount ]", "then while sudo umount $mount",
                        "do continue", "done", "fi", "sudo wipefs -a $dev",
                        "done"
                    ]
                    cmd = "; ".join(cmd_list)
                    if cmd not in clean_cmds:
                        clean_cmds.append(cmd)

        pcmd(self._hosts, "; ".join(clean_cmds), True)

    def storage_prepare(self, user, device_type):
        """Prepare server's storage using the DAOS server's yaml settings file.

        Args:
            user (str): username for file permissions
            device_type (str): storage type - scm or nvme

        Raises:
            ServerFailed: if server failed to prepare storage

        """
        # Get the daos_server from the install path. Useful for testing
        # with daos built binaries.
        dev_param = ""
        device_args = ""
        daos_srv_bin = os.path.join(self.daosbinpath, "daos_server")
        if device_type == "dcpm":
            dev_param = "-s"
        elif device_type == "dcpm_nvme":
            device_args = " --hugepages=4096"
        elif device_type in ("ram_nvme", "nvme"):
            dev_param = "-n"
            device_args = " --hugepages=4096"
        else:
            raise ServerFailed("Invalid device type")
        cmd = "{} storage prepare {} -u \"{}\" {} -f".format(
            daos_srv_bin, dev_param, user, device_args)
        result = pcmd(self._hosts, cmd, timeout=120)
        # Success is every host reporting a single exit status of 0
        # (assumes pcmd keys its result by exit code -- verify).
        if len(result) > 1 or 0 not in result:
            raise ServerFailed(
                "Error preparing {} storage".format(device_type))

    def storage_reset(self):
        """Reset the servers' storage.

        NOTE: Don't enhance this method to reset SCM. SCM will not be in a
        useful state for running next tests.

        Raises:
            ServerFailed: if server failed to reset storage

        """
        daos_srv_bin = os.path.join(self.daosbinpath, "daos_server")
        cmd = "{} storage prepare -n --reset -f".format(daos_srv_bin)
        result = pcmd(self._hosts, cmd)
        # Success is every host reporting a single exit status of 0
        if len(result) > 1 or 0 not in result:
            raise ServerFailed("Error resetting NVMe storage")
Example #4
0
    def test_metadata_server_restart(self):
        """JIRA ID: DAOS-1512.

        Test Description:
            This test will verify 2000 IOR small size container after server
            restart. Test will write IOR in 5 different threads for faster
            execution time. Each thread will create 400 (8bytes) containers to
            the same pool. Restart the servers, read IOR container file written
            previously and validate data integrity by using IOR option
            "-R -G 1".

        Use Cases:
            ?

        :avocado: tags=metadata,metadata_ior,nvme,small
        """
        files_per_thread = 400
        total_ior_threads = 5
        # Queue through which ior_runner_thread workers report their results
        self.out_queue = queue.Queue()

        processes = self.params.get("slots", "/run/ior/clientslots/*")

        # One list of container uuids per IOR thread; the same uuids are
        # reused for the write and the read phases.
        list_of_uuid_lists = [[
            str(uuid.uuid4()) for _ in range(files_per_thread)
        ] for _ in range(total_ior_threads)]

        # Launch threads to run IOR to write data, restart the agents and
        # servers, and then run IOR to read the data
        for operation in ("write", "read"):
            # Create the IOR threads
            threads = []
            for index in range(total_ior_threads):
                # Define the arguments for the ior_runner_thread method
                ior_cmd = IorCommand()
                ior_cmd.get_params(self)
                ior_cmd.set_daos_params(self.server_group, self.pool)
                ior_cmd.flags.value = self.params.get(
                    "F", "/run/ior/ior{}flags/".format(operation))

                # Define the job manager for the IOR command
                path = os.path.join(self.ompi_prefix, "bin")
                manager = Orterun(ior_cmd, path)
                env = ior_cmd.get_default_env(str(manager), self.tmp)
                manager.setup_command(env, self.hostfile_clients, processes)

                # Add a thread for these IOR arguments
                threads.append(
                    threading.Thread(target=ior_runner_thread,
                                     kwargs={
                                         "manager": manager,
                                         "uuids": list_of_uuid_lists[index],
                                         "results": self.out_queue
                                     }))

                # Log message typo fixed ("Creatied" -> "Created")
                self.log.info("Created %s thread %s with container uuids %s",
                              operation, index, list_of_uuid_lists[index])

            # Launch the IOR threads
            if self.thread_control(threads, operation) == "FAIL":
                self.d_log.error("IOR {} Thread FAIL".format(operation))
                self.fail("IOR {} Thread FAIL".format(operation))

            # Restart the agents and servers after the write / before the read
            if operation == "write":
                # Stop the agents and servers
                if self.agent_sessions:
                    stop_agent(self.agent_sessions, self.hostlist_clients)
                stop_server(hosts=self.hostlist_servers)

                # Start the agents
                self.agent_sessions = run_agent(self.basepath,
                                                self.hostlist_clients,
                                                self.hostlist_servers)

                # Start the servers
                run_server(self,
                           self.hostfile_servers,
                           self.server_group,
                           clean=False)
Example #5
0
class ServerManager(ExecutableCommand):
    """Defines object to manage server functions and launch server command."""
    def __init__(self, daosbinpath, runnerpath, timeout=300):
        """Create a ServerManager object.

        Args:
            daosbinpath (str): Path to daos bin
            runnerpath (str): Path to Orterun binary.
            timeout (int, optional): Time for the server to start.
                Defaults to 300.
        """
        super(ServerManager, self).__init__("/run/server_manager/*", "", "")

        self.daosbinpath = daosbinpath
        # Hosts running daos_server; set via the hosts property setter.
        self._hosts = None

        # Setup orterun command defaults; the job run by orterun is the
        # daos_server command built from the daos bin path.
        self.runner = Orterun(DaosServer(self.daosbinpath), runnerpath, True)

        # Setup server command defaults
        self.runner.job.action.value = "start"
        self.runner.job.get_action_command()

        # Parameters that user can specify in the test yaml to modify behavior.
        self.debug = BasicParameter(None, True)  # ServerCommand param
        self.insecure = BasicParameter(None, True)  # ServerCommand param
        self.recreate = BasicParameter(None, True)  # ServerCommand param
        self.sudo = BasicParameter(None, False)  # ServerCommand param
        self.srv_timeout = BasicParameter(None, timeout)  # ServerCommand param
        self.report_uri = BasicParameter(None)  # Orterun param
        self.enable_recovery = BasicParameter(None, True)  # Orterun param
        self.export = BasicParameter(None)  # Orterun param

    @property
    def hosts(self):
        """Hosts attribute getter.

        Returns:
            list: hosts assigned through the setter, or None if unset
        """
        return self._hosts

    @hosts.setter
    def hosts(self, value):
        """Hosts attribute setter.

        Also updates the orterun process count, host file, and the job's
        server count and list to match the new host list.

        Args:
            value (tuple): (list of hosts, workdir, slots)
        """
        self._hosts, workdir, slots = value
        self.runner.processes.value = len(self._hosts)
        self.runner.hostfile.value = write_host_file(self._hosts, workdir,
                                                     slots)
        self.runner.job.server_cnt = len(self._hosts)
        self.runner.job.server_list = self._hosts

    def get_params(self, test):
        """Get values from the yaml file.

        Assign the ServerManager parameters to their respective ServerCommand
        and Orterun class parameters.

        Args:
            test (Test): avocado Test object
        """
        # Routing tables: which manager parameter feeds which sub-command.
        server_params = ["debug", "sudo", "srv_timeout"]
        server_start_params = ["insecure", "recreate"]
        runner_params = ["enable_recovery", "export", "report_uri"]
        super(ServerManager, self).get_params(test)
        self.runner.job.yaml_params.get_params(test)
        self.runner.get_params(test)
        for name in self.get_param_names():
            if name in server_params:
                if name == "sudo":
                    # "sudo" is assigned as a plain attribute on the job
                    setattr(self.runner.job, name, getattr(self, name).value)
                elif name == "srv_timeout":
                    # NOTE(review): this sets a "srv_timeout" attribute on the
                    # job; another variant of this class maps it to
                    # job.timeout -- confirm which attribute the job reads.
                    setattr(self.runner.job, name, getattr(self, name).value)
                else:
                    getattr(self.runner.job, name).value = getattr(self,
                                                                   name).value
            if name in server_start_params:
                getattr(self.runner.job.action_command, name).value = \
                    getattr(self, name).value
            if name in runner_params:
                getattr(self.runner, name).value = getattr(self, name).value

    def run(self):
        """Execute the runner subprocess.

        Returns:
            the result of running the orterun command

        """
        self.log.info("Start CMD>>> %s", str(self.runner))
        return self.runner.run()

    def start(self, yamlfile):
        """Start the server through the runner.

        Prepares the SCM/NVMe storage indicated by the server yaml
        configuration, starts the servers, and formats the storage when
        SCM or NVMe is configured.  A failed post-format status check is
        logged rather than raised.

        Args:
            yamlfile (str): daos_server configuration yaml file

        Returns:
            bool: True if the servers were started

        Raises:
            ServerFailed: if the servers fail to start

        """
        storage_prep_flag = ""
        self.runner.job.set_config(yamlfile)
        self.server_clean()
        # Prepare SCM storage in servers
        if self.runner.job.yaml_params.is_scm():
            storage_prep_flag = "dcpm"
            self.log.info("Performing SCM storage prepare in <format> mode")
        else:
            storage_prep_flag = "ram"

        # Prepare nvme storage in servers
        if self.runner.job.yaml_params.is_nvme():
            if storage_prep_flag == "dcpm":
                storage_prep_flag = "dcpm_nvme"
            elif storage_prep_flag == "ram":
                storage_prep_flag = "ram_nvme"
            else:
                storage_prep_flag = "nvme"
            self.log.info("Performing NVMe storage prepare in <format> mode")
            # Make sure log file has been created for ownership change
            lfile = self.runner.job.yaml_params.server_params[
                -1].log_file.value
            if lfile is not None:
                self.log.info("Creating log file")
                cmd_touch_log = "touch {}".format(lfile)
                pcmd(self._hosts, cmd_touch_log, False)
        if storage_prep_flag != "ram":
            # Real devices need prepare; run the servers over ssh as root
            storage_prepare(self._hosts, "root", storage_prep_flag)
            self.runner.mca.value = {"plm_rsh_args": "-l root"}

        try:
            self.run()
        except CommandFailure as details:
            self.log.info("<SERVER> Exception occurred: %s", str(details))
            # Kill the subprocess, anything that might have started
            self.kill()
            raise ServerFailed("Failed to start server in {} mode.".format(
                self.runner.job.mode))

        if self.runner.job.yaml_params.is_nvme() or \
           self.runner.job.yaml_params.is_scm():
            # Setup the hostlist to pass to dmg command
            servers_with_ports = [
                "{}:{}".format(host, self.runner.job.yaml_params.port)
                for host in self._hosts
            ]

            # Format storage and wait for server to change ownership
            self.log.info("Formatting hosts: <%s>", self._hosts)
            storage_format(self.daosbinpath, ",".join(servers_with_ports))
            self.runner.job.mode = "normal"
            try:
                self.runner.job.check_subprocess_status(self.runner.process)
            except CommandFailure as error:
                # Post-format failure is only logged, not raised
                self.log.info("Failed to start after format: %s", str(error))

        return True

    def stop(self):
        """Stop the server through the runner.

        Raises:
            ServerFailed: if the servers fail to stop

        """
        self.log.info("Stopping servers")
        if self.runner.job.yaml_params.is_nvme():
            # NVMe-backed servers are killed and their storage reset rather
            # than stopped through the runner.
            self.kill()
            storage_reset(self._hosts)
            # Make sure the mount directory belongs to non-root user
            self.log.info("Changing ownership of mount to non-root user")
            cmd = "sudo chown -R {0}:{0} /mnt/daos*".format(getpass.getuser())
            pcmd(self._hosts, cmd, False)
        else:
            try:
                self.runner.stop()
            except CommandFailure as error:
                raise ServerFailed("Failed to stop servers:{}".format(error))

    def server_clean(self):
        """Prepare the hosts before starting daos server."""
        # Kill any daos servers running on the hosts
        self.kill()
        # Clean up any files that exist on the hosts
        self.clean_files()

    def kill(self):
        """Forcably kill any daos server processes running on hosts.

        Sometimes stop doesn't get everything.  Really whack everything
        with this.

        """
        # SIGINT first for a graceful shutdown, then SIGKILL for stragglers
        kill_cmds = [
            "sudo pkill '(daos_server|daos_io_server)' --signal INT",
            "sleep 5",
            "pkill '(daos_server|daos_io_server)' --signal KILL",
        ]
        self.log.info("Killing any server processes")
        pcmd(self._hosts, "; ".join(kill_cmds), False, None, None)

    def clean_files(self):
        """Clean the tmpfs on the servers."""
        # Only the last server instance's mount/device settings are used here
        scm_mount = self.runner.job.yaml_params.server_params[-1].scm_mount
        scm_list = self.runner.job.yaml_params.server_params[-1].scm_list.value
        clean_cmds = [
            "find /mnt/daos -mindepth 1 -maxdepth 1 -print0 | xargs -0r rm -rf"
        ]
        if self.runner.job.yaml_params.is_nvme():
            clean_cmds.append("sudo rm -rf {0};  \
                               sudo umount {0}".format(scm_mount))
        # scm_mount can be /mnt/daos0 or /mnt/daos1 for two daos_server
        # instances. Presently, not supported in DAOS. The for loop needs
        # to be updated in future to handle it. Single instance pmem
        # device should work now.
        if self.runner.job.yaml_params.is_scm():
            for value in scm_list:
                clean_cmds.append("sudo umount {}; \
                                   sudo wipefs -a {}".format(scm_mount, value))
        self.log.info("Cleanup of %s directory.", str(scm_mount))
        pcmd(self._hosts, "; ".join(clean_cmds), False)
Example #6
0
class ServerManager(ExecutableCommand):
    """Defines object to manage server functions and launch server command."""
    # pylint: disable=no-self-use

    def __init__(self, daosbinpath, runnerpath, attach="/tmp", timeout=300):
        """Create a ServerManager object.

        Args:
            daosbinpath (str): Path to daos bin
            runnerpath (str): Path to Orterun binary.
            attach (str, optional): Defaults to "/tmp".
            timeout (int, optional): Time for the server to start.
                Defaults to 300.
        """
        super(ServerManager, self).__init__("/run/server_manager/*", "", "")

        self.daosbinpath = daosbinpath
        # Hosts running daos_server; set via the hosts property setter.
        self._hosts = None

        # Setup orterun command defaults; the job run by orterun is the
        # daos_server command built from the daos bin path.
        self.runner = Orterun(
            DaosServer(self.daosbinpath), runnerpath, True)

        # Setup server command defaults
        self.runner.job.action.value = "start"
        self.runner.job.get_action_command()

        # Set server environment: directory for CaRT attach info files
        os.environ["CRT_ATTACH_INFO_PATH"] = attach

        # Parameters that user can specify in the test yaml to modify behavior.
        self.debug = BasicParameter(None, True)       # ServerCommand param
        self.attach = BasicParameter(None, attach)    # ServerCommand param
        self.insecure = BasicParameter(None, True)    # ServerCommand param
        self.recreate = BasicParameter(None, True)    # ServerCommand param
        self.sudo = BasicParameter(None, False)       # ServerCommand param
        self.srv_timeout = BasicParameter(None, timeout)   # ServerCommand param
        self.report_uri = BasicParameter(None)             # Orterun param
        self.enable_recovery = BasicParameter(None, True)  # Orterun param
        self.export = BasicParameter(None)                 # Orterun param

    @property
    def hosts(self):
        """Hosts attribute getter.

        Returns:
            list: hosts assigned through the setter, or None if unset
        """
        return self._hosts

    @hosts.setter
    def hosts(self, value):
        """Hosts attribute setter.

        Also updates the orterun process count, host file, and the job's
        server count to match the new host list.

        Args:
            value (tuple): (list of hosts, workdir, slots)
        """
        self._hosts, workdir, slots = value
        self.runner.processes.value = len(self._hosts)
        self.runner.hostfile.value = write_host_file(
            self._hosts, workdir, slots)
        self.runner.job.server_cnt = len(self._hosts)

    def get_params(self, test):
        """Get values from the yaml file and assign them respectively
            to the server command and the orterun command.

        Args:
            test (Test): avocado Test object
        """
        # Routing tables: which manager parameter feeds which sub-command.
        server_params = ["debug", "sudo", "srv_timeout"]
        server_start_params = ["attach", "insecure", "recreate"]
        runner_params = ["enable_recovery", "export", "report_uri"]
        super(ServerManager, self).get_params(test)
        self.runner.job.yaml_params.get_params(test)
        self.runner.get_params(test)
        for name in self.get_param_names():
            if name in server_params:
                if name == "sudo":
                    # "sudo" is assigned as a plain attribute on the job
                    setattr(self.runner.job, name, getattr(self, name).value)
                elif name == "srv_timeout":
                    # NOTE(review): this sets a "srv_timeout" attribute on the
                    # job; another variant of this class maps it to
                    # job.timeout -- confirm which attribute the job reads.
                    setattr(self.runner.job, name, getattr(self, name).value)
                else:
                    getattr(
                        self.runner.job, name).value = getattr(self, name).value
            if name in server_start_params:
                getattr(self.runner.job.action_command, name).value = \
                    getattr(self, name).value
            if name in runner_params:
                getattr(self.runner, name).value = getattr(self, name).value

    def run(self):
        """Execute the runner subprocess.

        Returns:
            the result of running the orterun command

        """
        self.log.info("Start CMD>>> %s", str(self.runner))
        return self.runner.run()

    def start(self, yamlfile):
        """Start the server through the runner.

        Prepares NVMe storage when configured, starts the servers, and
        formats the storage when SCM or NVMe is configured.  A failed
        post-format status check is logged rather than raised.

        Args:
            yamlfile (str): daos_server configuration yaml file

        Returns:
            bool: True if the servers were started

        Raises:
            ServerFailed: if the servers fail to start

        """
        self.runner.job.set_config(yamlfile)
        self.server_clean()

        # Prepare nvme storage in servers
        if self.runner.job.yaml_params.is_nvme():
            self.log.info("Performing nvme storage prepare in <format> mode")
            # NVMe devices need prepare; run the servers over ssh as root
            storage_prepare(self._hosts, "root")
            self.runner.mca.value = {"plm_rsh_args": "-l root"}

            # Make sure log file has been created for ownership change
            lfile = self.runner.job.yaml_params.server_params[-1].log_file.value
            if lfile is not None:
                self.log.info("Creating log file")
                cmd_touch_log = "touch {}".format(lfile)
                pcmd(self._hosts, cmd_touch_log, False)

            # Change ownership of attach info directory
            chmod_attach = "chmod 777 -R {}".format(self.attach.value)
            pcmd(self._hosts, chmod_attach, False)

        try:
            self.run()
        except CommandFailure as details:
            self.log.info("<SERVER> Exception occurred: %s", str(details))
            # Kill the subprocess, anything that might have started
            self.kill()
            raise ServerFailed(
                "Failed to start server in {} mode.".format(
                    self.runner.job.mode))

        if self.runner.job.yaml_params.is_nvme() or \
           self.runner.job.yaml_params.is_scm():
            # Setup the hostlist to pass to dmg command
            servers_with_ports = [
                "{}:{}".format(host, self.runner.job.yaml_params.port)
                for host in self._hosts]

            # Format storage and wait for server to change ownership
            self.log.info("Formatting hosts: <%s>", self._hosts)
            storage_format(self.daosbinpath, ",".join(servers_with_ports))
            self.runner.job.mode = "normal"
            try:
                self.runner.job.check_subprocess_status(self.runner.process)
            except CommandFailure as error:
                # Post-format failure is only logged, not raised
                self.log.info("Failed to start after format: %s", str(error))

            # Change ownership shared attach info file
            chmod_cmds = "sudo chmod 777 {}/daos_server.attach_info_tmp".format(
                self.attach.value)
            pcmd(self._hosts, chmod_cmds, False)

        return True

    def stop(self):
        """Stop the server through the runner.

        Raises:
            ServerFailed: if the servers fail to stop

        """
        self.log.info("Stopping servers")
        if self.runner.job.yaml_params.is_nvme():
            # NVMe-backed servers are killed and their storage reset rather
            # than stopped through the runner.
            self.kill()
            storage_reset(self._hosts)
            # Make sure the mount directory belongs to non-root user
            self.log.info("Changing ownership of mount to non-root user")
            cmd = "sudo chown -R {0}:{0} /mnt/daos*".format(getpass.getuser())
            pcmd(self._hosts, cmd, False)
        else:
            try:
                self.runner.stop()
            except CommandFailure as error:
                raise ServerFailed("Failed to stop servers:{}".format(error))

    def server_clean(self):
        """Prepare the hosts before starting daos server."""
        # Kill any daos servers running on the hosts
        self.kill()
        # Clean up any files that exist on the hosts
        self.clean_files()

    def kill(self):
        """Forcably kill any daos server processes running on hosts.

        Sometimes stop doesn't get everything.  Really whack everything
        with this.

        """
        # SIGINT first for a graceful shutdown, then SIGKILL for stragglers
        kill_cmds = [
            "sudo pkill '(daos_server|daos_io_server)' --signal INT",
            "sleep 5",
            "pkill '(daos_server|daos_io_server)' --signal KILL",
        ]
        self.log.info("Killing any server processes")
        pcmd(self._hosts, "; ".join(kill_cmds), False, None, None)

    def clean_files(self):
        """Clean the tmpfs on the servers."""
        clean_cmds = [
            "find /mnt/daos -mindepth 1 -maxdepth 1 -print0 | xargs -0r rm -rf"
        ]

        # Real storage backends also need the mount point removed/unmounted
        if self.runner.job.yaml_params.is_nvme() or \
           self.runner.job.yaml_params.is_scm():
            clean_cmds.append("sudo rm -rf /mnt/daos; sudo umount /mnt/daos")

        self.log.info("Cleanup of /mnt/daos directory.")
        pcmd(self._hosts, "; ".join(clean_cmds), False)