def get_ior_job_manager_command(self, custom_ior_cmd=None):
    """Get the MPI job manager command for IOR.

    Args:
        custom_ior_cmd (IorCommand): Custom IorCommand instance to create
            job_manager with.

    Returns:
        Mpirun: the job manager object for the IOR command (also stored in
            self.job_manager)

    """
    # Initialize MpioUtils if IOR is running in MPIIO or DFS mode
    if self.ior_cmd.api.value in ["MPIIO", "POSIX", "DFS", "HDF5"]:
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")
    else:
        self.fail("Unsupported IOR API")

    # Use the custom IOR command when provided, otherwise the default one
    ior_cmd = custom_ior_cmd if custom_ior_cmd else self.ior_cmd
    self.job_manager = Mpirun(ior_cmd, self.subprocess, "mpich")
    return self.job_manager
class MpiioTests(TestWithServers):
    """Run ROMIO, LLNL, MPI4PY and HDF5 test suites.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a TestWithServers object."""
        super().__init__(*args, **kwargs)
        # Hostfile slot count is unset so write_host_file uses its default
        self.hostfile_clients_slots = None
        # MpioUtils instance, created per-test in run_test()
        self.mpio = None

    def run_test(self, test_repo, test_name):
        """Execute function to be used by test functions below.

        Creates a pool and container, runs the named MPI-IO test suite, and
        fails the test if known error strings appear in its output.

        test_repo --absolute or relative (to self.mpichinstall) location of
                    test repository
        test_name --name of the test to be run
        """
        # Required to run daos command
        load_mpi("openmpi")

        # Create pool
        self.add_pool(connect=False)

        # create container
        self.add_container(self.pool)

        # initialize MpioUtils
        self.mpio = MpioUtils()
        if not self.mpio.mpich_installed(self.hostlist_clients):
            self.fail("Exiting Test: Mpich not installed")

        # fix up a relative test_repo specification
        if test_repo[0] != '/':
            test_repo = os.path.join(self.mpio.mpichinstall, test_repo)

        # initialize test specific variables
        client_processes = self.params.get("np", '/run/client_processes/')

        try:
            # running tests
            result = self.mpio.run_mpiio_tests(
                self.hostfile_clients, self.pool.uuid, test_repo,
                test_name, client_processes, self.container.uuid)
        except MpioFailed as excep:
            self.fail("<{0} Test Failed> \n{1}".format(test_name, excep))

        # Check output for errors: scan both stdout and stderr for any of
        # the known failure markers and fail the test when one is found
        for output in (result.stdout_text, result.stderr_text):
            match = re.findall(
                r"(non-zero exit code|MPI_Abort|MPI_ABORT|ERROR)", output)
            if match:
                self.log.info(
                    "The following error messages have been detected in the %s "
                    "output:", test_name)
                for item in match:
                    self.log.info(" %s", item)
                self.fail(
                    "Error messages detected in {} output".format(test_name))
class LlnlMpi4pyHdf5(TestWithServers):
    """Runs LLNL, MPI4PY and HDF5 test suites.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a TestWithServers object."""
        super(LlnlMpi4pyHdf5, self).__init__(*args, **kwargs)
        self.hostfile_clients_slots = None
        self.mpio = None

    def setUp(self):
        """Set up the test: create the pool used by run_test()."""
        super(LlnlMpi4pyHdf5, self).setUp()
        # initialize a python pool object then create the underlying
        self.pool = TestPool(
            self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)
        self.pool.create()

    def run_test(self, test_repo, test_name):
        """Execute the named test from the given repository.

        Args:
            test_repo (str): location of test repository
            test_name (str): name of the test to be run
        """
        # initialize MpioUtils
        self.mpio = MpioUtils()
        if not self.mpio.mpich_installed(self.hostlist_clients):
            self.fail("Exiting Test: Mpich not installed")

        # initialise test specific variables
        client_processes = self.params.get("np", '/run/client_processes/')

        try:
            # running tests
            self.mpio.run_llnl_mpi4py_hdf5(
                self.hostfile_clients, self.pool.uuid, test_repo,
                test_name, client_processes)
        except MpioFailed as excep:
            self.fail("<{0} Test Failed> \n{1}".format(test_name, excep))

        # Parsing output to look for failures
        # stderr directed to stdout
        stdout = os.path.join(self.logdir, "stdout")
        error_message = ["non-zero exit code", "MPI_Abort", "MPI_ABORT",
                         "ERROR"]
        # Use a context manager so the log file is always closed (the
        # handle was previously leaked)
        with open(stdout, "r") as searchfile:
            for line in searchfile:
                for error in error_message:
                    if error in line:
                        self.fail(
                            "Test Failed with error_message: {}".format(
                                error))
def ior_thread(self, pool, oclass, api, test, flags, results):
    """Run a single IOR command against the given pool in a thread.

    Puts "FAIL" on the results queue when the IOR command fails.

    Args:
        pool (object): pool handle
        oclass (str): IOR object class
        api (str): IOR api
        test (list): IOR test sequence
        flags (str): IOR flags
        results (queue): queue for returning thread results

    Returns:
        None
    """
    processes = self.params.get("slots", "/run/ior/clientslots/*")
    container_info = {}
    mpio_util = MpioUtils()
    if mpio_util.mpich_installed(self.hostlist_clients) is False:
        # Report the client host list, not self.hostfile_clients[0] which
        # is only the first character of the hostfile path
        self.fail("Exiting Test : Mpich not installed on :"
                  " {}".format(self.hostlist_clients[0]))
    self.pool = pool
    # Define the arguments for the ior_runner_thread method
    ior_cmd = IorCommand()
    ior_cmd.get_params(self)
    ior_cmd.set_daos_params(self.server_group, self.pool)
    ior_cmd.daos_oclass.update(oclass)
    ior_cmd.api.update(api)
    ior_cmd.transfer_size.update(test[2])
    ior_cmd.block_size.update(test[3])
    ior_cmd.flags.update(flags)
    # Register a fresh container UUID keyed by oclass/api/transfer size
    key = "{}{}{}".format(oclass, api, test[2])
    container_info[key] = str(uuid.uuid4())

    # Define the job manager for the IOR command
    manager = Mpirun(ior_cmd, mpitype="mpich")
    manager.job.daos_cont.update(container_info[key])
    env = ior_cmd.get_default_env(str(manager))
    manager.assign_hosts(self.hostlist_clients, self.workdir, None)
    manager.assign_processes(processes)
    manager.assign_environment(env, True)

    # run IOR Command
    try:
        manager.run()
    except CommandFailure as _error:
        results.put("FAIL")
class Romio(TestWithServers):
    """Runs Romio test.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a TestWithServers object."""
        super(Romio, self).__init__(*args, **kwargs)
        self.hostfile_clients_slots = None
        self.mpio = None

    def test_romio(self):
        """
        Test ID: DAOS-1994
        Run Romio test provided in mpich package
        Testing various I/O functions provided in romio test suite
        :avocado: tags=all,mpiio,pr,small,romio
        """
        # setting romio parameters
        romio_test_repo = self.params.get("romio_repo", '/run/romio/')
        # initialize MpioUtils
        self.mpio = MpioUtils()
        if self.mpio.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")
        try:
            # running romio
            self.mpio.run_romio(self.hostlist_clients, romio_test_repo)
            # Parsing output to look for failures
            # stderr directed to stdout
            stdout = self.logdir + "/stdout"
            error_message = ["non-zero exit code", "MPI_Abort", "errors",
                             "failed to create pool",
                             "failed to parse pool UUID",
                             "failed to destroy pool"]
            # Context manager closes the log file (the handle was
            # previously leaked); iterate messages directly instead of
            # by index
            with open(stdout, "r") as searchfile:
                for line in searchfile:
                    for message in error_message:
                        if message in line:
                            self.fail("Romio Test Failed with error_message: "
                                      "{}".format(message))
        except (MpioFailed) as excep:
            self.fail("<Romio Test Failed> \n{}".format(excep))
def get_job_manager_command(self):
    """Get the MPI job manager command for IOR.

    Returns:
        str: the path for the mpi job manager command
    """
    # Every api other than MPIIO runs through the default orterun path
    if self.ior_cmd.api.value != "MPIIO":
        return self.orterun
    # MPIIO requires mpich's mpirun; verify mpich is available first
    mpio_util = MpioUtils()
    if mpio_util.mpich_installed(self.hostlist_clients) is False:
        self.fail("Exiting Test: Mpich not installed")
    return os.path.join(mpio_util.mpichinstall, "bin", "mpirun")
def get_mdtest_job_manager_command(self, manager):
    """Get the MPI job manager command for Mdtest.

    Returns:
        JobManager: the object for the mpi job manager command
    """
    # Anything other than an explicit MPICH request uses orterun
    if manager != "MPICH":
        return Orterun(self.mdtest_cmd)
    # Verify mpich is available before building the Mpirun manager
    mpio_util = MpioUtils()
    if mpio_util.mpich_installed(self.hostlist_clients) is False:
        self.fail("Exiting Test: Mpich not installed")
    return Mpirun(self.mdtest_cmd, mpitype="mpich")
def get_ior_job_manager_command(self):
    """Get the MPI job manager command for IOR.

    Returns:
        str: the path for the mpi job manager command
    """
    supported_apis = ["MPIIO", "DAOS", "POSIX", "DFS"]
    # Only the supported apis can run under mpich's mpirun
    if self.ior_cmd.api.value not in supported_apis:
        self.fail("Unsupported IOR API")
    else:
        # Verify mpich is available on the client hosts
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")
    return Mpirun(self.ior_cmd, mpitype="mpich")
def ior_bg_thread(self, results):
    """Start IOR Background thread.

    This will write a small data set and keep reading it in a loop until
    it fails or the main program exits.

    Args:
        results (queue): queue for returning thread results
    """
    mpio_util = MpioUtils()
    if mpio_util.mpich_installed(self.hostlist_clients) is False:
        self.fail("Exiting Test: Mpich not installed")

    # Define the IOR Command and use the parameter from yaml file.
    ior_bg_cmd = IorCommand()
    ior_bg_cmd.get_params(self)
    ior_bg_cmd.set_daos_params(self.server_group, self.pool)
    ior_bg_cmd.dfs_oclass.update(self.ior_cmd.dfs_oclass.value)
    ior_bg_cmd.api.update(self.ior_cmd.api.value)
    ior_bg_cmd.transfer_size.update(self.ior_scm_xfersize)
    ior_bg_cmd.block_size.update(self.ior_cmd.block_size.value)
    ior_bg_cmd.flags.update(self.ior_cmd.flags.value)
    ior_bg_cmd.test_file.update('/testfile_background')

    # Define the job manager for the IOR command
    self.job_manager = Mpirun(ior_bg_cmd, mpitype="mpich")

    # Create a new container for the background I/O
    self.create_cont()
    self.job_manager.job.dfs_cont.update(self.container.uuid)
    env = ior_bg_cmd.get_default_env(str(self.job_manager))
    self.job_manager.assign_hosts(self.hostlist_clients, self.workdir, None)
    self.job_manager.assign_processes(1)
    self.job_manager.assign_environment(env, True)

    # Log through the test logger instead of a bare print for consistency
    # with the rest of the file
    self.log.info('----Run IOR in Background-------')

    # run IOR Write Command
    try:
        self.job_manager.run()
    except (CommandFailure, TestFail) as _error:
        results.put("FAIL")
        return

    # run IOR Read Command in loop
    ior_bg_cmd.flags.update(self.ior_read_flags)
    while True:
        try:
            self.job_manager.run()
        except (CommandFailure, TestFail) as _error:
            results.put("FAIL")
            break
def ior_thread(self, pool, oclass, api, test, flags, results):
    """This method calls job manager for IOR command invocation.

    Args:
        pool (object): pool handle
        oclass (str): IOR object class
        api (str): IOR API
        test (list): IOR test sequence (transfer size, block size)
        flags (str): IOR flags
        results (queue): queue for returning thread results
    """
    processes = self.params.get("slots", "/run/ior/clientslots/*")
    mpio_util = MpioUtils()
    if mpio_util.mpich_installed(self.hostlist_clients) is False:
        self.fail("Exiting Test: Mpich not installed")
    self.pool = pool
    # Define the arguments for the ior_runner_thread method
    ior_cmd = IorCommand()
    ior_cmd.get_params(self)
    ior_cmd.set_daos_params(self.server_group, self.pool)
    ior_cmd.dfs_oclass.update(oclass)
    ior_cmd.api.update(api)
    ior_cmd.transfer_size.update(test[0])
    ior_cmd.block_size.update(test[1])
    ior_cmd.flags.update(flags)
    # A new container UUID is registered only for write ("-w") phases;
    # read phases reuse the entry stored by the earlier write run.
    # NOTE(review): if no write ran first, the self.container_info lookup
    # below raises KeyError -- confirm callers always write before reading
    if "-w" in flags:
        self.container_info["{}{}{}"
                           .format(oclass,
                                   api,
                                   test[0])] = str(uuid.uuid4())

    # Define the job manager for the IOR command
    manager = Mpirun(ior_cmd, mpitype="mpich")
    # Same key as above: oclass + api + transfer size
    key = "".join([oclass, api, str(test[0])])
    manager.job.dfs_cont.update(self.container_info[key])
    env = ior_cmd.get_default_env(str(manager))
    manager.assign_hosts(self.hostlist_clients, self.workdir, None)
    manager.assign_processes(processes)
    manager.assign_environment(env, True)

    # run IOR Command
    try:
        manager.run()
    except CommandFailure as _error:
        results.put("FAIL")
class MpiioTests(TestWithServers):
    """Runs ROMIO, LLNL, MPI4PY and HDF5 test suites.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a TestWithServers object."""
        super(MpiioTests, self).__init__(*args, **kwargs)
        self.hostfile_clients_slots = None
        self.mpio = None
        self.daos_cmd = None
        self.cont_uuid = None

    def setUp(self):
        """Set up the test: create the daos command and the pool."""
        super(MpiioTests, self).setUp()
        # initialize daos_cmd
        self.daos_cmd = DaosCommand(self.bin)
        # initialize a python pool object then create the underlying
        self.pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)
        self.pool.create()

    def _create_cont(self):
        """Create a container with the daos command.

        Stores the UUID of the created container in self.cont_uuid.
        """
        cont_type = self.params.get("type", "/run/container/*")
        result = self.daos_cmd.container_create(pool=self.pool.uuid,
                                                svc=self.pool.svc_ranks,
                                                cont_type=cont_type)

        # Extract the container UUID from the daos container create output.
        # Raw string so "\s" is a regex escape, not an invalid python
        # string escape.
        cont_uuid = re.findall(
            r"created\s+container\s+([0-9a-f-]+)", result.stdout)
        if not cont_uuid:
            self.fail("Error obtaining the container uuid from: {}".format(
                result.stdout))
        self.cont_uuid = cont_uuid[0]

    def run_test(self, test_repo, test_name):
        """Execute function to be used by the test functions below.

        test_repo --location of test repository
        test_name --name of the test to be run
        """
        # initialize MpioUtils
        self.mpio = MpioUtils()
        if not self.mpio.mpich_installed(self.hostlist_clients):
            self.fail("Exiting Test: Mpich not installed")

        # initialize test specific variables
        client_processes = self.params.get("np", '/run/client_processes/')

        # create container
        self._create_cont()

        try:
            # running tests
            self.mpio.run_mpiio_tests(self.hostfile_clients, self.pool.uuid,
                                      self.pool.svc_ranks, test_repo,
                                      test_name, client_processes,
                                      self.cont_uuid)
        except MpioFailed as excep:
            self.fail("<{0} Test Failed> \n{1}".format(test_name, excep))

        # Parsing output to look for failures
        # stderr directed to stdout
        stdout = os.path.join(self.logdir, "stdout")
        error_message = [
            "non-zero exit code", "MPI_Abort", "MPI_ABORT", "ERROR"
        ]
        # Context manager closes the log file (the handle was previously
        # leaked)
        with open(stdout, "r") as searchfile:
            for line in searchfile:
                for error in error_message:
                    if error in line:
                        self.fail(
                            "Test Failed with error_message: {}".format(
                                error))
class MpiioTests(TestWithServers):
    """Run ROMIO, LLNL, MPI4PY and HDF5 test suites.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a TestWithServers object."""
        super().__init__(*args, **kwargs)
        # Hostfile slot count is unset so write_host_file uses its default
        self.hostfile_clients_slots = None
        self.mpio = None
        self.daos_cmd = None
        self.cont_uuid = None

    def setUp(self):
        """Set up the test: create the daos command and the pool."""
        super().setUp()
        # initialize daos_cmd
        self.daos_cmd = DaosCommand(self.bin)
        # initialize a python pool object then create the underlying
        self.pool = TestPool(self.context, self.get_dmg_command())
        self.pool.get_params(self)
        self.pool.create()

    def _create_cont(self):
        """Create a container with the daos command.

        Stores the UUID of the created container in self.cont_uuid.
        """
        cont_type = self.params.get("type", "/run/container/*")
        result = self.daos_cmd.container_create(pool=self.pool.uuid,
                                                cont_type=cont_type)

        # Extract the container UUID from the daos container create output
        cont_uuid = re.findall(r"created\s+container\s+([0-9a-f-]+)",
                               result.stdout_text)
        if not cont_uuid:
            self.fail("Error obtaining the container uuid from: {}".format(
                result.stdout_text))
        self.cont_uuid = cont_uuid[0]

    def run_test(self, test_repo, test_name):
        """Execute function to be used by test functions below.

        Creates a container, runs the named MPI-IO test suite, and fails
        the test if known error strings appear in its output.

        test_repo --location of test repository
        test_name --name of the test to be run
        """
        # Required to run daos command
        load_mpi("openmpi")

        # create container
        self._create_cont()

        # initialize MpioUtils
        self.mpio = MpioUtils()
        if not self.mpio.mpich_installed(self.hostlist_clients):
            self.fail("Exiting Test: Mpich not installed")

        # initialize test specific variables
        client_processes = self.params.get("np", '/run/client_processes/')

        try:
            # running tests
            result = self.mpio.run_mpiio_tests(self.hostfile_clients,
                                               self.pool.uuid, test_repo,
                                               test_name, client_processes,
                                               self.cont_uuid)
        except MpioFailed as excep:
            self.fail("<{0} Test Failed> \n{1}".format(test_name, excep))

        # Check output for errors: scan both stdout and stderr for any of
        # the known failure markers and fail the test when one is found
        for output in (result.stdout_text, result.stderr_text):
            match = re.findall(
                r"(non-zero exit code|MPI_Abort|MPI_ABORT|ERROR)", output)
            if match:
                self.log.info(
                    "The following error messages have been detected in the %s "
                    "output:", test_name)
                for item in match:
                    self.log.info(" %s", item)
                self.fail(
                    "Error messages detected in {} output".format(test_name))
class LlnlMpi4pyHdf5(TestWithServers):
    """Runs LLNL, MPI4PY and HDF5 test suites.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a TestWithServers object."""
        super(LlnlMpi4pyHdf5, self).__init__(*args, **kwargs)
        self.hostfile_clients_slots = None
        self.mpio = None

    def setUp(self):
        """Set up the test: create the pool used by run_test()."""
        super(LlnlMpi4pyHdf5, self).setUp()
        try:
            # parameters used in pool create
            createmode = self.params.get("mode", '/run/pool/createmode/*/')
            createuid = os.geteuid()
            creategid = os.getegid()
            createsetid = self.params.get("setname", '/run/pool/createset/')
            createsize = self.params.get("size", '/run/pool/createsize/')
            self.createsvc = self.params.get("svcn", '/run/pool/createsvc/')

            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid, createsize,
                             createsetid, None, None, self.createsvc)
        except (DaosApiError) as excep:
            self.fail("<Test Failed at pool create> \n{}".format(excep))

    def run_test(self, test_repo, test_name):
        """Execute the named test from the given repository.

        Args:
            test_repo (str): location of test repository
            test_name (str): name of the test to be run
        """
        # initialize MpioUtils
        self.mpio = MpioUtils()
        if not self.mpio.mpich_installed(self.hostlist_clients):
            self.fail("Exiting Test: Mpich not installed")
        try:
            # initialise test specific variables
            client_processes = self.params.get("np", '/run/client_processes/')

            # obtaining pool uuid (the svc list is not needed by
            # run_llnl_mpi4py_hdf5, so the old unused computation was
            # removed)
            pool_uuid = self.pool.get_uuid_str()

            # running tests
            self.mpio.run_llnl_mpi4py_hdf5(self.hostfile_clients, pool_uuid,
                                           test_repo, test_name,
                                           client_processes)

            # Parsing output to look for failures
            # stderr directed to stdout
            stdout = self.logdir + "/stdout"
            error_message = [
                "non-zero exit code", "MPI_Abort", "MPI_ABORT", "ERROR"
            ]
            # Context manager closes the log file (the handle was
            # previously leaked); iterate messages directly
            with open(stdout, "r") as searchfile:
                for line in searchfile:
                    for error in error_message:
                        if error in line:
                            self.fail(
                                "Test Failed with error_message: {}".format(
                                    error))
        except (MpioFailed, DaosApiError) as excep:
            self.fail("<{0} Test Failed> \n{1}".format(test_name, excep))
class Romio(Test):
    """Runs Romio test."""

    def __init__(self, *args, **kwargs):
        """Initialize a Test object."""
        super(Romio, self).__init__(*args, **kwargs)
        self.basepath = None
        self.server_group = None
        self.context = None
        self.hostlist_servers = None
        self.hostfile_servers = None
        self.hostlist_clients = None
        self.hostfile_clients = None

    def setUp(self):
        """Start the agents and servers needed by the test."""
        self.agent_sessions = None
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_server')
        # setup the DAOS python API
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.hostlist_servers = self.params.get("test_servers", '/run/hosts/')
        self.hostfile_servers = (write_host_file.write_host_file(
            self.hostlist_servers, self.workdir))
        print("Host file servers is: {}".format(self.hostfile_servers))

        self.hostlist_clients = self.params.get("test_clients", '/run/hosts/')
        self.hostfile_clients = (write_host_file.write_host_file(
            self.hostlist_clients, self.workdir))
        print("Host file clients is: {}".format(self.hostfile_clients))

        # start servers
        self.agent_sessions = agent_utils.run_agent(self.basepath,
                                                    self.hostlist_servers,
                                                    self.hostlist_clients)
        server_utils.run_server(self.hostfile_servers, self.server_group,
                                self.basepath)
        self.mpio = None

    def tearDown(self):
        """Stop the agents and servers."""
        if self.agent_sessions:
            agent_utils.stop_agent(self.agent_sessions, self.hostlist_clients)
        server_utils.stop_server(hosts=self.hostlist_servers)

    @skipForTicket("CORCI-635")
    def test_romio(self):
        """
        Test ID: DAOS-1994
        Run Romio test provided in mpich package
        Testing various I/O functions provided in romio test suite
        :avocado: tags=all,mpiio,pr,small,romio
        """
        # setting romio parameters
        romio_test_repo = self.params.get("romio_repo", '/run/romio/')
        # initialize MpioUtils
        self.mpio = MpioUtils()
        if self.mpio.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")

        try:
            # Romio do not need slots in hostfile
            with open(self.hostfile_clients) as client_file:
                new_text = client_file.read().replace('slots=1', '')
            with open(self.hostfile_clients, "w") as client_file:
                client_file.write(new_text)

            # running romio
            self.mpio.run_romio(self.basepath, self.hostlist_clients,
                                romio_test_repo)

            # Parsing output to look for failures
            # stderr directed to stdout
            stdout = self.logdir + "/stdout"
            error_message = [
                "non-zero exit code", "MPI_Abort", "errors",
                "failed to create pool", "failed to parse pool UUID",
                "failed to destroy pool"
            ]
            # Iterate directly (the old code used the python-2-only
            # xrange, a NameError on python 3) and close the log file
            # via a context manager
            with open(stdout, "r") as searchfile:
                for line in searchfile:
                    for message in error_message:
                        if message in line:
                            self.fail("Romio Test Failed with error_message: "
                                      "{}".format(message))
        except (MpioFailed) as excep:
            self.fail("<Romio Test Failed> \n{}".format(excep))
class LlnlMpi4py(Test):
    """Runs LLNL and MPI4PY test suites."""

    def setUp(self):
        """Start servers/agents and create the pool used by the tests."""
        # initialising variables
        self.basepath = None
        self.server_group = None
        self.context = None
        self.pool = None
        self.mpio = None
        self.agent_sessions = None
        self.hostlist_servers = None
        self.hostfile_servers = None
        self.hostlist_clients = None
        self.hostfile_clients = None
        self.createsvc = None

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as var_file:
            build_paths = json.load(var_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_server')
        # setup the DAOS python API
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.hostlist_servers = self.params.get("test_servers", '/run/hosts/')
        self.hostfile_servers = write_host_file.write_host_file(
            self.hostlist_servers, self.workdir)
        print("Host file servers is: {}".format(self.hostfile_servers))

        self.hostlist_clients = self.params.get("test_clients", '/run/hosts/')
        self.hostfile_clients = write_host_file.write_host_file(
            self.hostlist_clients, self.workdir, None)
        print("Host file clients is: {}".format(self.hostfile_clients))

        self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                   self.hostlist_servers,
                                                   self.hostlist_clients)
        # start servers
        server_utils.run_server(self.hostfile_servers, self.server_group,
                                self.basepath)
        try:
            # parameters used in pool create
            createmode = self.params.get("mode", '/run/pool/createmode/*/')
            createuid = os.geteuid()
            creategid = os.getegid()
            createsetid = self.params.get("setname", '/run/pool/createset/')
            createsize = self.params.get("size", '/run/pool/createsize/')
            self.createsvc = self.params.get("svcn", '/run/pool/createsvc/')

            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid, createsize,
                             createsetid, None, None, self.createsvc)
        except (DaosApiError) as excep:
            # "{}" (not "{1}"): the old index referenced a missing
            # positional argument, raising IndexError instead of failing
            # with the real message
            self.fail("<Test Failed at pool create> \n{}".format(excep))

    def tearDown(self):
        """Destroy the pool and stop the agents and servers."""
        try:
            if self.pool is not None:
                self.pool.destroy(1)
        finally:
            if self.agent_sessions:
                AgentUtils.stop_agent(self.hostlist_clients,
                                      self.agent_sessions)
            server_utils.stop_server(hosts=self.hostlist_servers)

    def run_test(self, test_repo, test_name):
        """Execute the named test from the given repository.

        Args:
            test_repo (str): location of test repository
            test_name (str): name of the test to be run
        """
        # initialize MpioUtils
        self.mpio = MpioUtils()
        if not self.mpio.mpich_installed(self.hostlist_clients):
            self.fail("Exiting Test: Mpich not installed")
        try:
            # initialise test specific variables
            client_processes = self.params.get("np", '/run/client_processes/')

            # obtaining pool uuid and svc list
            pool_uuid = self.pool.get_uuid_str()
            svc_list = ""
            for i in range(self.createsvc):
                svc_list += str(int(self.pool.svc.rl_ranks[i])) + ":"
            svc_list = svc_list[:-1]

            # running tests
            self.mpio.run_llnl_mpi4py(self.basepath, self.hostfile_clients,
                                      pool_uuid, test_repo, test_name,
                                      client_processes)

            # Parsing output to look for failures
            # stderr directed to stdout
            stdout = self.logdir + "/stdout"
            error_message = [
                "non-zero exit code", "MPI_Abort", "MPI_ABORT", "ERROR"
            ]
            # Context manager closes the log file (the handle was
            # previously leaked); iterate messages directly
            with open(stdout, "r") as searchfile:
                for line in searchfile:
                    for error in error_message:
                        if error in line:
                            self.fail(
                                "Test Failed with error_message: {}".format(
                                    error))
        except (MpioFailed, DaosApiError) as excep:
            self.fail("<{0} Test Failed> \n{1}".format(test_name, excep))

    def test_llnl(self):
        """
        Test ID: DAOS-2231
        Run LLNL test provided in mpich package
        Testing various I/O functions provided in llnl test suite
        :avocado: tags=mpio,llnlmpi4py,llnl
        """
        test_repo = self.params.get("llnl", '/run/test_repo/')
        self.run_test(test_repo, "llnl")

    def test_mpi4py(self):
        """
        Test ID: DAOS-2231
        Run LLNL test provided in mpich package
        Testing various I/O functions provided in llnl test suite
        :avocado: tags=mpio,llnlmpi4py,mpi4py
        """
        test_repo = self.params.get("mpi4py", '/run/test_repo/')
        self.run_test(test_repo, "mpi4py")
def ior_runner_thread(self, results):
    """Start threads and wait until all threads are finished.

    Destroy the container at the end of this thread run.

    Args:
        results (queue): queue for returning thread results

    Returns:
        None
    """
    processes = self.params.get("slots", "/run/ior/clientslots/*")
    container_info = {}
    # Build a "daos container destroy" command used for the cleanup loop
    # at the end of this method
    cmd = DaosCommand(os.path.join(self.prefix, "bin"))
    cmd.set_sub_command("container")
    cmd.sub_command_class.set_sub_command("destroy")
    mpio_util = MpioUtils()
    if mpio_util.mpich_installed(self.hostlist_clients) is False:
        self.fail("Exiting Test: Mpich not installed")

    # Iterate through IOR different value and run in sequence
    for oclass, api, test, flags in product(self.ior_dfs_oclass,
                                            self.ior_apis,
                                            self.ior_transfer_size,
                                            self.ior_flags):
        # Define the arguments for the ior_runner_thread method
        ior_cmd = IorCommand()
        ior_cmd.get_params(self)
        ior_cmd.set_daos_params(self.server_group, self.pool)
        ior_cmd.dfs_oclass.update(oclass)
        ior_cmd.api.update(api)
        ior_cmd.transfer_size.update(test[0])
        ior_cmd.block_size.update(test[1])
        ior_cmd.flags.update(flags)
        # Register a fresh container UUID keyed by this parameter
        # combination; the same key is used for cleanup below
        container_info["{}{}{}"
                       .format(oclass,
                               api,
                               test[0])] = str(uuid.uuid4())

        # Define the job manager for the IOR command
        manager = Mpirun(ior_cmd, mpitype="mpich")
        manager.job.dfs_cont.update(container_info
                                    ["{}{}{}".format(oclass,
                                                     api,
                                                     test[0])])
        env = ior_cmd.get_default_env(str(manager))
        manager.assign_hosts(self.hostlist_clients, self.workdir, None)
        manager.assign_processes(processes)
        manager.assign_environment(env, True)

        # run IOR Command
        try:
            manager.run()
        except CommandFailure as _error:
            results.put("FAIL")

    # Destroy the container created by thread
    for key in container_info:
        cmd.sub_command_class.sub_command_class.pool.value = self.pool.uuid
        cmd.sub_command_class.sub_command_class.svc.value = \
            self.pool.svc_ranks
        cmd.sub_command_class.sub_command_class.cont.value = \
            container_info[key]
        try:
            # NOTE(review): runs the destroy via the protected
            # _get_result() -- confirm no public run method exists in
            # this DaosCommand version
            cmd._get_result()
        except CommandFailure as _error:
            results.put("FAIL")
class EightServers(Test):
    """Test class Description: Runs IOR with 8 servers."""

    def __init__(self, *args, **kwargs):
        """Initialize a Test object."""
        super(EightServers, self).__init__(*args, **kwargs)
        self.basepath = None
        self.server_group = None
        self.context = None
        self.pool = None
        self.num_procs = None
        self.hostlist_servers = None
        self.hostfile_servers = None
        self.hostlist_clients = None
        self.hostfile_clients = None
        self.mpio = None

    def setUp(self):
        """Start the servers needed by the test."""
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        print("<<{}>>".format(self.basepath))
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_server')
        # setup the DAOS python API
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.hostlist_servers = self.params.get("test_servers",
                                                '/run/hosts/test_machines/*')
        self.hostfile_servers = (write_host_file.write_host_file(
            self.hostlist_servers, self.workdir))
        print("Host file servers is: {}".format(self.hostfile_servers))

        self.hostlist_clients = self.params.get("test_clients",
                                                '/run/hosts/test_machines/*')
        self.num_procs = self.params.get("np", '/run/ior/client_processes/*')
        self.hostfile_clients = (write_host_file.write_host_file(
            self.hostlist_clients, self.workdir, None))
        print("Host file clients is: {}".format(self.hostfile_clients))

        server_utils.run_server(self.hostfile_servers, self.server_group,
                                self.basepath)

    def tearDown(self):
        """Destroy the pool and stop the servers."""
        try:
            if self.pool is not None and self.pool.attached:
                self.pool.destroy(1)
        finally:
            server_utils.stop_server(hosts=self.hostlist_servers)

    def executable(self, iorflags=None):
        """Create a pool and run ior for ssf and fpp.

        Args:
            iorflags (str, optional): IOR flags to run with. Defaults to
                None.
        """
        # parameters used in pool create
        createmode = self.params.get("mode", '/run/pool/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createscm_size = self.params.get("scm_size", '/run/pool/createsize/')
        createnvme_size = self.params.get("nvme_size", '/run/pool/createsize/')
        createsvc = self.params.get("svcn", '/run/pool/createsvc/')
        iteration = self.params.get("iter", '/run/ior/iteration/')
        block_size = self.params.get("b",
                                     '/run/ior/transfersize_blocksize/*/')
        transfer_size = self.params.get("t",
                                        '/run/ior/transfersize_blocksize/*/')

        try:
            # initialize MpioUtils
            self.mpio = MpioUtils()
            if self.mpio.mpich_installed(self.hostlist_clients) is False:
                self.fail("Exiting Test: Mpich not installed")

            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid,
                             createscm_size, createsetid, None, None,
                             createsvc, createnvme_size)

            pool_uuid = self.pool.get_uuid_str()
            svc_list = ""
            for i in range(createsvc):
                svc_list += str(int(self.pool.svc.rl_ranks[i])) + ":"
            svc_list = svc_list[:-1]
            print("svc_list: {}".format(svc_list))

            ior_utils.run_ior_mpiio(self.basepath, self.mpio.mpichinstall,
                                    pool_uuid, svc_list, self.num_procs,
                                    self.hostfile_clients, iorflags,
                                    iteration, transfer_size, block_size,
                                    True)
        except (DaosApiError, MpioFailed) as excep:
            # Fail the test instead of just printing the exception, which
            # previously let a failed pool create or IOR run report PASS
            self.fail("<EightServers Test Failed> \n{}".format(excep))

    def test_ssf(self):
        """
        Test ID: DAOS-2121
        Test Description: Run IOR with 1,64 and 128 clients config
                          in ssf mode.
        Use Cases: Different combinations of 1/64/128 Clients,
                   1K/4K/32K/128K/512K/1M transfersize and block size of
                   32M for 1K transfer size and 128M for rest.
        :avocado: tags=ior,mpiio,eightservers,ior_ssf
        """
        ior_flags = self.params.get("F", '/run/ior/iorflags/ssf/')
        self.executable(ior_flags)

    def test_fpp(self):
        """
        Test ID: DAOS-2121
        Test Description: Run IOR with 1,64 and 128 clients config
                          in fpp mode.
        Use Cases: Different combinations of 1/64/128 Clients,
                   1K/4K/32K/128K/512K/1M transfersize and block size of
                   32M for 1K transfer size and 128M for rest.
        :avocado: tags=ior,mpiio,eightservers,ior_fpp
        """
        ior_flags = self.params.get("F", '/run/ior/iorflags/fpp/')
        self.executable(ior_flags)
class LlnlMpi4pyHdf5(TestWithServers):
    """
    Runs LLNL, MPI4PY and HDF5 test suites.
    :avocado: recursive
    """

    def setUp(self):
        """Set up the clients and create the pool used by every test."""
        super(LlnlMpi4pyHdf5, self).setUp()
        # initialising variables
        self.mpio = None
        self.hostfile_clients = None

        # setting client variables
        self.hostfile_clients = write_host_file.write_host_file(
            self.hostlist_clients, self.workdir, None)
        try:
            # parameters used in pool create
            createmode = self.params.get("mode", '/run/pool/createmode/*/')
            createuid = os.geteuid()
            creategid = os.getegid()
            createsetid = self.params.get("setname", '/run/pool/createset/')
            createsize = self.params.get("size", '/run/pool/createsize/')
            self.createsvc = self.params.get("svcn", '/run/pool/createsvc/')

            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid,
                             createsize, createsetid, None, None,
                             self.createsvc)
        except DaosApiError as excep:
            # BUG FIX: the original used "{1}" with a single positional
            # argument, which raised IndexError and hid the real failure.
            self.fail("<Test Failed at pool create> \n{}".format(excep))

    def run_test(self, test_repo, test_name):
        """Run the named test suite and scan its output for errors.

        Args:
            test_repo (str): location of the test repository
            test_name (str): name of the test to be run
        """
        # initialize MpioUtils; the suites require mpich on the clients
        self.mpio = MpioUtils()
        if not self.mpio.mpich_installed(self.hostlist_clients):
            self.fail("Exiting Test: Mpich not installed")

        try:
            # initialise test specific variables
            client_processes = self.params.get("np", '/run/client_processes/')

            # obtaining pool uuid (the unused svc_list computation from the
            # original was removed — run_llnl_mpi4py_hdf5 never consumed it)
            pool_uuid = self.pool.get_uuid_str()

            # running tests
            self.mpio.run_llnl_mpi4py_hdf5(self.basepath,
                                           self.hostfile_clients, pool_uuid,
                                           test_repo, test_name,
                                           client_processes)

            # Parsing output to look for failures
            # stderr directed to stdout
            stdout = self.logdir + "/stdout"
            error_messages = ("non-zero exit code", "MPI_Abort",
                              "MPI_ABORT", "ERROR")
            # BUG FIX: use a context manager so the log file handle is
            # always closed (the original leaked it)
            with open(stdout, "r") as searchfile:
                for line in searchfile:
                    for message in error_messages:
                        if message in line:
                            self.fail(
                                "Test Failed with error_message: {}".format(
                                    message))
        except (MpioFailed, DaosApiError) as excep:
            self.fail("<{0} Test Failed> \n{1}".format(test_name, excep))

    @skipForTicket("CORCI-635")
    def test_llnl(self):
        """
        Jira ID: DAOS-2231
        Test Description: Run LLNL test suite.
        Testing various I/O functions provided in llnl test suite
        such as:- test_collective, test_datareps, test_errhandlers,
        test_filecontrol, test_localpointer, test_manycomms,
        test_manyopens, test_openclose, test_openmodes,
        test_nb_collective, test_nb_localpointer, test_nb_rdwr,
        test_nb_readwrite, test_rdwr, test_readwrite
        :avocado: tags=all,mpiio,smoke,pr,small,llnlmpi4py
        """
        test_repo = self.params.get("llnl", '/run/test_repo/')
        self.run_test(test_repo, "llnl")

    @skipForTicket("CORCI-635")
    def test_mpi4py(self):
        """
        Jira ID: DAOS-2231
        Test Description: Run mpi4py io test provided in mpi4py package
        Testing various I/O functions provided in mpi4py test suite
        such as:- testReadWriteAt, testIReadIWriteAt, testReadWrite
        testIReadIWrite, testReadWriteAtAll, testIReadIWriteAtAll
        testReadWriteAtAllBeginEnd, testReadWriteAll
        testIReadIWriteAll, testReadWriteAllBeginEnd
        :avocado: tags=all,mpiio,pr,small,llnlmpi4py,mpi4py
        """
        test_repo = self.params.get("mpi4py", '/run/test_repo/')
        self.run_test(test_repo, "mpi4py")

    @skipForTicket("CORCI-635")
    def test_hdf5(self):
        """
        Jira ID: DAOS-2252
        Test Description: Run HDF5 testphdf5 and t_shapesame provided in
        HDF5 package.
        Testing various I/O functions provided in HDF5 test suite such as:-
        test_fapl_mpio_dup, test_split_comm_access, test_page_buffer_access,
        test_file_properties, dataset_writeInd, dataset_readInd,
        dataset_writeAll, dataset_readAll, extend_writeInd, extend_readInd,
        extend_writeAll, extend_readAll,extend_writeInd2,none_selection_chunk,
        zero_dim_dset, multiple_dset_write, multiple_group_write,
        multiple_group_read, compact_dataset, collective_group_write,
        independent_group_read, big_dataset, coll_chunk1, coll_chunk2,
        coll_chunk3, coll_chunk4, coll_chunk5, coll_chunk6, coll_chunk7,
        coll_chunk8, coll_chunk9, coll_chunk10, coll_irregular_cont_write,
        coll_irregular_cont_read, coll_irregular_simple_chunk_write,
        coll_irregular_simple_chunk_read ,
        coll_irregular_complex_chunk_write,
        coll_irregular_complex_chunk_read , null_dataset ,
        io_mode_confusion, rr_obj_hdr_flush_confusion, chunk_align_bug_1,
        lower_dim_size_comp_test, link_chunk_collective_io_test,
        actual_io_mode_tests, no_collective_cause_tests, test_plist_ed,
        file_image_daisy_chain_test, test_dense_attr,
        test_partial_no_selection_coll_md_read
        :avocado: tags=mpio,llnlmpi4pyhdf5,hdf5
        """
        test_repo = self.params.get("hdf5", '/run/test_repo/')
        self.run_test(test_repo, "hdf5")
class EightServers(Test):
    """
    Test class Description: Runs IOR with 8 servers.
    """

    def __init__(self, *args, **kwargs):
        """Initialize an EightServers object."""
        super(EightServers, self).__init__(*args, **kwargs)
        self.basepath = None
        self.server_group = None
        self.context = None
        self.pool = None
        self.num_procs = None
        self.hostlist_servers = None
        self.hostfile_servers = None
        self.hostlist_clients = None
        self.hostfile_clients = None
        self.mpio = None

    def setUp(self):
        """Read the build paths, write the host files, start the servers."""
        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        print("<<{}>>".format(self.basepath))
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_server')

        # setup the DAOS python API
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.hostlist_servers = self.params.get("test_servers",
                                                '/run/hosts/test_machines/*')
        self.hostfile_servers = (
            write_host_file.write_host_file(self.hostlist_servers,
                                            self.workdir))
        print("Host file servers is: {}".format(self.hostfile_servers))

        self.hostlist_clients = self.params.get("test_clients",
                                                '/run/hosts/test_machines/*')
        self.num_procs = self.params.get("np", '/run/ior/client_processes/*')
        self.hostfile_clients = (
            write_host_file.write_host_file(self.hostlist_clients,
                                            self.workdir, None))
        print("Host file clients is: {}".format(self.hostfile_clients))

        server_utils.run_server(self.hostfile_servers, self.server_group,
                                self.basepath)

    def tearDown(self):
        """Destroy the pool, then always stop the servers."""
        try:
            if self.pool is not None and self.pool.attached:
                self.pool.destroy(1)
        finally:
            server_utils.stop_server(hosts=self.hostlist_servers)

    def executable(self, iorflags=None):
        """Create a pool and run IOR through MPI-IO against it.

        Args:
            iorflags (str, optional): IOR flags (ssf or fpp variants).
                Defaults to None.
        """
        # parameters used in pool create
        createmode = self.params.get("mode", '/run/pool/createmode/*/')
        createuid = os.geteuid()
        creategid = os.getegid()
        createsetid = self.params.get("setname", '/run/pool/createset/')
        createscm_size = self.params.get("scm_size", '/run/pool/createsize/')
        createnvme_size = self.params.get("nvme_size",
                                          '/run/pool/createsize/')
        createsvc = self.params.get("svcn", '/run/pool/createsvc/')
        # IOR run parameters
        iteration = self.params.get("iter", '/run/ior/iteration/')
        block_size = self.params.get("b",
                                     '/run/ior/transfersize_blocksize/*/')
        transfer_size = self.params.get("t",
                                        '/run/ior/transfersize_blocksize/*/')

        try:
            # initialize MpioUtils; IOR in MPIIO mode requires mpich
            self.mpio = MpioUtils()
            if not self.mpio.mpich_installed(self.hostlist_clients):
                self.fail("Exiting Test: Mpich not installed")

            # initialize a python pool object then create the underlying
            # daos storage
            self.pool = DaosPool(self.context)
            self.pool.create(createmode, createuid, creategid,
                             createscm_size, createsetid, None, None,
                             createsvc, createnvme_size)

            pool_uuid = self.pool.get_uuid_str()
            # colon-separated list of the pool service replica ranks
            svc_list = ":".join(
                str(int(self.pool.svc.rl_ranks[index]))
                for index in range(createsvc))
            print("svc_list: {}".format(svc_list))

            ior_utils.run_ior_mpiio(self.basepath, self.mpio.mpichinstall,
                                    pool_uuid, svc_list, self.num_procs,
                                    self.hostfile_clients, iorflags,
                                    iteration, transfer_size, block_size,
                                    True)
        except (DaosApiError, MpioFailed) as excep:
            # BUG FIX: the original only printed the exception, so IOR/DAOS
            # failures were swallowed and the test passed spuriously; fail
            # explicitly, matching the sibling test classes in this file.
            self.fail("<IOR Test Failed> \n{}".format(excep))

    def test_ssf(self):
        """
        Test ID: DAOS-2121
        Test Description: Run IOR with 1,64 and 128 clients config in ssf
        mode.
        Use Cases: Different combinations of 1/64/128 Clients,
        1K/4K/32K/128K/512K/1M transfersize and block size of 32M for 1K
        transfer size and 128M for rest.
        :avocado: tags=ior,mpiio,eightservers,ior_ssf
        """
        # single-shared-file IOR flags
        ior_flags = self.params.get("F", '/run/ior/iorflags/ssf/')
        self.executable(ior_flags)

    def test_fpp(self):
        """
        Test ID: DAOS-2121
        Test Description: Run IOR with 1,64 and 128 clients config in fpp
        mode.
        Use Cases: Different combinations of 1/64/128 Clients,
        1K/4K/32K/128K/512K/1M transfersize and block size of 32M for 1K
        transfer size and 128M for rest.
        :avocado: tags=ior,mpiio,eightservers,ior_fpp
        """
        # file-per-process IOR flags
        ior_flags = self.params.get("F", '/run/ior/iorflags/fpp/')
        self.executable(ior_flags)
class Romio(Test):
    """
    Runs Romio test.
    """

    def __init__(self, *args, **kwargs):
        """Initialize a Romio object."""
        super(Romio, self).__init__(*args, **kwargs)
        self.basepath = None
        self.server_group = None
        self.context = None
        self.hostlist_servers = None
        self.hostfile_servers = None
        self.hostlist_clients = None
        self.hostfile_clients = None

    def setUp(self):
        """Read build paths, write host files, start agents and servers."""
        self.agent_sessions = None
        self.mpio = None

        # get paths from the build_vars generated by build
        with open('../../../.build_vars.json') as build_file:
            build_paths = json.load(build_file)
        self.basepath = os.path.normpath(build_paths['PREFIX'] + "/../")
        self.server_group = self.params.get("name", '/server_config/',
                                            'daos_server')

        # setup the DAOS python API
        self.context = DaosContext(build_paths['PREFIX'] + '/lib/')

        self.hostlist_servers = self.params.get("test_servers", '/run/hosts/')
        self.hostfile_servers = (
            write_host_file.write_host_file(self.hostlist_servers,
                                            self.workdir))
        print("Host file servers is: {}".format(self.hostfile_servers))

        self.hostlist_clients = self.params.get("test_clients", '/run/hosts/')
        self.hostfile_clients = (
            write_host_file.write_host_file(self.hostlist_clients,
                                            self.workdir))
        print("Host file clients is: {}".format(self.hostfile_clients))

        # start agents and servers
        self.agent_sessions = AgentUtils.run_agent(self.basepath,
                                                   self.hostlist_servers,
                                                   self.hostlist_clients)
        server_utils.run_server(self.hostfile_servers, self.server_group,
                                self.basepath)

    def tearDown(self):
        """Stop any running agents, then stop the servers."""
        if self.agent_sessions:
            AgentUtils.stop_agent(self.hostlist_clients, self.agent_sessions)
        server_utils.stop_server(hosts=self.hostlist_servers)

    def test_romio(self):
        """
        Test ID: DAOS-1994
        Run Romio test provided in mpich package
        Testing various I/O functions provided in romio test suite
        :avocado: tags=mpio,romio
        """
        # setting romio parameters
        romio_test_repo = self.params.get("romio_repo", '/run/romio/')

        # initialize MpioUtils; romio requires mpich on the clients
        self.mpio = MpioUtils()
        if not self.mpio.mpich_installed(self.hostlist_clients):
            self.fail("Exiting Test: Mpich not installed")

        try:
            # Romio do not need slots in hostfile
            with open(self.hostfile_clients) as client_file:
                new_text = client_file.read().replace('slots=1', '')
            with open(self.hostfile_clients, "w") as client_file:
                client_file.write(new_text)

            # running romio
            self.mpio.run_romio(self.basepath, self.hostlist_clients,
                                romio_test_repo)

            # Parsing output to look for failures
            # stderr directed to stdout
            stdout = self.logdir + "/stdout"
            error_messages = ("non-zero exit code", "MPI_Abort", "errors",
                              "failed to create pool",
                              "failed to parse pool UUID",
                              "failed to destroy pool")
            # BUG FIX: the original used Python 2's xrange (NameError on
            # Python 3) and never closed the log file; iterate the messages
            # directly inside a context manager instead.
            with open(stdout, "r") as searchfile:
                for line in searchfile:
                    for message in error_messages:
                        if message in line:
                            self.fail("Romio Test Failed with error_message: "
                                      "{}".format(message))
        except MpioFailed as excep:
            self.fail("<Romio Test Failed> \n{}".format(excep))