def _get_collectibles(self, c_profiler):
    """Load the sysinfo collectibles and settle whether profiling is on.

    ``self.sysinfo_files`` is filled by ``gather_collectibles_config`` and
    then gains a ``"profilers"`` entry: the lines of the file named by the
    ``sysinfo.collectibles.profilers`` setting, or an empty list when that
    path is not a file.

    ``self.profiler`` is ``c_profiler`` when that is not ``None`` and the
    ``sysinfo.collect.profiler`` setting otherwise.  It is forced to
    ``False`` when the profilers file exists but yields no entries.

    :param c_profiler: explicit profiler switch, or ``None`` to use the
                       configured value.
    """
    self.sysinfo_files = gather_collectibles_config(self.config)

    # An explicit argument wins over the configuration.
    self.profiler = (
        self.config.get("sysinfo.collect.profiler")
        if c_profiler is None
        else c_profiler
    )

    profilers_path = self.config.get("sysinfo.collectibles.profilers")
    if not os.path.isfile(profilers_path):
        log.debug("File %s does not exist.", profilers_path)
        self.sysinfo_files["profilers"] = []
        return

    self.sysinfo_files["profilers"] = genio.read_all_lines(profilers_path)
    log.info("Profilers configured by file: %s", profilers_path)

    has_commands = bool(self.sysinfo_files["profilers"])
    if not has_commands:
        # Nothing to run, so profiling cannot be enabled.
        self.profiler = False

    if self.profiler is False:
        if has_commands:
            log.info("Profiler disabled")
        else:
            log.info("Profiler disabled: no profiler" " commands configured")
def runServer(hostfile, setname, basepath):
    """Start DAOS servers with orterun inside an aexpect shell session.

    The shell session is stored in the module-level ``sessions`` mapping
    under ``setname``.  When the session is responsive the orterun command
    is sent and the call waits for the server start banner; when it is not,
    nothing is sent and the function returns silently.

    :param hostfile: host file handed to orterun; its line count is used as
                     the ``--np`` value.
    :param setname: server group name (``-g``) and key into ``sessions``.
    :param basepath: directory that contains the ``install`` tree.
    :raises ServerFailed: when any exception occurs while starting.
    """
    global sessions
    try:
        server_count = len(genio.read_all_lines(hostfile))

        # The command line is assembled from fragments that already carry
        # their own trailing spaces.
        server_cmd = "".join([
            basepath,
            "/install/bin/orterun --np {0} ".format(server_count),
            "--hostfile {0} --enable-recovery ".format(hostfile),
            "-x D_LOG_MASK=DEBUG,RPC=ERR,MEM=ERR -x D_LOG_FILE=",
            basepath,
            "/install/tmp/daos.log ",
            "-x LD_LIBRARY_PATH={0}/install/lib:".format(basepath),
            "{0}/install/lib/daos_srv ".format(basepath),
            basepath,
            "/install/bin/daos_server -g {0} -c 1 ".format(setname),
            " -a",
            basepath,
            "/install/tmp/",
        ])

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Start CMD>>>>{0}".format(server_cmd))

        shell = aexpect.ShellSession("/bin/sh")
        sessions[setname] = shell
        if shell.is_responsive():
            shell.sendline(server_cmd)
            shell.read_until_any_line_matches(
                "DAOS server (v0.0.2) started on rank 0*",
                print_func=printFunc)
            print("<SERVER> server started")
    except Exception as e:
        print("<SERVER> Exception occurred: {0}".format(str(e)))
        raise ServerFailed("Server didn't start!")
def test_launch(self):
    """Launch a DAOS server through orterun and wait for its start banner.

    The host file and URI file locations come from the test parameters.
    The test fails when the shell session is not responsive or when any
    exception is raised while launching or waiting; otherwise Ctrl-C is
    sent to the session at the end.
    """
    # The value is not used afterwards; the parameter is still queried.
    self.params.get("hostname", '/tests/', "localhost")
    hostfile = self.params.get("hostfile1", '/files/', "/tmp/hostfile1")
    urifile = self.params.get("urifile", '/files/', "/tmp/urifile")

    server_count = len(genio.read_all_lines(hostfile))

    launch_cmd = "".join([
        "../../install/bin/orterun --np {0} ".format(server_count),
        "--hostfile {0} --enable-recovery ".format(hostfile),
        "--report-uri {0} -x D_LOG_FILE=/mnt/shared/test/tmp/daos.log ".format(
            urifile),
        "-x LD_LIBRARY_PATH=/home/skirvan/daos_m10/install/lib:/home/skirvan/daos_m10/install/lib/daos_srv ",
        "../../install/bin/daos_server -d /tmp/.daos -g daos_server",
    ])

    try:
        session = aexpect.ShellSession("/bin/bash")
        if not session.is_responsive():
            self.fail("Server did not start.\n")
        else:
            session.sendline(launch_cmd)
            session.read_until_any_line_matches(
                "XDAOS server (v0.0.2) started on rank *",
                timeout=5.0,
                print_func=printFunc)
    except Exception:
        self.fail("Server did not start.\n")

    session.sendcontrol("c")
def run_event(self, filename, perf_flags):
    """Run one perf command per line of ``filename`` and record failures.

    Each command is ``perf_flags`` followed directly by the line and
    ``" sleep 1"``.  Commands that exit with a non-zero status are appended
    to ``self.fail_cmd``.

    :param filename: file listing one event per line.
    :param perf_flags: command prefix placed in front of every event.
    """
    for event in genio.read_all_lines(filename):
        command = "%s%s sleep 1" % (perf_flags, event)
        result = process.run(command, shell=True, ignore_status=True)
        if result.exit_status == 0:
            continue
        self.fail_cmd.append(command)
def runServer(hostfile, urifile):
    """Start DAOS servers with orterun inside an aexpect shell session.

    The shell session is stored in the module-level ``session`` variable.
    When the session is responsive the orterun command is sent and the call
    waits for the server start banner; when it is not, nothing is sent and
    the function returns silently.

    :param hostfile: host file handed to orterun; its line count is used as
                     the ``--np`` value.
    :param urifile: path passed to orterun as ``--report-uri``.
    :raises ServerFailed: when any exception occurs while starting.
    """
    global session
    try:
        server_count = len(genio.read_all_lines(hostfile))

        # Every fragment carries its own trailing space where one is needed.
        server_cmd = "".join([
            "/home/skirvan/daos_m10/install/bin/orterun --np {0} ".format(
                server_count),
            "--hostfile {0} --enable-recovery ".format(hostfile),
            "--report-uri {0} -x DD_LOG=/mnt/shared/test/tmp/daos.log ".format(
                urifile),
            "-x LD_LIBRARY_PATH=/home/skirvan/daos_m10/install/lib:/home/skirvan/daos_m10/install/lib/daos_srv ",
            "/home/skirvan/daos_m10/install/bin/daos_server -g daos_server",
        ])

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Start CMD>>>>{0}".format(server_cmd))

        session = aexpect.ShellSession("/bin/bash")
        if session.is_responsive():
            session.sendline(server_cmd)
            session.read_until_any_line_matches(
                "DAOS server (v0.0.2) started on rank 0*",
                print_func=printFunc)
            print("<SERVER> server started")
    except Exception as e:
        print("<SERVER> Exception occurred: {0}".format(str(e)))
        raise ServerFailed("Server didn't start!")
def test_probe(self):
    """Place a perf probe and fail if it shows up as a duplicate.

    Both the stderr of ``perf probe`` and the lines of the kprobe_events
    file are handed to ``self._check_duplicate_probe``; the test fails when
    ``self.fail_flag`` is set afterwards.
    """
    probe_result = process.run("perf probe select_task_rq_fair:0", sudo=True)
    self._check_duplicate_probe(probe_result.stderr.decode("utf-8"))

    registered = genio.read_all_lines(
        "/sys/kernel/debug/tracing/kprobe_events")
    self._check_duplicate_probe(registered)

    if not self.fail_flag:
        return
    self.fail("perf is placing multiple probes at the same location ")
def test_server_stderr(self):
    """Check that a GDBServer's stderr capture holds the listening line.

    A server is started and stopped right away; its stderr file must exist
    and contain the "Listening on port" line for the server's port.
    """
    self.log.info('Testing server stderr collection')
    server = gdb.GDBServer()
    server.exit()
    self.assertTrue(os.path.exists(server.stderr_path))

    captured = genio.read_all_lines(server.stderr_path)
    expected = "Listening on port %s\n" % server.port
    self.assertIn(expected, captured)
def get_depend_modules(self, module):
    """
    Return the dependent modules configured for ``module``.

    The config file under ``module_unload_load.py.data`` in the current
    directory is scanned for the first line whose text before the first
    ``=`` equals ``module``; the text after the last ``=`` is returned.
    ``None`` is returned when no line matches.
    """
    config_path = os.path.join(os.path.abspath(''),
                               "module_unload_load.py.data/config")
    for entry in genio.read_all_lines(config_path):
        fields = entry.split('=')
        if module == fields[0]:
            return fields[-1]
    return None
def built_in_module(self, module):
    """
    Check whether the given module is built into the kernel.

    Each line of ``/lib/modules/<uname>/modules.builtin`` is reduced to its
    file name without extension and compared with ``module``.

    The previous implementation executed the file name as a shell command
    and compared ``module`` (a string) with the list returned by
    ``str.split``, so it could never report a match.

    :param module: module name to look for.
    :return: True when a listed entry has that name, False otherwise.
    """
    path = "/lib/modules/%s/modules.builtin" % self.uname
    for entry in genio.read_all_lines(path):
        # Entries are expected to look like "kernel/fs/ext4/ext4.ko":
        # keep the last path component and drop everything after the
        # first dot.
        name = entry.strip().split('/')[-1].split('.')[0]
        if module == name:
            return True
    return False
def sysfs_value_check(self):
    '''
    Tell whether the sysfs parameter file lists the test value.

    Reads /sys/module/<module>/parameters/<param_name> and returns True
    when one of its lines equals self.param_value, False otherwise.
    '''
    sysfs_path = '/sys/module/%s/parameters/%s' % (self.module,
                                                   self.param_name)
    return self.param_value in genio.read_all_lines(sysfs_path)
def run_event(self, filename, eventname):
    """Run ``perf stat`` for every event listed in ``filename``.

    Each command is the selected prefix followed directly by the line and
    ``" sleep 1"``.  Commands that exit with a non-zero status are appended
    to ``self.fail_cmd``.

    :param filename: file listing one event per line.
    :param eventname: ``'raw'`` to prefix each event with ``r`` (raw event
                      code) or ``'name'`` to use the event as written.
    :raises ValueError: when ``eventname`` is neither ``'raw'`` nor
                        ``'name'``.  Previously this surfaced as an
                        UnboundLocalError on the first event.
    """
    if eventname == 'raw':
        perf_flags = "perf stat -e r"
    elif eventname == 'name':
        perf_flags = "perf stat -e "
    else:
        raise ValueError(
            "unsupported eventname %r (expected 'raw' or 'name')"
            % (eventname,))

    for line in genio.read_all_lines(filename):
        cmd = "%s%s sleep 1" % (perf_flags, line)
        output = process.run(cmd, shell=True, ignore_status=True)
        if output.exit_status != 0:
            self.fail_cmd.append(cmd)
def test_watch_point_check(self):
    """Set perf memory watchpoints on the ``arg1`` symbols.

    The test fails when ``/dev/wptest`` does not exist.  Otherwise
    ``self.run_cmd()`` is called once, and for every ``/proc/kallsyms``
    line containing ``arg1`` a background ``perf record -e mem:0x<addr>``
    is started on the line's first field, followed by another
    ``self.run_cmd()``.
    """
    if not os.path.exists('/dev/wptest'):
        self.fail("unable to find the directory")
        return

    self.run_cmd()
    for symbol in genio.read_all_lines('/proc/kallsyms'):
        if 'arg1' not in symbol:
            continue
        # NOTE(review): perf is launched once per matching symbol line;
        # confirm this is intended rather than once after the scan.
        address = symbol.split(' ')[0]
        process.run("perf record -e mem:0x%s &" % address,
                    ignore_bg_processes=True,
                    ignore_status=True)
        self.run_cmd()
def test_server_stdout(self):
    """Check that a GDBServer captures the debugged program's stdout.

    A GDB client connects to a fresh server, runs the binary at
    ``self.return99_binary_path`` and the server is stopped.  Both capture
    files must exist and the stdout capture must contain "return 99".
    """
    self.log.info('Testing server stdout/stderr collection')
    server = gdb.GDBServer()
    client = gdb.GDB()
    client.connect(server.port)
    client.set_file(self.return99_binary_path)
    client.run()
    server.exit()

    self.assertTrue(os.path.exists(server.stdout_path))
    self.assertTrue(os.path.exists(server.stderr_path))

    captured = genio.read_all_lines(server.stdout_path)
    self.assertIn("return 99\n", captured)
def test_probe(self):
    """Place a perf probe and fail when it was registered more than once.

    One point is counted when the stderr of ``perf probe`` mentions both
    ``select_task_rq_fair`` and ``select_task_rq_fair_``, plus one point for
    every kprobe_events line mentioning the probe.  The test fails once two
    or more points have been counted.

    The counter now starts at zero.  Before, it was only created when the
    stderr check matched, so the later ``+=`` or comparison raised
    UnboundLocalError in every other case.
    """
    probe = 'select_task_rq_fair'
    duplicate = 'select_task_rq_fair_'

    result = process.run("perf probe select_task_rq_fair:15", sudo=True)
    stderr_text = result.stderr.decode("utf-8")

    fail_flag = 0
    if probe in stderr_text and duplicate in stderr_text:
        fail_flag = 1

    registered = genio.read_all_lines(
        "/sys/kernel/debug/tracing/kprobe_events")
    for line in registered:
        # A line containing 'select_task_rq_fair_' also contains
        # 'select_task_rq_fair', so one membership test covers both.
        if probe in line:
            fail_flag += 1

    if fail_flag >= 2:
        self.fail(
            "perf probe is placing multiple probe at the same location ")
def test_bsod(self):
    """Render up to 50 dmesg lines as white-on-blue text into bsod.png.

    The test returns without doing anything when PIL cannot be imported.
    The text comes from the job's ``sysinfo/pre/dmesg_-c`` file when it
    exists, otherwise a single placeholder line is drawn.  The image is
    saved in ``self.outputdir``.
    """
    try:
        from PIL import Image
        from PIL import ImageDraw
    except ImportError:
        return

    dmesg_path = os.path.join(self.job.logdir, "sysinfo", "pre", "dmesg_-c")
    self.log.info("dmesg_path: %s", dmesg_path)
    if os.path.exists(dmesg_path):
        text = genio.read_all_lines(dmesg_path)[:50]
    else:
        text = ["DREADED BLUE SCREEN OF DEATH"]

    bsod = Image.new("RGB", (640, 480), "blue")
    draw = ImageDraw.Draw(bsod)
    # Rows start 2 pixels from the top and are 12 pixels apart.
    for row, line in enumerate(text):
        draw.text((2, 2 + 12 * row), line)
    bsod.save(os.path.join(self.outputdir, "bsod.png"))
def gather_collectibles_config(config):
    """Read the sysinfo collectible lists named in ``config``.

    For each of "commands", "files", "fail_commands" and "fail_files" the
    path stored under ``sysinfo.collectibles.<name>`` is read line by line
    when it is a file; otherwise the list is empty.  Entries of a
    ``fail_*`` list that also appear in its regular counterpart are
    dropped.

    :param config: mapping-like object queried with ``get``.
    :return: dict mapping each collectible name to its list of lines.
    """
    sysinfo_files = {}
    for collectible in ("commands", "files", "fail_commands", "fail_files"):
        source_path = config.get(f"sysinfo.collectibles.{collectible}")
        if not os.path.isfile(source_path):
            log.debug("File %s does not exist.", source_path)
            entries = []
        else:
            log.info("%s configured by file: %s", collectible.title(),
                     source_path)
            entries = genio.read_all_lines(source_path)

        if "fail_" in collectible:
            # The regular lists come first in the loop, so the counterpart
            # is always present here.
            regular = sysinfo_files[collectible.split("_")[1]]
            entries = [entry for entry in entries if entry not in regular]

        sysinfo_files[collectible] = entries
    return sysinfo_files
def gather_collectibles_config(config):
    """Build the mapping of sysinfo collectibles from ``config``.

    Each of 'commands', 'files', 'fail_commands' and 'fail_files' maps to
    the lines of the file configured under
    ``sysinfo.collectibles.<name>``, or to an empty list when that path is
    not a file.  A ``fail_*`` list keeps only the entries that are absent
    from the matching regular list.

    :param config: mapping-like object queried with ``get``.
    :return: dict of collectible name to list of lines.
    """
    collected = {}
    for name in ('commands', 'files', 'fail_commands', 'fail_files'):
        path = config.get(f'sysinfo.collectibles.{name}')
        if os.path.isfile(path):
            log.info('%s configured by file: %s', name.title(), path)
            collected[name] = genio.read_all_lines(path)
        else:
            log.debug('File %s does not exist.', path)
            collected[name] = []

        if 'fail_' not in name:
            continue

        # Drop whatever is already collected unconditionally.
        already_collected = collected[name.split('_')[1]]
        collected[name] = [
            item for item in collected[name] if item not in already_collected
        ]
    return collected
def test_launch(self):
    """
    Test launching a DAOS server.

    The server is started through orterun in a bash session, using the
    host file and URI file named by the test parameters.  The test fails
    when the session is not responsive or when aexpect reports an error
    while waiting for the start banner; otherwise Ctrl-C is sent to the
    session at the end.

    :avocado: tags=all,wireup,full_regression,tiny,launchserver
    """
    hostfile = self.params.get("hostfile1", '/files/', "/tmp/hostfile1")
    urifile = self.params.get("urifile", '/files/', "/tmp/urifile")

    server_count = len(genio.read_all_lines(hostfile))

    launch_cmd = "".join([
        "../../install/bin/orterun --np {0} ".format(server_count),
        "--hostfile {0} --enable-recovery ".format(hostfile),
        ("--report-uri {0} "
         "-x D_LOG_FILE=/mnt/shared/test/tmp/daos.log "
         "-x LD_LIBRARY_PATH=/home/skirvan/daos_m10/install/lib"
         ":/home/skirvan/daos_m10/install/lib/daos_srv "
         "../../install/bin/daos_server --debug start -d /tmp/.daos "
         "-g daos_server").format(urifile),
    ])

    try:
        session = aexpect.ShellSession("/bin/bash")
        if not session.is_responsive():
            self.fail("Server did not start.\n")
        else:
            session.sendline(launch_cmd)
            session.read_until_any_line_matches(
                "XDAOS server (v0.0.2) started on rank *",
                timeout=5.0,
                print_func=print_helper)
    except (aexpect.ExpectError, aexpect.ExpectProcessTerminatedError,
            aexpect.ExpectTimeoutError, aexpect.ShellCmdError,
            aexpect.ShellError, aexpect.ShellProcessTerminatedError,
            aexpect.ShellStatusError, aexpect.ShellTimeoutError):
        self.fail("Server did not start.\n")

    session.sendcontrol("c")
def test_launch(self):
    """
    Test launching a DAOS server.

    The server is started through orterun in a bash session, using the
    host file and URI file named by the test parameters.  The test fails
    when the session is not responsive or when aexpect reports an error
    while waiting for the start banner; otherwise Ctrl-C is sent to the
    session at the end.
    """
    hostfile = self.params.get("hostfile1", '/files/', "/tmp/hostfile1")
    urifile = self.params.get("urifile", '/files/', "/tmp/urifile")

    server_count = len(genio.read_all_lines(hostfile))

    orterun_part = "../../install/bin/orterun --np {0} ".format(server_count)
    hostfile_part = "--hostfile {0} --enable-recovery ".format(hostfile)
    server_part = (
        "--report-uri {0} "
        "-x D_LOG_FILE=/mnt/shared/test/tmp/daos.log "
        "-x LD_LIBRARY_PATH=/home/skirvan/daos_m10/install/lib"
        ":/home/skirvan/daos_m10/install/lib/daos_srv "
        "../../install/bin/daos_server -d /tmp/.daos "
        "-g daos_server"
    ).format(urifile)
    launch_cmd = orterun_part + hostfile_part + server_part

    try:
        session = aexpect.ShellSession("/bin/bash")
        responsive = session.is_responsive()
        if responsive:
            session.sendline(launch_cmd)
            session.read_until_any_line_matches(
                "XDAOS server (v0.0.2) started on rank *",
                timeout=5.0,
                print_func=print_helper)
        if not responsive:
            self.fail("Server did not start.\n")
    except (aexpect.ExpectError, aexpect.ExpectProcessTerminatedError,
            aexpect.ExpectTimeoutError, aexpect.ShellCmdError,
            aexpect.ShellError, aexpect.ShellProcessTerminatedError,
            aexpect.ShellStatusError, aexpect.ShellTimeoutError):
        self.fail("Server did not start.\n")

    session.sendcontrol("c")
def run_server(hostfile, setname, basepath, uri_path=None, env_dict=None):
    """
    Launches DAOS servers in accordance with the supplied hostfile.

    The orterun process is stored in the module-level ``SESSIONS`` mapping
    under ``setname`` and its stdout is polled until the start message has
    been seen once per server, the stream returns no data, or 600 seconds
    have passed.

    :param hostfile: host file; the first space-separated field of every
                     line is taken as a server name.
    :param setname: key under which the process is stored in ``SESSIONS``.
    :param basepath: directory holding ``.build_vars.json`` and ``install``.
    :param uri_path: optional value for orterun ``--report-uri``.
    :param env_dict: optional variables exported locally and forwarded to
                     the servers with ``-x``.
    :raises ServerFailed: on any failure while starting the servers.
    """
    global SESSIONS
    try:
        servers = [entry.split(' ')[0]
                   for entry in genio.read_all_lines(hostfile)]
        server_count = len(servers)

        # Create the DAOS server configuration yaml file to pass
        # with daos_server -o <FILE_NAME>
        create_server_yaml(basepath)

        # first make sure there are no existing servers running
        kill_server(servers)

        # clean the tmpfs on the servers
        wipe_cmd = ("find /mnt/daos -mindepth 1 -maxdepth 1 "
                    "-print0 | xargs -0r rm -rf")
        for host in servers:
            subprocess.check_call(['ssh', host, wipe_cmd])

        # pile of build time variables
        with open(os.path.join(basepath, ".build_vars.json")) as vars_file:
            build_vars = json.load(vars_file)
        orterun_bin = os.path.join(build_vars["OMPI_PREFIX"], "bin", "orterun")
        daos_srv_bin = os.path.join(build_vars["PREFIX"], "bin", "daos_server")

        # Add any user supplied environment
        env_flags = []
        if env_dict is not None:
            for key, value in env_dict.items():
                os.environ[key] = value
                env_flags += ["-x", "{}={}".format(key, value)]

        launch = [orterun_bin, "--np", str(server_count)]
        if uri_path is not None:
            launch += ["--report-uri", uri_path]
        launch += ["--hostfile", hostfile, "--enable-recovery"]
        launch += env_flags
        # For now run server in insecure mode until Certificate tests are
        # in place
        launch += [
            daos_srv_bin, "-i", "-a",
            os.path.join(basepath, "install", "tmp"), "-o",
            '{}/{}'.format(basepath, AVOCADO_FILE)
        ]

        print("Start CMD>>>>{0}".format(' '.join(launch)))

        resource.setrlimit(resource.RLIMIT_CORE,
                           (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        proc = subprocess.Popen(launch,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        SESSIONS[setname] = proc

        # Switch stdout to non-blocking mode so reads return immediately.
        out_fd = proc.stdout.fileno()
        out_flags = fcntl.fcntl(out_fd, fcntl.F_GETFL)
        fcntl.fcntl(out_fd, fcntl.F_SETFL, out_flags | os.O_NONBLOCK)

        started_at = time.time()
        seen = 0
        collected = "Starting Servers\n"
        while True:
            try:
                chunk = proc.stdout.read()
            except IOError as excpn:
                if excpn.errno == errno.EAGAIN:
                    # No data available yet; try again.
                    continue
                raise excpn

            hits = re.findall("DAOS I/O server", chunk)
            collected += chunk
            seen += len(hits)

            finished = (not chunk or seen == server_count
                        or time.time() - started_at > 600)
            if not finished:
                continue

            print("<SERVER>: {}".format(collected))
            if seen != server_count:
                raise ServerFailed("Server didn't start!")
            break

        print("<SERVER> server started and took %s seconds to start" %
              (time.time() - started_at))
    except Exception as error:
        print("<SERVER> Exception occurred: {0}".format(str(error)))
        traceback.print_exception(error.__class__, error, sys.exc_info()[2])
        # we need to end the session now -- exit the shell
        try:
            failed = SESSIONS[setname]
            failed.send_signal(signal.SIGINT)
            time.sleep(5)
            # get the stderr
            stderr_text = failed.stderr.read()
            if failed.poll() is None:
                failed.kill()
            retcode = failed.wait()
            print("<SERVER> server start return code: {}\n"
                  "stderr:\n{}".format(retcode, stderr_text))
        except KeyError:
            # No process was registered under this name.
            pass
        raise ServerFailed("Server didn't start!")
def runServer(hostfile, setname, basepath, uri_path=None, env_dict=None):
    """
    Launches DAOS servers in accordance with the supplied hostfile.

    The orterun process is stored in the module-level ``sessions`` mapping
    under ``setname`` and its stdout is polled until the start message has
    been seen once per server, the stream returns no data, or 600 seconds
    have passed.

    :param hostfile: host file; the first space-separated field of every
                     line is taken as a server name.
    :param setname: server group name (``-g``) and key into ``sessions``.
    :param basepath: directory holding ``.build_vars.json`` and ``install``.
    :param uri_path: optional value for orterun ``--report-uri``.
    :param env_dict: optional variables added to ``os.environ`` before the
                     environment is scanned for variables to forward.
    :raises ServerFailed: on any failure while starting the servers.
    """
    global sessions
    try:
        servers = [entry.split(' ')[0]
                   for entry in genio.read_all_lines(hostfile)]
        server_count = len(servers)

        # first make sure there are no existing servers running
        killServer(servers)

        # pile of build time variables
        with open(os.path.join(basepath, ".build_vars.json")) as vars_file:
            build_vars = json.load(vars_file)
        orterun_bin = os.path.join(build_vars["OMPI_PREFIX"], "bin", "orterun")
        daos_srv_bin = os.path.join(build_vars["PREFIX"], "bin", "daos_server")

        # User supplied variables are exported first so that the scan below
        # picks them up as well.
        if env_dict is not None:
            for name, value in env_dict.items():
                os.environ[name] = value

        forwarded_patterns = [
            'CRT_.*', 'DAOS_.*', 'ABT_.*', 'DD_(STDERR|LOG)', 'D_LOG_.*',
            'OFI_.*'
        ]
        env_flags = []
        for env_name, env_value in os.environ.items():
            for pattern in forwarded_patterns:
                if not re.match(pattern, env_name):
                    continue
                env_flags += ["-x", "{}={}".format(env_name, env_value)]

        launch = [orterun_bin, "--np", str(server_count)]
        if uri_path is not None:
            launch += ["--report-uri", uri_path]
        launch += ["--hostfile", hostfile, "--enable-recovery"]
        launch += env_flags
        launch += [
            "-x", "DD_SUBSYS=all", "-x", "DD_MASK=all", daos_srv_bin, "-g",
            setname, "-c", "1", "-a",
            os.path.join(basepath, "install", "tmp"), "-d",
            os.path.join(os.sep, "var", "run", "user", str(os.geteuid()))
        ]

        print("Start CMD>>>>{0}".format(' '.join(launch)))

        resource.setrlimit(resource.RLIMIT_CORE,
                           (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        proc = subprocess.Popen(launch,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        sessions[setname] = proc

        # Switch stdout to non-blocking mode so reads return immediately.
        out_fd = proc.stdout.fileno()
        out_flags = fcntl.fcntl(out_fd, fcntl.F_GETFL)
        fcntl.fcntl(out_fd, fcntl.F_SETFL, out_flags | os.O_NONBLOCK)

        started_at = time.time()
        seen = 0
        collected = "Starting Servers\n"
        while True:
            try:
                chunk = proc.stdout.read()
            except IOError as excpn:
                if excpn.errno == errno.EAGAIN:
                    # No data available yet; try again.
                    continue
                raise excpn

            hits = re.findall("DAOS I/O server", chunk)
            collected += chunk
            seen += len(hits)

            finished = (not chunk or seen == server_count
                        or time.time() - started_at > 600)
            if not finished:
                continue

            print("<SERVER>: {}".format(collected))
            if seen != server_count:
                raise ServerFailed("Server didn't start!")
            break

        print("<SERVER> server started and took %s seconds to start" %
              (time.time() - started_at))
    except Exception as excpn:
        print("<SERVER> Exception occurred: {0}".format(str(excpn)))
        # we need to end the session now -- exit the shell
        try:
            failed = sessions[setname]
            failed.send_signal(signal.SIGINT)
            time.sleep(5)
            # get the stderr
            stderr_text = failed.stderr.read()
            if failed.poll() is None:
                failed.kill()
            retcode = failed.wait()
            print("<SERVER> server start return code: {}\n"
                  "stderr:\n{}".format(retcode, stderr_text))
        except KeyError:
            # No process was registered under this name.
            pass
        raise ServerFailed("Server didn't start!")
def run_server(test, hostfile, setname, uri_path=None, env_dict=None,
               clean=True):
    # pylint: disable=unused-argument
    """Launch DAOS servers in accordance with the supplied hostfile.

    A server yaml file is written into ``test.tmp``, leftover servers are
    stopped, the tmpfs mount is optionally emptied, and orterun is started.
    The process is stored in ``SESSIONS`` under ``setname`` and its stdout
    is polled until the start message has been seen once per server, the
    stream returns no data, or 600 seconds have passed.

    Args:
        test (Test): avocado Test object
        hostfile (str): hostfile defining on which hosts to start servers
        setname (str): session name
        uri_path (str, optional): path to uri file. Defaults to None.
        env_dict (dict, optional): dictionary on env variable names and
            values. Defaults to None.
        clean (bool, optional): clean the mount point. Defaults to True.

    Raises:
        ServerFailed: if there is an error starting the servers

    """
    global SESSIONS    # pylint: disable=global-variable-not-assigned
    try:
        servers = [entry.split(' ')[0]
                   for entry in genio.read_all_lines(hostfile)]
        server_count = len(servers)

        # Pile of build time variables
        with open("../../.build_vars.json") as vars_file:
            build_vars = json.load(vars_file)

        # Create the DAOS server configuration yaml file to pass
        # with daos_server -o <FILE_NAME>
        print("Creating the server yaml file in {}".format(test.tmp))
        server_yaml = os.path.join(test.tmp, AVOCADO_FILE)
        server_config = DaosServerConfig()
        server_config.get_params(test)
        # The first server in the hostfile is used as the access point.
        access_points = ":".join((servers[0], str(server_config.port)))
        server_config.access_points.value = access_points.split()
        server_config.update_log_files(
            test.control_log, test.helper_log, test.server_log)
        server_config.create_yaml(server_yaml)

        # first make sure there are no existing servers running
        print("Removing any existing server processes")
        kill_server(servers)

        # clean the tmpfs on the servers
        if clean:
            print("Cleaning the server tmpfs directories")
            result = pcmd(
                servers,
                "find /mnt/daos -mindepth 1 -maxdepth 1 -print0 | "
                "xargs -0r rm -rf",
                verbose=False)
            if len(result) > 1 or 0 not in result:
                failures = [str(result[key]) for key in result if key != 0]
                raise ServerFailed(
                    "Error cleaning tmpfs on servers: {}".format(
                        ", ".join(failures)))

        load_mpi('openmpi')
        orterun_bin = find_executable('orterun')
        if orterun_bin is None:
            raise ServerFailed("Can't find orterun")

        launch = [
            orterun_bin, "--np", str(server_count),
            "--mca", "btl_openib_warn_default_gid_prefix", "0",
            "--mca", "btl", "tcp,self",
            "--mca", "oob", "tcp",
            "--mca", "pml", "ob1",
            "--hostfile", hostfile,
            "--enable-recovery", "--tag-output",
        ]

        # Add any user supplied environment
        if env_dict is not None:
            for key, value in env_dict.items():
                os.environ[key] = value
                launch += ["-x", "{}={}".format(key, value)]

        # the remote orte needs to know where to find daos, in the
        # case that it's not in the system prefix
        # but it should already be in our PATH, so just pass our
        # PATH along to the remote
        if build_vars["PREFIX"] != "/usr":
            launch += ["-x", "PATH"]

        # Run server in insecure mode until Certificate tests are in place
        launch += [
            os.path.join(build_vars["PREFIX"], "bin", "daos_server"),
            "--debug", "--config", server_yaml, "start", "-i",
            "--recreate-superblocks"
        ]

        print("Start CMD>>>>{0}".format(' '.join(launch)))

        resource.setrlimit(resource.RLIMIT_CORE,
                           (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        proc = subprocess.Popen(launch,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        SESSIONS[setname] = proc

        # Switch stdout to non-blocking mode so reads return immediately.
        out_fd = proc.stdout.fileno()
        out_flags = fcntl.fcntl(out_fd, fcntl.F_GETFL)
        fcntl.fcntl(out_fd, fcntl.F_SETFL, out_flags | os.O_NONBLOCK)

        started_at = time.time()
        seen = 0
        collected = "Starting Servers\n"
        while True:
            try:
                chunk = proc.stdout.read()
            except IOError as excpn:
                if excpn.errno == errno.EAGAIN:
                    # No data available yet; try again.
                    continue
                raise ServerFailed("Server didn't start: {}".format(excpn))

            hits = re.findall("DAOS I/O server.*started", chunk)
            collected += chunk
            seen += len(hits)

            finished = (not chunk or seen == server_count
                        or time.time() - started_at > 600)
            if not finished:
                continue

            print("<SERVER>: {}".format(collected))
            if seen != server_count:
                raise ServerFailed("Server didn't start!")
            break

        print("<SERVER> server started and took {} seconds to start".format(
            time.time() - started_at))

    except Exception as error:
        print("<SERVER> Exception occurred: {0}".format(str(error)))
        traceback.print_exception(error.__class__, error, sys.exc_info()[2])
        # We need to end the session now -- exit the shell
        try:
            failed = SESSIONS[setname]
            failed.send_signal(signal.SIGINT)
            time.sleep(5)
            # get the stderr
            stderr_text = failed.stderr.read()
            if failed.poll() is None:
                failed.kill()
            retcode = failed.wait()
            print("<SERVER> server start return code: {}\nstderr:\n{}".format(
                retcode, stderr_text))
        except KeyError:
            # No process was registered under this name.
            pass
        raise ServerFailed("Server didn't start!")
def runServer(hostfile, setname, basepath):
    """
    Launches DAOS servers in accordance with the supplied hostfile.

    orterun is started from an aexpect shell session that is stored in the
    module-level ``sessions`` mapping under ``setname``.  The session output
    is polled until the start message has been seen once per server or 300
    seconds have passed.  When the session is not responsive nothing is
    sent and the function returns silently.

    Environment variables whose names match one of the forwarded patterns
    are passed to the servers as ``-x NAME="VALUE"``.  The previous code
    used the ``(name, value)`` tuple from ``os.environ.items()`` both as the
    name and as the lookup key, which failed as soon as a variable matched.

    :param hostfile: host file handed to orterun; its line count is used as
                     the ``--np`` value.
    :param setname: server group name (``-g``) and key into ``sessions``.
    :param basepath: directory holding ``.build_vars.json`` and ``install``.
    :raises ServerFailed: on any failure while starting the servers.
    """
    global sessions
    try:
        server_count = len(genio.read_all_lines(hostfile))

        # pile of build time variables
        with open(os.path.join(basepath, ".build_vars.json")) as json_vars:
            build_vars = json.load(json_vars)
        orterun_bin = os.path.join(build_vars["OMPI_PREFIX"], "bin/orterun")
        daos_srv_bin = os.path.join(build_vars["PREFIX"], "bin/daos_server")
        ld_lib_path = os.path.join(build_vars["PREFIX"], "lib") + os.pathsep + \
            os.path.join(build_vars["PREFIX"], "lib/daos_srv")

        env_vars = ['CRT_.*', 'DAOS_.*', 'ABT_.*', 'DD_(STDERR|LOG)',
                    'D_LOG_.*', 'OFI_.*']
        env_args = ""
        for env_name, env_value in os.environ.items():
            for pat in env_vars:
                if re.match(pat, env_name):
                    env_args += "-x {0}=\"{1}\" ".format(env_name, env_value)

        initial_cmd = "/bin/sh"
        server_cmd = orterun_bin + " --np {0} ".format(server_count)
        server_cmd += "--hostfile {0} --enable-recovery ".format(hostfile)
        server_cmd += env_args
        server_cmd += "-x DD_SUBSYS=all -x DD_MASK=all "
        server_cmd += "-x LD_LIBRARY_PATH={0} ".format(ld_lib_path)
        server_cmd += daos_srv_bin + " -g {0} -c 1 ".format(setname)
        server_cmd += " -a" + basepath + "/install/tmp/"

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Start CMD>>>>{0}".format(server_cmd))

        sessions[setname] = aexpect.ShellSession(initial_cmd)
        if sessions[setname].is_responsive():
            sessions[setname].sendline(server_cmd)
            timeout = 300
            start_time = time.time()
            result = 0
            pattern = "DAOS server"
            expected_data = "Starting Servers\n"
            while True:
                output = sessions[setname].read_nonblocking(2, 2)
                match = re.findall(pattern, output)
                expected_data = expected_data + output
                result += len(match)
                if result == server_count or \
                        time.time() - start_time > timeout:
                    print("<SERVER>: {}".format(expected_data))
                    if result != server_count:
                        raise ServerFailed("Server didn't start!")
                    break
            print("<SERVER> server started and took %s seconds to start" %
                  (time.time() - start_time))
    except Exception as e:
        print("<SERVER> Exception occurred: {0}".format(str(e)))
        raise ServerFailed("Server didn't start!")
def run_server(hostfile, setname, basepath, uri_path=None, env_dict=None):
    """
    Launches DAOS servers in accordance with the supplied hostfile.

    The orterun process is stored in the module-level ``SESSIONS`` mapping
    under ``setname`` and its stdout is polled until the start message has
    been seen once per server, the stream returns no data, or 600 seconds
    have passed.

    On failure the traceback of the caught exception is printed, the
    process (if one was registered) is interrupted and its stderr and
    return code are reported.  The traceback call used to reference
    ``excpn``, a name that is only bound inside the read loop's own
    ``except IOError`` clause, so the handler itself crashed and skipped
    the cleanup.

    :param hostfile: host file; the first space-separated field of every
                     line is taken as a server name.
    :param setname: key under which the process is stored in ``SESSIONS``.
    :param basepath: directory holding ``.build_vars.json`` and ``install``.
    :param uri_path: optional value for orterun ``--report-uri``.
    :param env_dict: optional variables exported locally and forwarded to
                     the servers with ``-x``.
    :raises ServerFailed: on any failure while starting the servers.
    """
    global SESSIONS
    try:
        servers = [line.split(' ')[0]
                   for line in genio.read_all_lines(hostfile)]
        server_count = len(servers)

        # Create the DAOS server configuration yaml file to pass
        # with daos_server -o <FILE_NAME>
        create_server_yaml(basepath)

        # first make sure there are no existing servers running
        kill_server(servers)

        # clean the tmpfs on the servers
        for server in servers:
            subprocess.check_call(['ssh', server,
                                   ("find /mnt/daos -mindepth 1 -maxdepth 1 "
                                    "-print0 | xargs -0r rm -rf")])

        # pile of build time variables
        with open(os.path.join(basepath, ".build_vars.json")) as json_vars:
            build_vars = json.load(json_vars)
        orterun_bin = os.path.join(build_vars["OMPI_PREFIX"], "bin", "orterun")
        daos_srv_bin = os.path.join(build_vars["PREFIX"], "bin", "daos_server")

        env_args = []
        # Add any user supplied environment
        if env_dict is not None:
            for key, value in env_dict.items():
                os.environ[key] = value
                env_args.extend(["-x", "{}={}".format(key, value)])

        server_cmd = [orterun_bin, "--np", str(server_count)]
        if uri_path is not None:
            server_cmd.extend(["--report-uri", uri_path])
        server_cmd.extend(["--hostfile", hostfile, "--enable-recovery"])
        server_cmd.extend(env_args)
        server_cmd.extend([daos_srv_bin, "-a",
                           os.path.join(basepath, "install", "tmp"),
                           "-o", '{}/{}'.format(basepath, AVOCADO_FILE)])

        print("Start CMD>>>>{0}".format(' '.join(server_cmd)))

        resource.setrlimit(
            resource.RLIMIT_CORE,
            (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        SESSIONS[setname] = subprocess.Popen(server_cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
        # Switch stdout to non-blocking mode so reads return immediately.
        fdesc = SESSIONS[setname].stdout.fileno()
        fstat = fcntl.fcntl(fdesc, fcntl.F_GETFL)
        fcntl.fcntl(fdesc, fcntl.F_SETFL, fstat | os.O_NONBLOCK)

        timeout = 600
        start_time = time.time()
        result = 0
        pattern = "DAOS I/O server"
        expected_data = "Starting Servers\n"
        while True:
            output = ""
            try:
                output = SESSIONS[setname].stdout.read()
            except IOError as excpn:
                if excpn.errno != errno.EAGAIN:
                    raise excpn
                # No data available yet; try again.
                continue
            match = re.findall(pattern, output)
            expected_data += output
            result += len(match)
            if not output or result == server_count or \
               time.time() - start_time > timeout:
                print("<SERVER>: {}".format(expected_data))
                if result != server_count:
                    raise ServerFailed("Server didn't start!")
                break
        print("<SERVER> server started and took %s seconds to start" %
              (time.time() - start_time))
    except Exception as error:
        print("<SERVER> Exception occurred: {0}".format(str(error)))
        # Report the exception that was actually caught here.
        traceback.print_exception(error.__class__, error, sys.exc_info()[2])
        # we need to end the session now -- exit the shell
        try:
            SESSIONS[setname].send_signal(signal.SIGINT)
            time.sleep(5)
            # get the stderr
            stderr_text = SESSIONS[setname].stderr.read()
            if SESSIONS[setname].poll() is None:
                SESSIONS[setname].kill()
            retcode = SESSIONS[setname].wait()
            print("<SERVER> server start return code: {}\n"
                  "stderr:\n{}".format(retcode, stderr_text))
        except KeyError:
            # No process was registered under this name.
            pass
        raise ServerFailed("Server didn't start!")