def _start_analytical_engine(self):
    """Launch the analytical engine via MPI and wait for its RPC port.

    Builds the launch command from ``self._num_workers`` and ``self._hosts``,
    spawns the engine in a new session, attaches pipe watchers to its
    stdout/stderr, stores the process on ``self._analytical_engine_process``
    and then blocks until ``rpc_port`` is no longer reported as free.

    Raises:
        RuntimeError: if the engine process exits before the port is taken,
            or if ``self._timeout_seconds`` is set and elapses first.
    """
    rmcp = ResolveMPICmdPrefix()
    cmd, mpi_env = rmcp.resolve(self._num_workers, self._hosts)

    # The first entry of the comma-separated host list is used as the
    # address of the RPC endpoint.
    master = self._hosts.split(",")[0]
    rpc_port = get_free_port(master)
    self._analytical_engine_endpoint = f"{master}:{rpc_port}"

    cmd.append(ANALYTICAL_ENGINE_PATH)
    cmd.extend(["--host", "0.0.0.0"])
    cmd.extend(["--port", str(rpc_port)])
    cmd.extend(["--vineyard_shared_mem", self._shared_mem])

    # The glog verbosity is passed as a command-line flag under OpenMPI and
    # through the GLOG_v environment variable otherwise.
    if rmcp.openmpi():
        cmd.extend(["-v", str(self._glog_level)])
    else:
        mpi_env["GLOG_v"] = str(self._glog_level)

    if self._vineyard_socket:
        cmd.extend(["--vineyard_socket", self._vineyard_socket])

    env = os.environ.copy()
    env.update(mpi_env)

    logger.info("Launch analytical engine with command: %s", " ".join(cmd))

    process = subprocess.Popen(
        cmd,
        start_new_session=True,
        cwd=os.getcwd(),
        env=env,
        encoding="utf-8",
        errors="replace",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        bufsize=1,
    )

    logger.info("Server is initializing analytical engine.")
    # NOTE(review): PipeWatcher presumably relays the child's pipes to the
    # given streams; the watchers are attached to the process object so they
    # stay reachable alongside it.
    stdout_watcher = PipeWatcher(process.stdout, sys.stdout)
    stderr_watcher = PipeWatcher(process.stderr, sys.stderr)
    setattr(process, "stdout_watcher", stdout_watcher)
    setattr(process, "stderr_watcher", stderr_watcher)

    self._analytical_engine_process = process

    # Use a monotonic clock so wall-clock adjustments cannot shorten or
    # extend the timeout.
    start_time = time.monotonic()
    while is_free_port(rpc_port):
        # Fail fast: if the engine already died it will never bind the port,
        # so waiting for the timeout (or forever, when no timeout is set)
        # would only hide the real failure.
        exit_code = process.poll()
        if exit_code is not None:
            raise RuntimeError(
                "Launch analytical engine failed: process exited with "
                "code {0}.".format(exit_code)
            )
        time.sleep(1)
        if (
            self._timeout_seconds
            and time.monotonic() - start_time > self._timeout_seconds
        ):
            raise RuntimeError("Launch analytical engine failed due to timeout.")

    logger.info(
        "Analytical engine is ready, endpoint is %s",
        self._analytical_engine_endpoint,
    )
def _launch_analytical_engine_locally(self):
    """Distribute a hosts file to every pod, then spawn the engine via MPI.

    Writes one ``"<ip> <name>"`` line per pod to ``/tmp/kube_hosts``, copies
    that file into each pod with ``kubectl cp``, builds the MPI launch
    command and starts it, storing the resulting process on
    ``self._analytical_engine_process``.

    Raises:
        subprocess.CalledProcessError: if any ``kubectl cp`` call exits
            with a non-zero status.
    """
    endpoint_text = str(self._analytical_engine_endpoint)
    logger.info("Starting GAE rpc service on {} ...".format(endpoint_text))

    # Generate the hostfile: the pod at position N in the IP list is paired
    # with the pod name at the same position.
    with open("/tmp/kube_hosts", "w") as hostfile:
        for position, pod_ip in enumerate(self._pod_ip_list):
            pod_name = self._pod_name_list[position]
            hostfile.write("{} {}\n".format(pod_ip, pod_name))

    # Distribute the hostfile into the engine container of every pod.
    for pod_name in self._pod_name_list:
        copy_cmd = [
            "kubectl",
            "-n",
            self._namespace,
            "cp",
            "/tmp/kube_hosts",
            "{}:/etc/hosts_of_nodes".format(pod_name),
            "-c",
            self._engine_container_name,
        ]
        subprocess.check_call(copy_cmd)

    # Launch the engine.
    rmcp = ResolveMPICmdPrefix(rsh_agent=True)
    host_list = ",".join(self._pod_name_list)
    cmd, mpi_env = rmcp.resolve(self._num_workers, host_list)
    cmd.extend(
        [
            self._analytical_engine_exec,
            "--host",
            "0.0.0.0",
            "--port",
            str(self._random_analytical_engine_rpc_port),
        ]
    )

    # The glog verbosity goes on the command line under OpenMPI and into the
    # GLOG_v environment variable otherwise.
    glog_level = str(self._glog_level)
    if rmcp.openmpi():
        cmd.extend(["-v", glog_level])
    else:
        mpi_env["GLOG_v"] = glog_level

    cmd.extend(["--vineyard_socket", "/tmp/vineyard_workspace/vineyard.sock"])

    joined_cmd = " ".join(cmd)
    logger.debug("Analytical engine launching command: {}".format(joined_cmd))

    child_env = os.environ.copy()
    child_env.update(mpi_env)

    # stderr is merged into stdout, so a single watcher covers both.
    engine = self._analytical_engine_process = subprocess.Popen(
        cmd,
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf-8",
    )

    # NOTE(review): the meaning of ``drop=True`` is defined by PipeWatcher,
    # which is not visible here.
    engine.stdout_watcher = PipeWatcher(engine.stdout, sys.stdout, drop=True)
def _start_analytical_engine(self):
    """Spawn the analytical engine via MPI without waiting for readiness.

    Chooses an RPC port through ``self._get_free_port`` on the first host of
    ``self._hosts``, records ``"<host>:<port>"`` as
    ``self._analytical_engine_endpoint``, starts the engine with stderr
    merged into stdout and stores the process on
    ``self._analytical_engine_process``.
    """
    rmcp = ResolveMPICmdPrefix()
    cmd, mpi_env = rmcp.resolve(self._num_workers, self._hosts)

    # The first entry of the comma-separated host list hosts the endpoint.
    master = self._hosts.split(",")[0]
    port_arg = str(self._get_free_port(master))
    self._analytical_engine_endpoint = "{}:{}".format(master, port_arg)

    cmd.extend([ANALYTICAL_ENGINE_PATH, "--host", "0.0.0.0", "--port", port_arg])

    # The glog verbosity goes on the command line under OpenMPI and into the
    # GLOG_v environment variable otherwise.
    glog_level = str(self._glog_level)
    if rmcp.openmpi():
        cmd.extend(["-v", glog_level])
    else:
        mpi_env["GLOG_v"] = glog_level

    if self._vineyard_socket:
        cmd.extend(["--vineyard_socket", self._vineyard_socket])

    child_env = os.environ.copy()
    child_env.update(mpi_env)

    # The engine runs from the directory that contains its executable.
    engine = subprocess.Popen(
        cmd,
        cwd=os.path.dirname(ANALYTICAL_ENGINE_PATH),
        env=child_env,
        universal_newlines=True,
        encoding="utf-8",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
    )

    logger.info("Server is initializing analytical engine.")
    # stderr is merged into stdout above, so one watcher covers both.
    engine.stdout_watcher = PipeWatcher(engine.stdout, sys.stdout)

    self._analytical_engine_process = engine
def _launch_analytical_engine_locally(self):
    """Distribute a hosts file to every pod, then spawn the engine via MPI.

    Writes one ``"<ip> <name>"`` line per pod to ``kube_hosts`` in the
    directory returned by ``get_tempdir()``, copies it into each pod with
    ``kubectl cp``, builds the MPI launch command and starts it. The process
    is stored on ``self._analytical_engine_process`` with watchers attached
    to its stdout and stderr.

    Raises:
        FileNotFoundError: if ``kubectl`` cannot be found on ``PATH``.
        subprocess.CalledProcessError: if any ``kubectl cp`` call exits
            with a non-zero status.
    """
    logger.info(
        "Starting GAE rpc service on {} ...".format(
            str(self._analytical_engine_endpoint)
        )
    )

    # Resolve kubectl once, up front. shutil.which returns None when the
    # executable is missing, and passing None in an argv makes subprocess
    # fail with an opaque TypeError instead of a useful message.
    kubectl = shutil.which("kubectl")
    if kubectl is None:
        raise FileNotFoundError(
            "kubectl executable not found in PATH; cannot distribute hostfile."
        )

    # Generate and distribute the hostfile.
    tempdir = get_tempdir()
    kube_hosts_path = os.path.join(tempdir, "kube_hosts")
    with open(kube_hosts_path, "w") as f:
        # Indexing (rather than zip) keeps a length mismatch between the two
        # lists loud instead of silently truncating the hostfile.
        for i, pod_ip in enumerate(self._pod_ip_list):
            f.write("{} {}\n".format(pod_ip, self._pod_name_list[i]))

    for pod in self._pod_name_list:
        subprocess.check_call(
            [
                kubectl,
                "-n",
                self._saved_locals["namespace"],
                "cp",
                kube_hosts_path,
                "{}:/tmp/hosts_of_nodes".format(pod),
                "-c",
                self._engine_container_name,
            ]
        )

    # Launch the engine.
    rmcp = ResolveMPICmdPrefix(rsh_agent=True)
    cmd, mpi_env = rmcp.resolve(self._num_workers, ",".join(self._pod_name_list))

    cmd.append(ANALYTICAL_ENGINE_PATH)
    cmd.extend(["--host", "0.0.0.0"])
    cmd.extend(["--port", str(self._random_analytical_engine_rpc_port)])
    cmd.extend(["--vineyard_shared_mem", self._saved_locals["vineyard_shared_mem"]])

    # The glog verbosity goes on the command line under OpenMPI and into the
    # GLOG_v environment variable otherwise.
    if rmcp.openmpi():
        cmd.extend(["-v", str(self._glog_level)])
    else:
        mpi_env["GLOG_v"] = str(self._glog_level)

    cmd.extend(
        [
            "--vineyard_socket",
            os.path.join(tempdir, "vineyard_workspace", "vineyard.sock"),
        ]
    )

    logger.info("Analytical engine launching command: {}".format(" ".join(cmd)))

    env = os.environ.copy()
    env.update(mpi_env)

    self._analytical_engine_process = subprocess.Popen(
        cmd,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        errors="replace",
        universal_newlines=True,
        bufsize=1,
    )

    # NOTE(review): the meaning of ``drop=True`` is defined by PipeWatcher,
    # which is not visible here.
    stdout_watcher = PipeWatcher(
        self._analytical_engine_process.stdout, sys.stdout, drop=True
    )
    stderr_watcher = PipeWatcher(
        self._analytical_engine_process.stderr, sys.stderr, drop=True
    )
    setattr(self._analytical_engine_process, "stdout_watcher", stdout_watcher)
    setattr(self._analytical_engine_process, "stderr_watcher", stderr_watcher)