def _launch_etcd(self):
    """Start a local etcd server and block until its client port is in use.

    The peer port is 2380 when that port is free, otherwise whatever
    ``get_free_port()`` returns; the client port is chosen the same way
    starting from 2379. Both ports and the resulting client endpoint are
    stored on ``self``. The child is spawned in its own session with all
    standard streams discarded and is stored as ``self._etcd_process``.

    Raises:
        RuntimeError: If the etcd process exits before the client port is
            taken, or if ``self._timeout_seconds`` is truthy and elapses
            before the port is taken.
    """
    etcd_exec = self._find_etcd()
    self._etcd_peer_port = 2380 if is_free_port(2380) else get_free_port()
    self._etcd_client_port = 2379 if is_free_port(2379) else get_free_port()
    self._etcd_endpoint = "http://127.0.0.1:{0}".format(self._etcd_client_port)

    env = os.environ.copy()
    # Presumably raises etcd's per-transaction operation limit -- confirm
    # against the etcd configuration reference.
    env.update({"ETCD_MAX_TXN_OPS": "102400"})

    local_peer_url = "http://127.0.0.1:{0}".format(self._etcd_peer_port)
    cmd = etcd_exec + [
        "--data-dir",
        str(self._instance_workspace),
        "--listen-peer-urls",
        "http://0.0.0.0:{0}".format(self._etcd_peer_port),
        "--listen-client-urls",
        "http://0.0.0.0:{0}".format(self._etcd_client_port),
        "--advertise-client-urls",
        self._etcd_endpoint,
        "--initial-cluster",
        "default={0}".format(local_peer_url),
        "--initial-advertise-peer-urls",
        local_peer_url,
    ]
    logger.info("Launch etcd with command: %s", " ".join(cmd))

    process = subprocess.Popen(
        cmd,
        start_new_session=True,
        cwd=os.getcwd(),
        env=env,
        encoding="utf-8",
        errors="replace",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        universal_newlines=True,
        bufsize=1,
    )
    logger.info("Server is initializing etcd.")
    self._etcd_process = process

    start_time = time.time()
    while is_free_port(self._etcd_client_port):
        # A dead child will never open the port; fail now instead of
        # waiting for the timeout (or forever when no timeout is set).
        exit_code = process.poll()
        if exit_code is not None:
            raise RuntimeError(
                "Launch etcd service failed: process exited with code "
                "{0}.".format(exit_code)
            )
        time.sleep(1)
        if (
            self._timeout_seconds
            and self._timeout_seconds + start_time < time.time()
        ):
            raise RuntimeError("Launch etcd service failed due to timeout.")
    logger.info(
        "Etcd is ready, endpoint is localhost:%s", self._etcd_client_port
    )
def _start_analytical_engine(self):
    """Spawn the analytical engine and block until its RPC port is in use.

    The command prefix and extra environment come from
    ``ResolveMPICmdPrefix().resolve(num_workers, hosts)``. The first entry
    of the comma-separated ``self._hosts`` is used as the master host, a
    free port is requested for it, and ``"<master>:<port>"`` is stored as
    ``self._analytical_engine_endpoint``. The glog level is passed as
    ``-v`` when ``rmcp.openmpi()`` is truthy and through the ``GLOG_v``
    environment variable otherwise. The child's stdout/stderr are piped
    into ``PipeWatcher`` objects attached to the process, which is stored
    as ``self._analytical_engine_process``.

    Raises:
        RuntimeError: If the engine process exits before the RPC port is
            taken, or if ``self._timeout_seconds`` is truthy and elapses
            before the port is taken.
    """
    rmcp = ResolveMPICmdPrefix()
    cmd, mpi_env = rmcp.resolve(self._num_workers, self._hosts)

    master = self._hosts.split(",")[0]
    rpc_port = get_free_port(master)
    self._analytical_engine_endpoint = f"{master}:{rpc_port}"

    cmd.append(ANALYTICAL_ENGINE_PATH)
    cmd.extend(["--host", "0.0.0.0"])
    cmd.extend(["--port", str(rpc_port)])
    cmd.extend(["--vineyard_shared_mem", self._shared_mem])

    if rmcp.openmpi():
        cmd.extend(["-v", str(self._glog_level)])
    else:
        mpi_env["GLOG_v"] = str(self._glog_level)

    if self._vineyard_socket:
        cmd.extend(["--vineyard_socket", self._vineyard_socket])

    env = os.environ.copy()
    env.update(mpi_env)

    logger.info("Launch analytical engine with command: %s", " ".join(cmd))

    process = subprocess.Popen(
        cmd,
        start_new_session=True,
        cwd=os.getcwd(),
        env=env,
        encoding="utf-8",
        errors="replace",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        bufsize=1,
    )

    logger.info("Server is initializing analytical engine.")
    stdout_watcher = PipeWatcher(process.stdout, sys.stdout)
    stderr_watcher = PipeWatcher(process.stderr, sys.stderr)
    setattr(process, "stdout_watcher", stdout_watcher)
    setattr(process, "stderr_watcher", stderr_watcher)

    self._analytical_engine_process = process

    start_time = time.time()
    # NOTE(review): the port was picked with get_free_port(master) but is
    # probed here without a host argument -- confirm this is intended when
    # the master is not the local machine.
    while is_free_port(rpc_port):
        # A dead launcher will never open the port; fail now instead of
        # waiting for the timeout (or forever when no timeout is set).
        exit_code = process.poll()
        if exit_code is not None:
            raise RuntimeError(
                "Launch analytical engine failed: process exited with code "
                "{0}.".format(exit_code)
            )
        time.sleep(1)
        if (
            self._timeout_seconds
            and self._timeout_seconds + start_time < time.time()
        ):
            raise RuntimeError("Launch analytical engine failed due to timeout.")
    logger.info(
        "Analytical engine is ready, endpoint is %s",
        self._analytical_engine_endpoint,
    )
def _launch_zetcd(self):
    """Start a zetcd proxy for ``self._etcd_endpoint`` and wait for its port.

    The listening port is 2181 when that port is free, otherwise whatever
    ``get_free_port()`` returns; it is stored as ``self._zookeeper_port``.
    The child is spawned in its own session with its output discarded and
    is stored as ``self._zetcd_process``.

    Raises:
        RuntimeError: If the ``zetcd`` executable is not on ``PATH``, if
            the process exits before the port is taken, or if
            ``self._timeout_seconds`` is truthy and elapses before the
            port is taken.
    """
    self._zookeeper_port = 2181 if is_free_port(2181) else get_free_port()

    zetcd_exec = shutil.which("zetcd")
    if not zetcd_exec:
        raise RuntimeError("zetcd command not found.")

    cmd = [
        zetcd_exec,
        "--zkaddr",
        "0.0.0.0:{}".format(self._zookeeper_port),
        "--endpoints",
        self._etcd_endpoint,
    ]

    process = subprocess.Popen(
        cmd,
        start_new_session=True,
        cwd=os.getcwd(),
        env=os.environ.copy(),
        encoding="utf-8",
        errors="replace",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1,
    )

    logger.info("Server is initializing zetcd.")
    self._zetcd_process = process

    start_time = time.time()
    while is_free_port(self._zookeeper_port):
        # A dead child will never open the port; fail now instead of
        # waiting for the timeout (or forever when no timeout is set).
        exit_code = process.poll()
        if exit_code is not None:
            raise RuntimeError(
                "Launch zetcd proxy service failed: process exited with "
                "code {0}.".format(exit_code)
            )
        time.sleep(1)
        if (
            self._timeout_seconds
            and self._timeout_seconds + start_time < time.time()
        ):
            raise RuntimeError(
                "Launch zetcd proxy service failed due to timeout."
            )
    logger.info(
        "ZEtcd is ready, endpoint is localhost:%s", self._zookeeper_port
    )
def _create_vineyard(self):
    """Start a vineyardd instance unless a vineyard socket is already set.

    When ``self._vineyard_socket`` is falsy, a socket path is built from
    ``self._vineyard_socket_prefix`` plus a timestamp, the RPC port is 9600
    when free (otherwise whatever ``get_free_port()`` returns), and
    vineyardd is spawned with ``GLOG_v`` set to ``self._glog_level``. Its
    combined stdout/stderr is handed to a ``PipeWatcher`` created with
    ``suppressed=True`` unless DEBUG logging is enabled. The socket path
    and process are stored on ``self``, then the method polls once per
    second until the socket path exists.

    Raises:
        RuntimeError: If vineyardd exits before the socket path appears,
            or if ``self._timeout_seconds`` is truthy and elapses before
            the socket path appears.
    """
    # NOTE(review): assumes the whole original body, including the wait
    # loop, was nested under ``if not self._vineyard_socket`` -- confirm.
    if self._vineyard_socket:
        return

    ts = get_timestamp()
    vineyard_socket = f"{self._vineyard_socket_prefix}{ts}"
    self._vineyard_rpc_port = 9600 if is_free_port(9600) else get_free_port()

    cmd = self._find_vineyardd()
    cmd.extend(["--socket", vineyard_socket])
    cmd.extend(["--rpc_socket_port", str(self._vineyard_rpc_port)])
    cmd.extend(["--size", self._shared_mem])
    cmd.extend(["-etcd_endpoint", self._etcd_endpoint])
    cmd.extend(["-etcd_prefix", f"vineyard.gsa.{ts}"])
    env = os.environ.copy()
    env["GLOG_v"] = str(self._glog_level)

    logger.info("Launch vineyardd with command: %s", " ".join(cmd))

    process = subprocess.Popen(
        cmd,
        start_new_session=True,
        cwd=os.getcwd(),
        env=env,
        encoding="utf-8",
        errors="replace",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1,
    )

    logger.info("Server is initializing vineyardd.")
    stdout_watcher = PipeWatcher(
        process.stdout,
        sys.stdout,
        suppressed=(not logger.isEnabledFor(logging.DEBUG)),
    )
    setattr(process, "stdout_watcher", stdout_watcher)

    self._vineyard_socket = vineyard_socket
    self._vineyardd_process = process

    start_time = time.time()
    while not os.path.exists(self._vineyard_socket):
        # A dead child will never create the socket; fail now instead of
        # waiting for the timeout (or forever when no timeout is set).
        exit_code = process.poll()
        if exit_code is not None:
            raise RuntimeError(
                "Launch vineyardd failed: process exited with code "
                "{0}.".format(exit_code)
            )
        time.sleep(1)
        if (
            self._timeout_seconds
            and self._timeout_seconds + start_time < time.time()
        ):
            raise RuntimeError("Launch vineyardd failed due to timeout.")
    logger.info("Vineyardd is ready, ipc socket is %s", self._vineyard_socket)
def _create_interactive_engine_service(self):
    """Launch the zetcd proxy service and wait until its port is in use.

    zetcd is started listening on ``0.0.0.0:<self._zookeeper_port>`` with
    the endpoints returned by ``self._get_etcd_endpoints()``. Its combined
    stdout/stderr is handed to a ``PipeWatcher`` created with ``drop=True``
    and the process is stored as ``self._zetcd_process``. Readiness is
    probed on this host's resolved address.

    Raises:
        RuntimeError: If the ``zetcd`` executable is not on ``PATH``, if
            the process exits before the port is taken, or if
            ``self._saved_locals["timeout_seconds"]`` is truthy and
            elapses before the port is taken.
    """
    # launch zetcd proxy
    logger.info("Launching zetcd proxy service ...")
    zetcd_exec = shutil.which("zetcd")
    if not zetcd_exec:
        raise RuntimeError("zetcd command not found.")

    etcd_endpoints = self._get_etcd_endpoints()
    cmd = [
        zetcd_exec,
        "--zkaddr",
        "0.0.0.0:{}".format(self._zookeeper_port),
        "--endpoints",
        ",".join(etcd_endpoints),
    ]
    logger.info("zetcd cmd %s", " ".join(cmd))

    self._zetcd_process = subprocess.Popen(
        cmd,
        start_new_session=True,
        cwd=os.getcwd(),
        env=os.environ.copy(),
        encoding="utf-8",
        errors="replace",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1,
    )
    stdout_watcher = PipeWatcher(self._zetcd_process.stdout, sys.stdout, drop=True)
    setattr(self._zetcd_process, "stdout_watcher", stdout_watcher)

    # Resolve the local address once rather than on every poll iteration.
    host_ip = socket.gethostbyname(socket.gethostname())

    start_time = time.time()
    while is_free_port(self._zookeeper_port, host_ip):
        # A dead child will never open the port; fail now instead of
        # waiting for the timeout (or forever when no timeout is set).
        exit_code = self._zetcd_process.poll()
        if exit_code is not None:
            raise RuntimeError(
                "Launch zetcd service failed: process exited with code "
                "{0}.".format(exit_code)
            )
        time.sleep(1)
        if (
            self._saved_locals["timeout_seconds"]
            and self._saved_locals["timeout_seconds"] + start_time < time.time()
        ):
            raise RuntimeError("Launch zetcd service failed.")
    logger.info(
        "ZEtcd is ready, endpoint is %s:%s", host_ip, self._zookeeper_port
    )
def _launch_coordinator(self):
    """Spawn the ``gscoordinator`` module as a child process.

    Uses ``self._port`` when set (after checking it is free) or asks for a
    free port otherwise, records ``"<first host>:<port>"`` as
    ``self._coordinator_endpoint``, builds the command line from the
    instance's settings, and starts the child with the parent directory of
    the ``graphscope`` package prepended to ``PYTHONPATH``. The child's
    stdout/stderr are piped into ``PipeWatcher`` objects attached to the
    process, which is stored as ``self._proc``.

    Raises:
        RuntimeError: If an explicitly configured port is already in use.
    """
    if self._port is None:
        self._port = get_free_port()
    elif not is_free_port(self._port):
        # An explicitly requested port must not collide with a running one.
        raise RuntimeError("Port {} already used.".format(self._port))

    self._coordinator_endpoint = "{}:{}".format(self._hosts[0], self._port)

    cmd = [
        sys.executable,
        "-m",
        "gscoordinator",
        "--num_workers",
        f"{self._num_workers!s}",
        "--hosts",
        ",".join(self._hosts),
        "--log_level",
        f"{gs_config.log_level}",
        "--timeout_seconds",
        f"{self._timeout_seconds}",
        "--port",
        f"{self._port!s}",
        "--cluster_type",
        self.type(),
        "--instance_id",
        self._instance_id,
    ]

    # Optional arguments are appended only when configured.
    if self._etcd_addrs is not None:
        cmd += ["--etcd_addrs", self._etcd_addrs]
    if self._vineyard_shared_mem is not None:
        cmd += ["--vineyard_shared_mem", self._vineyard_shared_mem]
    if self._vineyard_socket:
        cmd += ["--vineyard_socket", f"{self._vineyard_socket}"]

    logger.info("Initializing coordinator with command: %s", " ".join(cmd))

    child_env = os.environ.copy()
    child_env["PYTHONUNBUFFERED"] = "TRUE"

    # Put the directory containing the graphscope package first on
    # PYTHONPATH, keeping whatever value was already there behind it.
    search_path = [os.path.join(os.path.dirname(graphscope.__file__), "..")]
    if "PYTHONPATH" in child_env:
        search_path.append(child_env["PYTHONPATH"])
    child_env["PYTHONPATH"] = os.pathsep.join(search_path)

    # A new session puts the child in its own process group so it does not
    # receive signals sent to the parent. Inside a notebook the child must
    # still receive the signal raised when the kernel is restarted or
    # stopped, so no new session is created there.
    detach_from_parent = not in_notebook()

    coordinator = subprocess.Popen(
        cmd,
        start_new_session=detach_from_parent,
        cwd=COORDINATOR_HOME,
        env=child_env,
        encoding="utf-8",
        errors="replace",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        bufsize=1,
    )

    def _is_loading_rate_line(line):
        # Matches lines containing both "Loading" and "it/s]" --
        # presumably progress-bar output; verify against the coordinator.
        return "Loading" in line and "it/s]" in line

    out_watcher = PipeWatcher(coordinator.stdout, sys.stdout)
    if not gs_config.show_log:
        out_watcher.add_filter(_is_loading_rate_line)
    coordinator.stdout_watcher = out_watcher

    err_watcher = PipeWatcher(coordinator.stderr, sys.stderr)
    coordinator.stderr_watcher = err_watcher

    self._proc = coordinator