Exemplo n.º 1
0
    def _launch_etcd(self):
        """Start an etcd server process and wait until its client port is taken.

        Port 2380 (peer) and port 2379 (client) are used when they are free;
        otherwise a port returned by ``get_free_port()`` is used instead. The
        chosen values are stored in ``self._etcd_peer_port``,
        ``self._etcd_client_port`` and ``self._etcd_endpoint``; the process
        handle is stored in ``self._etcd_process``.

        Raises:
            RuntimeError: If the etcd process exits while the client port is
                still free, or if ``self._timeout_seconds`` is truthy and that
                many seconds elapse while the port is still free.
        """
        etcd_exec = self._find_etcd()
        self._etcd_peer_port = 2380 if is_free_port(2380) else get_free_port()
        self._etcd_client_port = 2379 if is_free_port(
            2379) else get_free_port()
        self._etcd_endpoint = "http://127.0.0.1:{0}".format(
            str(self._etcd_client_port))
        peer_url = "http://127.0.0.1:{0}".format(str(self._etcd_peer_port))

        env = os.environ.copy()
        env.update({"ETCD_MAX_TXN_OPS": "102400"})

        cmd = etcd_exec + [
            "--data-dir",
            str(self._instance_workspace),
            "--listen-peer-urls",
            "http://0.0.0.0:{0}".format(str(self._etcd_peer_port)),
            "--listen-client-urls",
            "http://0.0.0.0:{0}".format(str(self._etcd_client_port)),
            "--advertise-client-urls",
            self._etcd_endpoint,
            "--initial-cluster",
            "default={0}".format(peer_url),
            "--initial-advertise-peer-urls",
            peer_url,
        ]
        logger.info("Launch etcd with command: %s", " ".join(cmd))

        process = subprocess.Popen(
            cmd,
            start_new_session=True,
            cwd=os.getcwd(),
            env=env,
            encoding="utf-8",
            errors="replace",
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
            bufsize=1,
        )

        logger.info("Server is initializing etcd.")
        # Stored before waiting so the handle is available to the owner even
        # when the wait below raises.
        self._etcd_process = process

        start_time = time.time()

        # The port stops being "free" once something is listening on it.
        while is_free_port(self._etcd_client_port):
            # A dead child will never bind the port: fail fast instead of
            # waiting for the timeout (or forever when no timeout is set).
            exit_code = process.poll()
            if exit_code is not None:
                raise RuntimeError(
                    "Launch etcd service failed: process exited with code "
                    "{0}.".format(exit_code))
            time.sleep(1)
            if (self._timeout_seconds
                    and self._timeout_seconds + start_time < time.time()):
                raise RuntimeError(
                    "Launch etcd service failed due to timeout.")
        logger.info("Etcd is ready, endpoint is localhost:{0}".format(
            self._etcd_client_port))
Exemplo n.º 2
0
    def _start_analytical_engine(self):
        """Launch the analytical engine and wait until its RPC port is taken.

        The command prefix and extra environment variables come from
        ``ResolveMPICmdPrefix().resolve(self._num_workers, self._hosts)``.
        The first entry of the comma-separated ``self._hosts`` string is used
        as the master host, and ``"<master>:<port>"`` is stored in
        ``self._analytical_engine_endpoint``. The child's stdout and stderr
        pipes are wrapped in ``PipeWatcher`` objects that are attached to the
        process, which is stored in ``self._analytical_engine_process``.

        Raises:
            RuntimeError: If the launched process exits while the RPC port is
                still free, or if ``self._timeout_seconds`` is truthy and that
                many seconds elapse while the port is still free.
        """
        rmcp = ResolveMPICmdPrefix()
        cmd, mpi_env = rmcp.resolve(self._num_workers, self._hosts)

        master = self._hosts.split(",")[0]
        rpc_port = get_free_port(master)
        self._analytical_engine_endpoint = f"{master}:{rpc_port}"

        cmd.append(ANALYTICAL_ENGINE_PATH)
        cmd.extend(["--host", "0.0.0.0"])
        cmd.extend(["--port", str(rpc_port)])
        cmd.extend(["--vineyard_shared_mem", self._shared_mem])

        # The log verbosity is passed on the command line for OpenMPI and
        # through the GLOG_v environment variable otherwise.
        if rmcp.openmpi():
            cmd.extend(["-v", str(self._glog_level)])
        else:
            mpi_env["GLOG_v"] = str(self._glog_level)

        if self._vineyard_socket:
            cmd.extend(["--vineyard_socket", self._vineyard_socket])

        env = os.environ.copy()
        env.update(mpi_env)

        logger.info("Launch analytical engine with command: %s", " ".join(cmd))

        process = subprocess.Popen(
            cmd,
            start_new_session=True,
            cwd=os.getcwd(),
            env=env,
            encoding="utf-8",
            errors="replace",
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            bufsize=1,
        )

        logger.info("Server is initializing analytical engine.")
        stdout_watcher = PipeWatcher(process.stdout, sys.stdout)
        stderr_watcher = PipeWatcher(process.stderr, sys.stderr)
        # Keep the watchers reachable for as long as the process handle is.
        setattr(process, "stdout_watcher", stdout_watcher)
        setattr(process, "stderr_watcher", stderr_watcher)

        self._analytical_engine_process = process

        start_time = time.time()

        # NOTE(review): the port was picked with get_free_port(master) but is
        # probed here on is_free_port's default host -- confirm this is
        # intended when the master is not the local machine.
        while is_free_port(rpc_port):
            # A dead launcher will never bind the port: fail fast instead of
            # waiting for the timeout (or forever when no timeout is set).
            exit_code = process.poll()
            if exit_code is not None:
                raise RuntimeError(
                    "Launch analytical engine failed: process exited with "
                    "code {0}.".format(exit_code))
            time.sleep(1)
            if (self._timeout_seconds
                    and self._timeout_seconds + start_time < time.time()):
                raise RuntimeError(
                    "Launch analytical engine failed due to timeout.")
        logger.info("Analytical engine is ready, endpoint is {0}".format(
            self._analytical_engine_endpoint))
Exemplo n.º 3
0
    def _launch_zetcd(self):
        """Start a zetcd process pointed at etcd and wait for its port.

        Port 2181 is used when it is free, otherwise a port returned by
        ``get_free_port()``; the choice is stored in ``self._zookeeper_port``.
        zetcd is given ``self._etcd_endpoint`` as its ``--endpoints`` value,
        and the process handle is stored in ``self._zetcd_process``.

        Raises:
            RuntimeError: If the ``zetcd`` executable is not found on PATH, if
                the process exits while the port is still free, or if
                ``self._timeout_seconds`` is truthy and that many seconds
                elapse while the port is still free.
        """
        self._zookeeper_port = 2181 if is_free_port(2181) else get_free_port()

        zetcd_exec = shutil.which("zetcd")
        if not zetcd_exec:
            raise RuntimeError("zetcd command not found.")
        cmd = [
            zetcd_exec,
            "--zkaddr",
            "0.0.0.0:{}".format(self._zookeeper_port),
            "--endpoints",
            self._etcd_endpoint,
        ]
        logger.info("Launch zetcd with command: %s", " ".join(cmd))

        process = subprocess.Popen(
            cmd,
            start_new_session=True,
            cwd=os.getcwd(),
            env=os.environ.copy(),
            encoding="utf-8",
            errors="replace",
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1,
        )

        logger.info("Server is initializing zetcd.")
        self._zetcd_process = process

        start_time = time.time()
        while is_free_port(self._zookeeper_port):
            # A dead child will never bind the port: fail fast instead of
            # waiting for the timeout (or forever when no timeout is set).
            exit_code = process.poll()
            if exit_code is not None:
                raise RuntimeError(
                    "Launch zetcd proxy service failed: process exited with "
                    "code {0}.".format(exit_code))
            time.sleep(1)
            if (self._timeout_seconds
                    and self._timeout_seconds + start_time < time.time()):
                raise RuntimeError(
                    "Launch zetcd proxy service failed due to timeout.")
        logger.info("ZEtcd is ready, endpoint is localhost:{0}".format(
            self._zookeeper_port))
Exemplo n.º 4
0
    def _create_vineyard(self):
        """Start a vineyardd process unless a vineyard socket is already set.

        Does nothing when ``self._vineyard_socket`` is truthy. Otherwise a
        socket path is built from ``self._vineyard_socket_prefix`` and a
        timestamp, vineyardd is launched against ``self._etcd_endpoint``, and
        the method blocks until the socket path exists on disk. On launch,
        ``self._vineyard_rpc_port``, ``self._vineyard_socket`` and
        ``self._vineyardd_process`` are set.

        Raises:
            RuntimeError: If vineyardd exits before the socket path appears,
                or if ``self._timeout_seconds`` is truthy and that many
                seconds elapse while the path is still missing.
        """
        if self._vineyard_socket:
            return

        ts = get_timestamp()
        vineyard_socket = f"{self._vineyard_socket_prefix}{ts}"
        # Port 9600 is used when free, otherwise any free port.
        self._vineyard_rpc_port = 9600 if is_free_port(
            9600) else get_free_port()

        cmd = self._find_vineyardd()
        cmd.extend(["--socket", vineyard_socket])
        cmd.extend(["--rpc_socket_port", str(self._vineyard_rpc_port)])
        cmd.extend(["--size", self._shared_mem])
        cmd.extend(["-etcd_endpoint", self._etcd_endpoint])
        cmd.extend(["-etcd_prefix", f"vineyard.gsa.{ts}"])
        env = os.environ.copy()
        env["GLOG_v"] = str(self._glog_level)

        logger.info("Launch vineyardd with command: %s", " ".join(cmd))

        process = subprocess.Popen(
            cmd,
            start_new_session=True,
            cwd=os.getcwd(),
            env=env,
            encoding="utf-8",
            errors="replace",
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1,
        )

        logger.info("Server is initializing vineyardd.")
        stdout_watcher = PipeWatcher(
            process.stdout,
            sys.stdout,
            suppressed=(not logger.isEnabledFor(logging.DEBUG)),
        )
        # Keep the watcher reachable for as long as the process handle is.
        setattr(process, "stdout_watcher", stdout_watcher)

        self._vineyard_socket = vineyard_socket
        self._vineyardd_process = process

        start_time = time.time()
        while not os.path.exists(self._vineyard_socket):
            # A dead child will never create the socket: fail fast instead
            # of waiting for the timeout (or forever when none is set).
            exit_code = process.poll()
            if exit_code is not None:
                raise RuntimeError(
                    "Launch vineyardd failed: process exited with code "
                    "{0}.".format(exit_code))
            time.sleep(1)
            if (self._timeout_seconds
                    and self._timeout_seconds + start_time < time.time()):
                raise RuntimeError(
                    "Launch vineyardd failed due to timeout.")
        logger.info("Vineyardd is ready, ipc socket is {0}".format(
            self._vineyard_socket))
Exemplo n.º 5
0
    def _create_interactive_engine_service(self):
        """Launch the zetcd proxy and wait until its port is in use.

        zetcd is started on ``self._zookeeper_port`` with the endpoints
        returned by ``self._get_etcd_endpoints()`` joined by commas. Its
        output pipe is wrapped in a ``PipeWatcher`` created with
        ``drop=True``, and the process is stored in ``self._zetcd_process``.
        Readiness is probed on the address that the local hostname resolves
        to.

        Raises:
            RuntimeError: If the ``zetcd`` executable is not found on PATH, if
                the process exits while the port is still free, or if
                ``self._saved_locals["timeout_seconds"]`` is truthy and that
                many seconds elapse while the port is still free.
        """
        # launch zetcd proxy
        logger.info("Launching zetcd proxy service ...")
        zetcd_exec = shutil.which("zetcd")
        if not zetcd_exec:
            raise RuntimeError("zetcd command not found.")
        etcd_endpoints = self._get_etcd_endpoints()
        cmd = [
            zetcd_exec,
            "--zkaddr",
            "0.0.0.0:{}".format(self._zookeeper_port),
            "--endpoints",
            "{}".format(",".join(etcd_endpoints)),
        ]
        logger.info("zetcd cmd {}".format(" ".join(cmd)))

        self._zetcd_process = subprocess.Popen(
            cmd,
            start_new_session=True,
            cwd=os.getcwd(),
            env=os.environ.copy(),
            encoding="utf-8",
            errors="replace",
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1,
        )
        stdout_watcher = PipeWatcher(self._zetcd_process.stdout,
                                     sys.stdout,
                                     drop=True)
        # Keep the watcher reachable for as long as the process handle is.
        setattr(self._zetcd_process, "stdout_watcher", stdout_watcher)

        # Resolve the local address once instead of on every poll iteration.
        local_ip = socket.gethostbyname(socket.gethostname())

        start_time = time.time()
        while is_free_port(self._zookeeper_port, local_ip):
            # A dead child will never bind the port: fail fast instead of
            # waiting for the timeout (or forever when no timeout is set).
            exit_code = self._zetcd_process.poll()
            if exit_code is not None:
                raise RuntimeError(
                    "Launch zetcd service failed: process exited with code "
                    "{0}.".format(exit_code))
            time.sleep(1)
            if (self._saved_locals["timeout_seconds"]
                    and self._saved_locals["timeout_seconds"] + start_time <
                    time.time()):
                raise RuntimeError("Launch zetcd service failed.")
        logger.info("ZEtcd is ready, endpoint is {0}:{1}".format(
            local_ip, self._zookeeper_port))
Exemplo n.º 6
0
    def _launch_coordinator(self):
        """Spawn the ``gscoordinator`` module as a child Python process.

        When ``self._port`` is ``None`` a free port is picked; otherwise the
        given port must currently be free. The endpoint
        ``"<first host>:<port>"`` is stored in ``self._coordinator_endpoint``
        and the process handle, with its stdout/stderr ``PipeWatcher``
        objects attached, in ``self._proc``.

        Raises:
            RuntimeError: If an explicitly configured port is not free.
        """
        if self._port is not None:
            # check port conflict
            if not is_free_port(self._port):
                raise RuntimeError("Port {} already used.".format(self._port))
        else:
            self._port = get_free_port()

        self._coordinator_endpoint = "{}:{}".format(self._hosts[0], self._port)

        launch_args = [
            sys.executable,
            "-m",
            "gscoordinator",
            "--num_workers",
            str(self._num_workers),
            "--hosts",
            ",".join(self._hosts),
            "--log_level",
            "{}".format(gs_config.log_level),
            "--timeout_seconds",
            "{}".format(self._timeout_seconds),
            "--port",
            str(self._port),
            "--cluster_type",
            self.type(),
            "--instance_id",
            self._instance_id,
        ]

        # Optional flags are appended only when the matching setting is given.
        if self._etcd_addrs is not None:
            launch_args += ["--etcd_addrs", self._etcd_addrs]

        if self._vineyard_shared_mem is not None:
            launch_args += ["--vineyard_shared_mem", self._vineyard_shared_mem]

        if self._vineyard_socket:
            launch_args += [
                "--vineyard_socket", "{}".format(self._vineyard_socket)
            ]

        logger.info("Initializing coordinator with command: %s",
                    " ".join(launch_args))

        child_env = os.environ.copy()
        child_env["PYTHONUNBUFFERED"] = "TRUE"
        # Put the directory that contains the graphscope package in front of
        # any PYTHONPATH the parent already had.
        package_parent = os.path.join(os.path.dirname(graphscope.__file__),
                                      "..")
        inherited_path = child_env.get("PYTHONPATH")
        if inherited_path is None:
            child_env["PYTHONPATH"] = package_parent
        else:
            child_env["PYTHONPATH"] = (package_parent + os.pathsep +
                                       inherited_path)

        # `start_new_session=True` puts the child process into a new process
        # group so it does not receive the parent's signals. In a notebook
        # environment the child has to see the signal sent when the kernel is
        # restarted or stopped, so no new session is created there.
        coordinator = subprocess.Popen(
            launch_args,
            start_new_session=not in_notebook(),
            cwd=COORDINATOR_HOME,
            env=child_env,
            encoding="utf-8",
            errors="replace",
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            bufsize=1,
        )

        out_watcher = PipeWatcher(coordinator.stdout, sys.stdout)
        if not gs_config.show_log:

            def _is_loading_progress(line):
                # Matches lines containing both "Loading" and "it/s]".
                return "Loading" in line and "it/s]" in line

            out_watcher.add_filter(_is_loading_progress)
        coordinator.stdout_watcher = out_watcher

        err_watcher = PipeWatcher(coordinator.stderr, sys.stderr)
        coordinator.stderr_watcher = err_watcher

        self._proc = coordinator