示例#1
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Launch the RayClient Server process dedicated to one client.

        The server entry for ``client_id`` must already exist; this method
        prepares the runtime env context, spawns the process, waits (except
        on Win32) for it to become the actual client server, and hands the
        process to the server entry via ``set_result``.

        Args:
            client_id: ID of the client the server is started for.
            job_config: Source of the serialized runtime env and the proto
                runtime env config.

        Returns:
            True if the spawned process is still running when this method
            finishes, False if it has already exited.
        """
        server = self._get_server_for_client(client_id)
        assert server, f"Server has not been created for: {client_id}"

        stdout_handle, stderr_handle = self.node.get_log_file_handles(
            f"ray_client_server_{server.port}", unique=True)

        env_json = job_config.get_serialized_runtime_env()
        env_config = job_config.get_proto_runtime_env_config()
        if env_json and env_json != "{}":
            env_context = self._create_runtime_env(
                serialized_runtime_env=env_json,
                runtime_env_config=env_config,
                specific_server=server,
            )
        else:
            # TODO(edoakes): can this special case be dropped so that the
            # runtime env is always sent to the agent?
            env_context = RuntimeEnvContext().serialize()

        server_proc = start_ray_client_server(
            self.address,
            self.node.node_ip_address,
            server.port,
            stdout_file=stdout_handle,
            stderr_file=stderr_handle,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env_context=env_context,
            redis_password=self._redis_password,
        )

        pid = server_proc.process.pid
        # The launched process starts out as a shim and later becomes the
        # actual RayClient Server; poll its command line until it does.
        # `psutil` is not used on Win32, so no waiting happens there.
        if sys.platform != "win32":
            watcher = psutil.Process(pid)
            while True:
                if server_proc.process.poll() is not None:
                    logger.error(
                        "SpecificServer startup failed for client: "
                        f"{client_id}")
                    break
                if _match_running_client_server(watcher.cmdline()):
                    break
                logger.debug(
                    "Waiting for Process to reach the actual client server.")
                time.sleep(0.5)

        server.set_result(server_proc)
        logger.info(f"SpecificServer started on port: {server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return server_proc.process.poll() is None
示例#2
0
    def connect(self,
                conn_str: str,
                job_config: JobConfig = None,
                secure: bool = False,
                metadata: List[Tuple[str, str]] = None,
                connection_retries: int = 3,
                namespace: str = None,
                *,
                ignore_version: bool = False) -> Dict[str, Any]:
        """Connect the Ray Client to a server.

        Args:
            conn_str: Connection string, in the form "[host]:port"
            job_config: The job config of the server.
            secure: Whether to use a TLS secured gRPC channel
            metadata: gRPC metadata to send on connect
            connection_retries: number of connection attempts to make
            namespace: if given, it is set as the Ray namespace on the job
                config (a new JobConfig is created when none was passed).
            ignore_version: whether to ignore Python or Ray version mismatches.
                This should only be used for debugging purposes.

        Returns:
            Dictionary of connection info, e.g., {"num_clients": 1}.
            Returns None without reconnecting when a client worker already
            exists and ``_connected_with_init`` is set.

        Raises:
            Exception: if a client worker already exists and
                ``_connected_with_init`` is not set. Any exception raised
                while connecting is re-raised after ``disconnect()``.
        """
        # Imported here rather than at module level to avoid circular imports.
        from ray.util.client.worker import Worker
        import ray._private.client_mode_hook

        already_connected = self.client_worker is not None
        if already_connected and self._connected_with_init:
            return
        if already_connected:
            raise Exception(
                "ray.connect() called, but ray client is already connected")

        if not self._inside_client_test:
            # An explicit client connect outside of client mode must switch
            # client mode on first.
            ray._private.client_mode_hook._explicitly_enable_client_mode()

        if namespace is not None:
            if not job_config:
                job_config = JobConfig()
            job_config.set_ray_namespace(namespace)

        if job_config is not None:
            env_spec = json.loads(job_config.get_serialized_runtime_env())
            if any(env_spec.get(field) for field in ("pip", "conda")):
                logger.warning("The 'pip' or 'conda' field was specified in "
                               "the runtime env, so it may take some time to "
                               "install the environment before ray.connect() "
                               "returns.")

        try:
            self.client_worker = Worker(conn_str,
                                        secure=secure,
                                        metadata=metadata,
                                        connection_retries=connection_retries)
            self.api.worker = self.client_worker
            self.client_worker._server_init(job_config)
            info = self.client_worker.connection_info()
            self._check_versions(info, ignore_version)
            self._register_serializers()
        except Exception:
            # Tear down the partially set-up connection before propagating.
            self.disconnect()
            raise
        else:
            return info
示例#3
0
文件: proxier.py 项目: haochihlin/ray
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Launch the RayClient Server process dedicated to one client.

        The server entry for ``client_id`` must already exist. The job's
        runtime env is parsed, a RuntimeEnvContext is built from it and
        passed through ``working_dir_pkg.setup_working_dir``, and the
        serialized context is handed to the spawned server process.

        Args:
            client_id: ID of the client the server is started for.
            job_config: Source of the serialized runtime env.

        Returns:
            True if the spawned process is still running when this method
            finishes, False if it has already exited.
        """
        server = self._get_server_for_client(client_id)
        assert server, f"Server has not been created for: {client_id}"

        stdout_handle, stderr_handle = self.node.get_log_file_handles(
            f"ray_client_server_{server.port}", unique=True)

        env_json = job_config.get_serialized_runtime_env()
        env_dict = json.loads(env_json)

        # Prepare the working_dir for the server.
        # TODO(edoakes): unify this with the worker setup code by routing
        # it through the runtime_env agent.
        env_context = RuntimeEnvContext(
            env_vars=env_dict.get("env_vars"),
            resources_dir=self.node.get_runtime_env_dir_path())
        working_dir_pkg.setup_working_dir(env_dict, env_context)

        server_proc = start_ray_client_server(
            self.redis_address,
            server.port,
            stdout_file=stdout_handle,
            stderr_file=stderr_handle,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=env_json,
            serialized_runtime_env_context=env_context.serialize(),
            redis_password=self._redis_password)

        pid = server_proc.process.pid
        # The launched process starts out as a shim and later becomes the
        # actual RayClient Server; poll its command line until it does.
        # `psutil` is not used on Win32, so no waiting happens there.
        if sys.platform != "win32":
            watcher = psutil.Process(pid)
            while True:
                if server_proc.process.poll() is not None:
                    logger.error(
                        "SpecificServer startup failed for client: "
                        f"{client_id}")
                    break
                if _match_running_client_server(watcher.cmdline()):
                    break
                logger.debug(
                    "Waiting for Process to reach the actual client server.")
                time.sleep(0.5)

        server.set_result(server_proc)
        logger.info(f"SpecificServer started on port: {server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return server_proc.process.poll() is None
示例#4
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Register and launch a RayClient Server process for one client.

        Under ``server_lock`` an unused port is picked and a SpecificServer
        entry (port, pending process-handle future, insecure gRPC channel to
        localhost) is stored in ``self.servers``. The server process is then
        spawned and the future is resolved with it.

        Args:
            client_id: ID of the client the server is started for; used as
                the key in ``self.servers``.
            job_config: Source of the serialized runtime env.

        Returns:
            True if the spawned process is still running when this method
            finishes, False if it has already exited.
        """
        with self.server_lock:
            port = self._get_unused_port()
            handle_ready = futures.Future()
            channel = grpc.insecure_channel(f"localhost:{port}",
                                            options=GRPC_OPTIONS)
            self.servers[client_id] = SpecificServer(
                port=port,
                process_handle_future=handle_ready,
                channel=channel)

        env_json = job_config.get_serialized_runtime_env()

        server_proc = start_ray_client_server(
            self.redis_address,
            port,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=env_json,
            session_dir=self._get_session_dir())

        pid = server_proc.process.pid
        # The launched process starts out as a shim and later becomes the
        # actual RayClient Server; poll its command line until it does.
        # `psutil` is not used on Win32, so no waiting happens there.
        if sys.platform != "win32":
            watcher = psutil.Process(pid)
            while True:
                if server_proc.process.poll() is not None:
                    logger.error(
                        "SpecificServer startup failed for client: "
                        f"{client_id}")
                    break
                argv = watcher.cmdline()
                # The real server is recognised by its module name in the
                # third command-line slot.
                if len(argv) > 3 and argv[2] == "ray.util.client.server":
                    break
                logger.debug(
                    "Waiting for Process to reach the actual client server.")
                time.sleep(0.5)

        handle_ready.set_result(server_proc)
        logger.info(f"SpecificServer started on port: {port} with PID: {pid} "
                    f"for client: {client_id}")
        return server_proc.process.poll() is None
示例#5
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Launch the RayClient Server process dedicated to one client.

        The server entry for ``client_id`` must already exist. The process
        is spawned with the job's serialized runtime env and the node's
        session directory, then handed to the server entry via
        ``set_result``.

        Args:
            client_id: ID of the client the server is started for.
            job_config: Source of the serialized runtime env.

        Returns:
            True if the spawned process is still running when this method
            finishes, False if it has already exited.
        """
        server = self._get_server_for_client(client_id)
        assert server, f"Server has not been created for: {client_id}"

        env_json = job_config.get_serialized_runtime_env()

        stdout_handle, stderr_handle = self.node.get_log_file_handles(
            f"ray_client_server_{server.port}", unique=True)

        server_proc = start_ray_client_server(
            self.redis_address,
            server.port,
            stdout_file=stdout_handle,
            stderr_file=stderr_handle,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=env_json,
            session_dir=self.node.get_session_dir_path())

        pid = server_proc.process.pid
        # The launched process starts out as a shim and later becomes the
        # actual RayClient Server; poll its command line until it does.
        # `psutil` is not used on Win32, so no waiting happens there.
        if sys.platform != "win32":
            watcher = psutil.Process(pid)
            while True:
                if server_proc.process.poll() is not None:
                    logger.error(
                        "SpecificServer startup failed for client: "
                        f"{client_id}")
                    break
                if _match_running_client_server(watcher.cmdline()):
                    break
                logger.debug(
                    "Waiting for Process to reach the actual client server.")
                time.sleep(0.5)

        server.set_result(server_proc)
        logger.info(f"SpecificServer started on port: {server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return server_proc.process.poll() is None
示例#6
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Start up a RayClient Server for an incoming client to
        communicate with. Returns whether creation was successful.

        When the job config carries runtime env URIs, the working_dir is set
        up locally and prepended to ``PYTHONPATH`` in the runtime env's
        ``env_vars`` before the server process is spawned.

        Args:
            client_id: ID of the client the server is started for; its
                server entry must already exist.
            job_config: Job configuration providing the runtime env.

        Returns:
            True if the spawned process is still running when this method
            finishes, False if it has already exited.
        """
        specific_server = self._get_server_for_client(client_id)
        assert specific_server, f"Server has not been created for: {client_id}"

        output, error = self.node.get_log_file_handles(
            f"ray_client_server_{specific_server.port}", unique=True)

        # Set up the working_dir for the server.
        # TODO(edoakes): this should be unified with the worker setup code
        # by going through the runtime_env agent.
        uris = job_config.get_runtime_env_uris() if job_config else []
        if uris:
            # Download and set up the working_dir locally.
            working_dir = working_dir_pkg.ensure_runtime_env_setup(uris)

            # Set PYTHONPATH in the environment variables so the working_dir
            # is included in the module search path.
            runtime_env = job_config.runtime_env
            env_vars = runtime_env.get("env_vars", None) or {}
            python_path = working_dir
            if "PYTHONPATH" in env_vars:
                # Read the user's value from env_vars, the dict that was just
                # checked. Looking it up on the top-level runtime_env raised
                # KeyError whenever a PYTHONPATH was supplied.
                python_path += os.pathsep + env_vars["PYTHONPATH"]
            env_vars["PYTHONPATH"] = python_path
            runtime_env["env_vars"] = env_vars
            job_config.set_runtime_env(runtime_env)

        # NOTE(review): a falsy job_config is tolerated above but is
        # dereferenced unconditionally here - confirm whether callers can
        # ever pass None.
        serialized_runtime_env = job_config.get_serialized_runtime_env()

        proc = start_ray_client_server(
            self.redis_address,
            specific_server.port,
            stdout_file=output,
            stderr_file=error,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=serialized_runtime_env,
            session_dir=self.node.get_session_dir_path(),
            redis_password=self._redis_password)

        # Wait for the launched process to transition from the shim process
        # to the actual RayClient Server.
        pid = proc.process.pid
        if sys.platform != "win32":
            psutil_proc = psutil.Process(pid)
        else:
            psutil_proc = None
        # Don't use `psutil` on Win32
        while psutil_proc is not None:
            if proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            cmd = psutil_proc.cmdline()
            if _match_running_client_server(cmd):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)
        specific_server.set_result(proc)
        logger.info(f"SpecificServer started on port: {specific_server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return proc.process.poll() is None