Exemplo n.º 1
0
    async def setup(
        self,
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        """Make the worker launch through a ``podman run`` command prefix.

        Returns without touching ``context`` unless ``runtime_env`` reports a
        py_container and a truthy container image. Otherwise
        ``context.py_executable`` becomes the space-joined podman command
        line, which ends with ``--entrypoint python <image>``.
        """
        if not (runtime_env.has_py_container()
                and runtime_env.py_container_image()):
            return

        podman_args = [
            "podman",
            "run",
            # Volume argument: the Ray temp dir mapped onto the same path.
            "-v",
            ":".join((self._ray_tmp_dir, self._ray_tmp_dir)),
            "--cgroup-manager=cgroupfs",
            "--network=host",
            "--pid=host",
            "--ipc=host",
            "--env-host",
            # Forward the raylet PID read from this process's environment.
            "--env",
            "RAY_RAYLET_PID=" + os.getenv("RAY_RAYLET_PID"),
        ]
        if runtime_env.py_container_run_options():
            podman_args += runtime_env.py_container_run_options()
        # TODO(chenk008): add resource limit
        podman_args += [
            "--entrypoint",
            "python",
            runtime_env.py_container_image(),
        ]

        context.py_executable = " ".join(podman_args)
        logger.info("start worker in container with prefix: {}".format(
            context.py_executable))
Exemplo n.º 2
0
    def setup(self,
              runtime_env: RuntimeEnv,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        """Prepare a conda environment and make ``context`` activate it.

        Does nothing when ``runtime_env`` has neither conda nor pip
        configured. If the runtime env names an existing conda env, that name
        is used directly. Otherwise an environment is created (once per
        prefix, tracked in ``self._created_envs``) from a generated
        ``environment.yml`` under ``self._resources_dir``.

        On success ``context.py_executable`` is set to ``"python"`` and the
        conda activation commands are appended to ``context.command_prefix``.
        """
        if not runtime_env.has_conda() and not runtime_env.has_pip():
            return

        logger.debug("Setting up conda or pip for runtime_env: "
                     f"{runtime_env.serialize()}")

        if runtime_env.conda_env_name():
            # A pre-existing, named conda env: nothing to install.
            conda_env_name = runtime_env.conda_env_name()
        else:
            conda_dict = get_conda_dict(runtime_env, self._resources_dir)
            # NOTE(review): `hash` shadows the builtin and `protocol` is
            # unused in this method.
            protocol, hash = parse_uri(runtime_env.conda_uri())
            # The env "name" here is the path derived from the URI hash; it
            # is passed to create_conda_env as `prefix` below.
            conda_env_name = self._get_path_from_hash(hash)
            assert conda_dict is not None

            # Decide which extra pip dependencies to inject into the env.
            ray_pip = current_ray_pip_specifier(logger=logger)
            if ray_pip:
                extra_pip_dependencies = [ray_pip, "ray[default]"]
            elif runtime_env.get_extension("_inject_current_ray") == "True":
                extra_pip_dependencies = (
                    _resolve_install_from_source_ray_dependencies())
            else:
                extra_pip_dependencies = []
            conda_dict = inject_dependencies(conda_dict, _current_py_version(),
                                             extra_pip_dependencies)

            # It is not safe for multiple processes to install conda envs
            # concurrently, even if the envs are different, so use a global
            # lock for all conda installs.
            # See https://github.com/ray-project/ray/issues/17086
            file_lock_name = "ray-conda-install.lock"
            with FileLock(os.path.join(self._resources_dir, file_lock_name)):
                try:
                    conda_yaml_file = os.path.join(self._resources_dir,
                                                   "environment.yml")
                    with open(conda_yaml_file, "w") as file:
                        yaml.dump(conda_dict, file)

                    if conda_env_name in self._created_envs:
                        logger.debug(f"Conda env {conda_env_name} already "
                                     "created, skipping creation.")
                    else:
                        create_conda_env(conda_yaml_file,
                                         prefix=conda_env_name,
                                         logger=logger)
                        self._created_envs.add(conda_env_name)
                finally:
                    # The YAML file is only a temporary input; always remove
                    # it, whether or not env creation succeeded.
                    os.remove(conda_yaml_file)

                # NOTE(review): this is a plain truthiness check, while the
                # branch above compares the same extension to the string
                # "True" -- confirm which form is intended.
                if runtime_env.get_extension("_inject_current_ray"):
                    _inject_ray_to_conda_site(conda_path=conda_env_name,
                                              logger=logger)

        context.py_executable = "python"
        context.command_prefix += get_conda_activate_commands(conda_env_name)
        logger.info(
            f"Finished setting up runtime environment at {conda_env_name}")
Exemplo n.º 3
0
    def modify_context(
        self,
        uris: List[str],
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        """Point ``context`` at the virtualenv built for the pip URI.

        No-op when ``runtime_env`` has no pip section. Otherwise the
        virtualenv's python becomes ``context.py_executable`` and its
        activate command is appended to ``context.command_prefix``.

        Raises:
            ValueError: if the virtualenv python does not exist locally.
        """
        if not runtime_env.has_pip():
            return

        # PipPlugin only uses a single URI.
        pip_uri = uris[0]
        _protocol, env_hash = parse_uri(pip_uri)
        venv_dir = self._get_path_from_hash(env_hash)
        venv_python = _PathHelper.get_virtualenv_python(venv_dir)

        if os.path.exists(venv_python):
            # Update py_executable and activate the virtualenv first.
            context.py_executable = venv_python
            activate_cmd = _PathHelper.get_virtualenv_activate_command(
                venv_dir)
            context.command_prefix += [activate_cmd]
            return

        raise ValueError(
            f"Local directory {venv_dir} for URI {pip_uri} does "
            "not exist on the cluster. Something may have gone wrong while "
            "installing the runtime_env `pip` packages.")
Exemplo n.º 4
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """Start a RayClient server process dedicated to one client.

        The server entry for ``client_id`` must already exist. A runtime env
        context is built from ``job_config`` (an empty default context when
        no runtime env is given), the server process is launched, and on
        non-Windows platforms this method polls until the process command
        line matches the actual client server or the process exits.

        Args:
            client_id: Identifier of the incoming client.
            job_config: Job configuration supplying the serialized runtime
                env and its config.

        Returns:
            True if the launched process is still running when this method
            returns, False if it has already exited.
        """
        specific_server = self._get_server_for_client(client_id)
        assert specific_server, f"Server has not been created for: {client_id}"

        # Per-server log files, keyed by the server's port.
        output, error = self.node.get_log_file_handles(
            f"ray_client_server_{specific_server.port}", unique=True)

        serialized_runtime_env = job_config.get_serialized_runtime_env()
        runtime_env_config = job_config.get_proto_runtime_env_config()
        if not serialized_runtime_env or serialized_runtime_env == "{}":
            # TODO(edoakes): can we just remove this case and always send it
            # to the agent?
            serialized_runtime_env_context = RuntimeEnvContext().serialize()
        else:
            serialized_runtime_env_context = self._create_runtime_env(
                serialized_runtime_env=serialized_runtime_env,
                runtime_env_config=runtime_env_config,
                specific_server=specific_server,
            )

        proc = start_ray_client_server(
            self.address,
            self.node.node_ip_address,
            specific_server.port,
            stdout_file=output,
            stderr_file=error,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env_context=serialized_runtime_env_context,
            redis_password=self._redis_password,
        )

        # Wait for the process being run transitions from the shim process
        # to the actual RayClient Server.
        pid = proc.process.pid
        if sys.platform != "win32":
            psutil_proc = psutil.Process(pid)
        else:
            psutil_proc = None
        # Don't use `psutil` on Win32
        # NOTE(review): the loop has no timeout; it ends only when the
        # process exits or its command line matches.
        while psutil_proc is not None:
            if proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            # NOTE(review): the process could exit between the poll() above
            # and this call -- confirm how cmdline() behaves in that case.
            cmd = psutil_proc.cmdline()
            if _match_running_client_server(cmd):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)
        # The result is published and the "started" message is logged even
        # when the failure branch above was taken; the return value is what
        # reports whether the process is alive.
        specific_server.set_result(proc)
        logger.info(f"SpecificServer started on port: {specific_server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return proc.process.poll() is None
Exemplo n.º 5
0
            def run_setup_with_logger():
                """Build and return the RuntimeEnvContext for this request.

                Parses the serialized runtime env, runs the conda and
                working_dir managers, records the URI -> env mapping, and
                finally runs every configured plugin against the context.
                """
                runtime_env: dict = json.loads(serialized_runtime_env or "{}")
                allocated_resource: dict = json.loads(
                    serialized_allocated_resource_instances or "{}")

                # Use a separate logger for each job.
                per_job_logger = self.get_or_create_logger(request.job_id)
                # TODO(chenk008): Add log about allocated_resource to
                # avoid lint error. That will be moved to cgroup plugin.
                per_job_logger.debug(
                    f"Worker has resource :{allocated_resource}")

                context = RuntimeEnvContext(
                    env_vars=runtime_env.get("env_vars"))
                for manager in (self._conda_manager,
                                self._working_dir_manager):
                    manager.setup(runtime_env, context, logger=per_job_logger)

                # Add the mapping of URIs -> the serialized environment to be
                # used for cache invalidation.
                env_uris = runtime_env.get("uris") or []
                for uri in env_uris:
                    self._working_dir_uri_to_envs[uri].add(
                        serialized_runtime_env)

                # Run setup function from all the plugins
                plugin_paths = runtime_env.get("plugins", {}).keys()
                for plugin_class_path in plugin_paths:
                    plugin_class = import_attr(plugin_class_path)
                    # TODO(simon): implement uri support
                    placeholder_uri = "uri not implemented"
                    plugin_class.create(placeholder_uri, runtime_env, context)
                    plugin_class.modify_context(placeholder_uri, runtime_env,
                                                context)

                return context
Exemplo n.º 6
0
    def setup(self,
              runtime_env: RuntimeEnv,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        """Install the runtime env's pip packages and add them to PYTHONPATH.

        No-op when ``runtime_env`` has no pip section. Otherwise the packages
        are installed into a directory derived from the hash of the package
        list, and that directory is prepended to ``PYTHONPATH`` in
        ``context.env_vars``.

        Unless the environment variable named by
        ``RAY_RUNTIME_ENV_ALLOW_RAY_IN_PIP`` is set to ``"1"``, any ``ray``
        directory found in the install target is removed.

        Args:
            runtime_env: The runtime environment being set up.
            context: Context whose ``env_vars`` is updated in place.
            logger: Logger for progress messages.
        """
        if not runtime_env.has_pip():
            return

        logger.debug(f"Setting up pip for runtime_env: {runtime_env}")
        pip_packages: List[str] = runtime_env.pip_packages()
        target_dir = self._get_path_from_hash(_get_pip_hash(pip_packages))

        _install_pip_list_to_dir(pip_packages, target_dir, logger=logger)

        # Despite Ray being removed from the input pip list during validation,
        # other packages in the pip list (for example, xgboost_ray) may
        # themselves include Ray as a dependency.  In this case, we will have
        # inadvertently installed the latest Ray version in the target_dir,
        # which may cause Ray version mismatch issues. Uninstall it here, if it
        # exists, to make the workers use the Ray that is already
        # installed in the cluster.
        #
        # In the case where the user explicitly wants to include Ray in their
        # pip list (and signals this by setting the environment variable below)
        # then we don't want this deletion logic, so we skip it.
        #
        # Environment values are always strings, so the flag must be compared
        # with "1"; comparing with the integer 1 is always unequal and would
        # make the opt-out impossible.
        if os.environ.get(RAY_RUNTIME_ENV_ALLOW_RAY_IN_PIP) != "1":
            ray_path = Path(target_dir) / "ray"
            if ray_path.exists() and ray_path.is_dir():
                shutil.rmtree(ray_path)

        # Insert the target directory into the PYTHONPATH.
        python_path = target_dir
        if "PYTHONPATH" in context.env_vars:
            python_path += os.pathsep + context.env_vars["PYTHONPATH"]
        context.env_vars["PYTHONPATH"] = python_path
Exemplo n.º 7
0
 def modify_context(uri: str, plugin_config_dict: dict,
                    ctx: RuntimeEnvContext) -> None:
     """Apply the plugin config to ``ctx``.

     Sets the plugin's env var, appends an ``echo ... > file`` command to
     the command prefix, and prepends ``prefix_command`` to the Python
     executable.
     """
     env_value = str(plugin_config_dict["env_value"])
     ctx.env_vars[MyPlugin.env_key] = env_value

     echo_command = (f"echo {plugin_config_dict['tmp_content']} > "
                     f"{plugin_config_dict['tmp_file']}")
     ctx.command_prefix.append(echo_command)

     ctx.py_executable = " ".join(
         [plugin_config_dict["prefix_command"], ctx.py_executable])
Exemplo n.º 8
0
def test_inherit_cluster_env_pythonpath(monkeypatch):
    """PYTHONPATH order is: explicit arg, then env_vars, then process env."""
    inherited = os.environ.get("PYTHONPATH", "")
    monkeypatch.setenv("PYTHONPATH", os.pathsep.join(["last", inherited]))
    context = RuntimeEnvContext(env_vars={"PYTHONPATH": "middle"})

    set_pythonpath_in_context("first", context)

    expected_prefix = os.pathsep.join(["first", "middle", "last"])
    assert context.env_vars["PYTHONPATH"].startswith(expected_prefix)
Exemplo n.º 9
0
            def run_setup_with_logger():
                """Build and return the RuntimeEnvContext for this request.

                Deserializes the runtime env, runs each built-in manager's
                ``setup`` against a fresh context, records which URIs belong
                to this serialized env, and then runs every configured
                plugin. All log output goes to the per-job logger.

                Returns:
                    The populated ``RuntimeEnvContext``.
                """
                runtime_env = RuntimeEnv(
                    serialized_runtime_env=serialized_runtime_env)
                allocated_resource: dict = json.loads(
                    serialized_allocated_resource_instances or "{}")

                # Use a separate logger for each job.
                per_job_logger = self.get_or_create_logger(request.job_id)
                # TODO(chenk008): Add log about allocated_resource to
                # avoid lint error. That will be moved to cgroup plugin.
                per_job_logger.debug(f"Worker has resource :"
                                     f"{allocated_resource}")
                context = RuntimeEnvContext(env_vars=runtime_env.env_vars())
                self._pip_manager.setup(
                    runtime_env, context, logger=per_job_logger)
                self._conda_manager.setup(
                    runtime_env, context, logger=per_job_logger)
                self._py_modules_manager.setup(
                    runtime_env, context, logger=per_job_logger)
                self._working_dir_manager.setup(
                    runtime_env, context, logger=per_job_logger)
                self._container_manager.setup(
                    runtime_env, context, logger=per_job_logger)

                # Add the mapping of URIs -> the serialized environment to be
                # used for cache invalidation.
                if runtime_env.working_dir_uri():
                    uri = runtime_env.working_dir_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.py_modules_uris():
                    for uri in runtime_env.py_modules_uris():
                        self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.conda_uri():
                    uri = runtime_env.conda_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.pip_uri():
                    uri = runtime_env.pip_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.plugin_uris():
                    for uri in runtime_env.plugin_uris():
                        self._uris_to_envs[uri].add(serialized_runtime_env)

                # Run setup function from all the plugins
                for plugin_class_path, config in runtime_env.plugins():
                    # Log through the per-job logger, like everything else
                    # in this function, so plugin setup shows up in the
                    # job's own log rather than the shared one.
                    per_job_logger.debug(
                        f"Setting up runtime env plugin {plugin_class_path}")
                    plugin_class = import_attr(plugin_class_path)
                    # TODO(simon): implement uri support
                    # The config is parsed separately for each call so the
                    # two plugin hooks never share one dict object.
                    plugin_class.create("uri not implemented",
                                        json.loads(config), context)
                    plugin_class.modify_context("uri not implemented",
                                                json.loads(config), context)

                return context
Exemplo n.º 10
0
 def modify_context(
     self,
     uris: List[str],
     plugin_config_dict: dict,
     ctx: RuntimeEnvContext,
     logger: logging.Logger,
 ) -> None:
     """Set the priority test env var, failing if it is already present.

     Raises:
         RuntimeError: if the env var already exists in ``ctx.env_vars``.
     """
     if PRIORITY_TEST_ENV_VAR_NAME not in ctx.env_vars:
         ctx.env_vars[
             PRIORITY_TEST_ENV_VAR_NAME] = PriorityTestPlugin2.env_value
         return

     raise RuntimeError(
         f"Env var {PRIORITY_TEST_ENV_VAR_NAME} has been set to "
         f"{ctx.env_vars[PRIORITY_TEST_ENV_VAR_NAME]}.")
Exemplo n.º 11
0
 def modify_context(
     self,
     uris: List[str],
     plugin_config_dict: dict,
     ctx: RuntimeEnvContext,
     logger: logging.Logger,
 ) -> None:
     """Append this plugin's value to the priority test env var.

     If the variable is not present yet, it is created with the value.
     """
     if PRIORITY_TEST_ENV_VAR_NAME not in ctx.env_vars:
         ctx.env_vars[
             PRIORITY_TEST_ENV_VAR_NAME] = PriorityTestPlugin1.env_value
         return

     ctx.env_vars[
         PRIORITY_TEST_ENV_VAR_NAME] += PriorityTestPlugin1.env_value
Exemplo n.º 12
0
 def modify_context(
     self,
     uris: List[str],
     plugin_config_dict: dict,
     ctx: RuntimeEnvContext,
     logger: logging.Logger,
 ) -> None:
     """Apply the plugin config to ``ctx``.

     Sets the plugin's env var, appends an ``echo ... > file`` command to
     the command prefix, and prepends ``prefix_command`` to the Python
     executable.
     """
     env_value = str(plugin_config_dict["env_value"])
     ctx.env_vars[MyPlugin.env_key] = env_value

     echo_command = (f"echo {plugin_config_dict['tmp_content']} > "
                     f"{plugin_config_dict['tmp_file']}")
     ctx.command_prefix.append(echo_command)

     ctx.py_executable = " ".join(
         [plugin_config_dict["prefix_command"], ctx.py_executable])
Exemplo n.º 13
0
    def setup(self,
              runtime_env: dict,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        """Prepare a conda environment and make ``context`` activate it.

        Does nothing when the runtime env dict has neither a ``conda`` nor a
        ``pip`` entry. If ``conda`` is a string it is used as the env name
        as-is; otherwise an ``environment.yml`` is written under
        ``<resources_dir>/conda`` and an env is obtained from
        ``get_or_create_conda_env``.

        On success ``context.py_executable`` is set to ``"python"`` and the
        conda activation commands are appended to ``context.command_prefix``.
        """
        if not runtime_env.get("conda") and not runtime_env.get("pip"):
            return

        logger.debug(f"Setting up conda or pip for runtime_env: {runtime_env}")
        conda_dict = get_conda_dict(runtime_env, self._resources_dir)
        if isinstance(runtime_env.get("conda"), str):
            # A pre-existing, named conda env: nothing to install.
            conda_env_name = runtime_env["conda"]
        else:
            assert conda_dict is not None
            # Decide which extra pip dependencies to inject into the env.
            ray_pip = current_ray_pip_specifier(logger=logger)
            if ray_pip:
                extra_pip_dependencies = [ray_pip, "ray[default]"]
            elif runtime_env.get("_inject_current_ray"):
                extra_pip_dependencies = (
                    _resolve_install_from_source_ray_dependencies())
            else:
                extra_pip_dependencies = []
            conda_dict = inject_dependencies(conda_dict, _current_py_version(),
                                             extra_pip_dependencies)
            logger.info(f"Setting up conda environment with {runtime_env}")
            # It is not safe for multiple processes to install conda envs
            # concurrently, even if the envs are different, so use a global
            # lock for all conda installs.
            # See https://github.com/ray-project/ray/issues/17086
            file_lock_name = "ray-conda-install.lock"
            with FileLock(os.path.join(self._resources_dir, file_lock_name)):
                conda_dir = os.path.join(self._resources_dir, "conda")
                try_to_create_directory(conda_dir)
                conda_yaml_path = os.path.join(conda_dir, "environment.yml")
                with open(conda_yaml_path, "w") as file:
                    # Sort keys because we hash based on the file contents,
                    # and we don't want the hash to depend on the order
                    # of the dependencies.
                    yaml.dump(conda_dict, file, sort_keys=True)
                conda_env_name = get_or_create_conda_env(conda_yaml_path,
                                                         conda_dir,
                                                         logger=logger)

            # Runs after the install lock has been released; `conda_dir` is
            # the directory assigned inside the `with` block above.
            if runtime_env.get("_inject_current_ray"):
                conda_path = os.path.join(conda_dir, conda_env_name)
                _inject_ray_to_conda_site(conda_path, logger=logger)

        context.py_executable = "python"
        context.command_prefix += get_conda_activate_commands(conda_env_name)
        logger.info(
            f"Finished setting up runtime environment at {conda_env_name}")
Exemplo n.º 14
0
def set_pythonpath_in_context(python_path: str, context: RuntimeEnvContext):
    """Put ``python_path`` at the front of PYTHONPATH in the runtime env.

    Any PYTHONPATH the user supplied through ``env_vars`` is kept, and so is
    the PYTHONPATH already present in this process's environment.

    Resulting import priority, highest first:
    ``python_path`` > env_vars PYTHONPATH > existing cluster env PYTHONPATH.
    """
    entries = [python_path]
    if "PYTHONPATH" in context.env_vars:
        entries.append(context.env_vars["PYTHONPATH"])
    if "PYTHONPATH" in os.environ:
        entries.append(os.environ["PYTHONPATH"])
    context.env_vars["PYTHONPATH"] = os.pathsep.join(entries)
Exemplo n.º 15
0
        async def _setup_runtime_env(runtime_env, serialized_runtime_env,
                                     serialized_allocated_resource_instances):
            """Build and return the RuntimeEnvContext for this request.

            Runs the container manager, then every base plugin cache manager,
            and finally the user-configured plugins (in a worker thread).

            Args:
                runtime_env: Runtime env object; its ``env_vars()`` and
                    ``plugins()`` are read here.
                serialized_runtime_env: Not used inside this function body.
                serialized_allocated_resource_instances: JSON string (or a
                    falsy value) describing allocated resources; only logged.

            Returns:
                The populated ``RuntimeEnvContext``.
            """
            allocated_resource: dict = json.loads(
                serialized_allocated_resource_instances or "{}")
            # Use a separate logger for each job.
            per_job_logger = self.get_or_create_logger(request.job_id)
            # TODO(chenk008): Add log about allocated_resource to
            # avoid lint error. That will be moved to cgroup plugin.
            per_job_logger.debug(f"Worker has resource :"
                                 f"{allocated_resource}")
            context = RuntimeEnvContext(env_vars=runtime_env.env_vars())
            await self._container_manager.setup(runtime_env,
                                                context,
                                                logger=per_job_logger)

            # Managers are awaited one at a time, in dict iteration order.
            for manager in self._base_plugin_cache_managers.values():
                await manager.create_if_needed(runtime_env,
                                               context,
                                               logger=per_job_logger)

            def setup_plugins():
                # Run setup function from all the plugins
                if runtime_env.plugins():
                    for (
                            setup_context
                    ) in self._runtime_env_plugin_manager.sorted_plugin_setup_contexts(
                            runtime_env.plugins()):
                        per_job_logger.debug(
                            f"Setting up runtime env plugin {setup_context.name}"
                        )
                        # TODO(architkulkarni): implement uri support
                        setup_context.class_instance.validate(runtime_env)
                        setup_context.class_instance.create(
                            "uri not implemented", setup_context.config,
                            context)
                        setup_context.class_instance.modify_context(
                            "uri not implemented",
                            setup_context.config,
                            context,
                            per_job_logger,
                        )

            loop = asyncio.get_event_loop()
            # Plugins setup method is sync process, running in other threads
            # is to avoid blocking asyncio loop
            await loop.run_in_executor(None, setup_plugins)

            return context
Exemplo n.º 16
0
    def modify_context(
        self,
        uri: str,
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        """Make ``context`` activate the conda env for ``runtime_env``.

        No-op when the runtime env has no conda section. The env is either
        the named one from the runtime env or the path derived from the hash
        of its conda URI.
        """
        if not runtime_env.has_conda():
            return

        if not runtime_env.conda_env_name():
            # No named env: locate the one keyed by the conda URI's hash.
            _protocol, env_hash = parse_uri(runtime_env.conda_uri())
            env_to_activate = self._get_path_from_hash(env_hash)
        else:
            env_to_activate = runtime_env.conda_env_name()

        context.py_executable = "python"
        context.command_prefix += get_conda_activate_commands(env_to_activate)
Exemplo n.º 17
0
    def setup(self,
              runtime_env: dict,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        """Download the working_dir package and make the worker run in it.

        No-op when the runtime env dict has no ``working_dir``. Otherwise a
        ``cd`` into the unpacked directory is appended to the command prefix
        and the directory is placed first on PYTHONPATH.
        """
        if not runtime_env.get("working_dir"):
            return

        unpacked_dir = download_and_unpack_package(
            runtime_env["working_dir"], self._resources_dir, logger=logger)
        context.command_prefix += [f"cd {unpacked_dir}"]

        # Insert the working_dir as the first entry in PYTHONPATH. This is
        # compatible with users providing their own PYTHONPATH in env_vars.
        env_vars = context.env_vars
        if "PYTHONPATH" in env_vars:
            env_vars["PYTHONPATH"] = (
                unpacked_dir + os.pathsep + env_vars["PYTHONPATH"])
        else:
            env_vars["PYTHONPATH"] = unpacked_dir
Exemplo n.º 18
0
    def setup(self,
              runtime_env: RuntimeEnv,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        """Download every py_modules package and prepend them to PYTHONPATH.

        No-op when the runtime env has no py_modules.
        """
        if not runtime_env.py_modules():
            return

        module_dirs = [
            download_and_unpack_package(
                uri, self._resources_dir, logger=logger)
            for uri in runtime_env.py_modules()
        ]

        # Insert the py_modules directories into the PYTHONPATH.
        path_parts = [os.pathsep.join(module_dirs)]
        if "PYTHONPATH" in context.env_vars:
            path_parts.append(context.env_vars["PYTHONPATH"])
        context.env_vars["PYTHONPATH"] = os.pathsep.join(path_parts)
Exemplo n.º 19
0
    def setup(self,
              runtime_env: dict,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        """Set up the working dir from the env's URIs and enter it.

        No-op when the runtime env dict has no ``uris`` or when
        ``ensure_runtime_env_setup`` yields no working directory. Otherwise
        a ``cd`` into the directory is appended to the command prefix and
        the directory is placed first on PYTHONPATH.
        """
        if not runtime_env.get("uris"):
            return

        prepared_dir = self.ensure_runtime_env_setup(runtime_env["uris"],
                                                     logger=logger)
        if prepared_dir is None:
            return
        context.command_prefix += [f"cd {prepared_dir}"]

        # Insert the working_dir as the first entry in PYTHONPATH. This is
        # compatible with users providing their own PYTHONPATH in env_vars.
        env_vars = context.env_vars
        if "PYTHONPATH" in env_vars:
            env_vars["PYTHONPATH"] = (
                prepared_dir + os.pathsep + env_vars["PYTHONPATH"])
        else:
            env_vars["PYTHONPATH"] = prepared_dir
Exemplo n.º 20
0
    def modify_context(self, uri: Optional[str], runtime_env_dict: Dict,
                       context: RuntimeEnvContext):
        """Make the worker run inside the local directory for ``uri``.

        No-op when ``uri`` is None. Otherwise a ``cd`` into the directory is
        appended to the command prefix and the directory is placed first on
        PYTHONPATH.

        Raises:
            ValueError: if the local directory for ``uri`` does not exist.
        """
        if uri is None:
            return

        unpacked_dir = get_local_dir_from_uri(uri, self._resources_dir)
        if not unpacked_dir.exists():
            raise ValueError(
                f"Local directory {unpacked_dir} for URI {uri} does "
                "not exist on the cluster. Something may have gone wrong while "
                "downloading or unpacking the working_dir.")

        context.command_prefix += [f"cd {unpacked_dir}"]

        # Insert the working_dir as the first entry in PYTHONPATH. This is
        # compatible with users providing their own PYTHONPATH in env_vars.
        path_parts = [str(unpacked_dir)]
        if "PYTHONPATH" in context.env_vars:
            path_parts.append(context.env_vars["PYTHONPATH"])
        context.env_vars["PYTHONPATH"] = os.pathsep.join(path_parts)
Exemplo n.º 21
0
    def modify_context(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        """Use the virtualenv python for ``uri`` as the worker executable.

        No-op when the runtime env has no pip section.

        Raises:
            ValueError: if the virtualenv python does not exist locally.
        """
        if not runtime_env.has_pip():
            return

        _protocol, env_hash = parse_uri(uri)
        venv_dir = self._get_path_from_hash(env_hash)
        venv_python = _PathHelper.get_virtualenv_python(venv_dir)

        if os.path.exists(venv_python):
            # Update py_executable.
            context.py_executable = venv_python
            return

        raise ValueError(
            f"Local directory {venv_dir} for URI {uri} does "
            "not exist on the cluster. Something may have gone wrong while "
            "installing the runtime_env `pip` packages.")
Exemplo n.º 22
0
 def modify_context(
     self,
     uri: str,
     runtime_env: RuntimeEnv,
     context: RuntimeEnvContext,
     logger: Optional[logging.Logger] = default_logger,
 ):
     """Prepend the pip install directory for ``uri`` to PYTHONPATH.

     No-op when the runtime env has no pip section.

     Raises:
         ValueError: if the local directory for ``uri`` does not exist.
     """
     if not runtime_env.has_pip():
         return

     # The parsed pieces are not used below; the call is kept as-is.
     _protocol, _env_hash = parse_uri(uri)
     install_dir = get_local_dir_from_uri(uri, self._resources_dir)
     if not install_dir.exists():
         raise ValueError(
             f"Local directory {install_dir} for URI {uri} does "
             "not exist on the cluster. Something may have gone wrong while "
             "installing the runtime_env `pip` packages.")

     # Insert the target directory into the PYTHONPATH.
     path_parts = [str(install_dir)]
     if "PYTHONPATH" in context.env_vars:
         path_parts.append(context.env_vars["PYTHONPATH"])
     context.env_vars["PYTHONPATH"] = os.pathsep.join(path_parts)
Exemplo n.º 23
0
async def test_create_delete_size_equal(tmpdir, ray_start_regular):
    """Tests that `create` and `delete_uri` return the same size for a URI."""

    # Create an arbitrary nonempty directory to upload.
    dir_to_upload = Path(tmpdir) / "dir_to_upload"
    dir_to_upload.mkdir(parents=True)
    (dir_to_upload / "file").write_text("F" * 100)

    uri = get_uri_for_directory(dir_to_upload)
    assert get_directory_size_bytes(dir_to_upload) > 0

    assert upload_package_if_needed(uri, tmpdir, dir_to_upload)

    manager = WorkingDirManager(tmpdir)

    size_on_create = await manager.create(uri, {}, RuntimeEnvContext())
    size_on_delete = manager.delete_uri(uri)
    assert size_on_create == size_on_delete
Exemplo n.º 24
0
 def modify_context(
     self,
     uris: Optional[List[str]],
     runtime_env_dict: Dict,
     context: RuntimeEnvContext,
     logger: Optional[logging.Logger] = default_logger,
 ):
     """Prepend the local directory of every py_modules URI to PYTHONPATH.

     No-op when ``uris`` is None.

     Raises:
         ValueError: if the local directory for any URI does not exist.
     """
     if uris is None:
         return

     resolved_dirs = []
     for module_uri in uris:
         local_dir = get_local_dir_from_uri(module_uri, self._resources_dir)
         if local_dir.exists():
             resolved_dirs.append(str(local_dir))
             continue
         raise ValueError(
             f"Local directory {local_dir} for URI {module_uri} does "
             "not exist on the cluster. Something may have gone wrong while "
             "downloading or unpacking the py_modules files.")

     # Insert the py_modules directories into the PYTHONPATH.
     path_parts = [os.pathsep.join(resolved_dirs)]
     if "PYTHONPATH" in context.env_vars:
         path_parts.append(context.env_vars["PYTHONPATH"])
     context.env_vars["PYTHONPATH"] = os.pathsep.join(path_parts)
Exemplo n.º 25
0
from ray.core.generated.common_pb2 import Language

logger = logging.getLogger(__name__)

# Command-line interface of the worker setup script. Only the options below
# are declared; everything else on the command line is passed through.
parser = argparse.ArgumentParser(
    description="Set up the environment for a Ray worker and launch the "
    "worker.")

parser.add_argument(
    "--serialized-runtime-env",
    type=str,
    help="the serialized parsed runtime env dict",
)

parser.add_argument(
    "--serialized-runtime-env-context",
    type=str,
    help="the serialized runtime env context",
)

parser.add_argument(
    "--language",
    type=str,
    help="the language type of the worker",
)

if __name__ == "__main__":
    # Options that are not declared above end up in remaining_args and are
    # handed to exec_worker.
    args, remaining_args = parser.parse_known_args()
    # NOTE(chenk008): we still need it to start worker in container.
    remaining_args.append(f"--worker-shim-pid={os.getpid()}")
    # NOTE(edoakes): args.serialized_runtime_env_context is only None when
    # we're starting the main Ray client proxy server. That case should
    # probably not even go through this codepath.
    serialized_context = args.serialized_runtime_env_context
    if not serialized_context:
        # Fall back to an empty serialized context.
        serialized_context = "{}"
    runtime_env_context = RuntimeEnvContext.deserialize(serialized_context)
    runtime_env_context.exec_worker(
        remaining_args, Language.Value(args.language))
Exemplo n.º 26
0
            def run_setup_with_logger():
                """Build the RuntimeEnvContext for ``serialized_runtime_env``.

                Steps, in order: container setup; working_dir, conda and pip
                (one URI each); py_modules (several URIs); recording which
                URIs this serialized env uses; and finally every plugin.
                Returns the populated context.
                """
                runtime_env = RuntimeEnv(serialized_runtime_env=serialized_runtime_env)
                allocated_resource: dict = json.loads(
                    serialized_allocated_resource_instances or "{}"
                )

                # Use a separate logger for each job.
                job_logger = self.get_or_create_logger(request.job_id)
                # TODO(chenk008): Add log about allocated_resource to
                # avoid lint error. That will be moved to cgroup plugin.
                job_logger.debug(f"Worker has resource :{allocated_resource}")
                context = RuntimeEnvContext(env_vars=runtime_env.env_vars())
                self._container_manager.setup(runtime_env, context, logger=job_logger)

                def create_or_mark_used(manager, uri_cache, uri):
                    # On a cache hit only mark the URI as used; on a miss
                    # create it and store the returned size in the cache.
                    if uri in uri_cache:
                        job_logger.debug(f"Cache hit for URI {uri}.")
                        uri_cache.mark_used(uri, logger=job_logger)
                        return
                    job_logger.debug(f"Cache miss for URI {uri}.")
                    size_bytes = manager.create(
                        uri, runtime_env, context, logger=job_logger
                    )
                    uri_cache.add(uri, size_bytes, logger=job_logger)

                def record_env_for(uri):
                    # Map the URI to this serialized env; the mapping is used
                    # for cache invalidation.
                    self._uris_to_envs[uri].add(serialized_runtime_env)

                # Managers that deal with at most one URI per runtime env.
                single_uri_managers = (
                    (self._working_dir_manager, self._working_dir_uri_cache),
                    (self._conda_manager, self._conda_uri_cache),
                    (self._pip_manager, self._pip_uri_cache),
                )
                for manager, uri_cache in single_uri_managers:
                    uri = manager.get_uri(runtime_env)
                    if uri is not None:
                        create_or_mark_used(manager, uri_cache, uri)
                    # modify_context is called even when there is no URI.
                    manager.modify_context(uri, runtime_env, context)

                # py_modules uses multiple URIs, so it is handled separately
                # from working_dir, conda, and pip above.
                py_modules_manager = self._py_modules_manager
                py_modules_uris = py_modules_manager.get_uris(runtime_env)
                if py_modules_uris is not None:
                    for uri in py_modules_uris:
                        create_or_mark_used(
                            py_modules_manager, self._py_modules_uri_cache, uri
                        )
                py_modules_manager.modify_context(py_modules_uris, runtime_env, context)

                # Remember which URIs this serialized environment refers to.
                if runtime_env.working_dir_uri():
                    record_env_for(runtime_env.working_dir_uri())
                if runtime_env.py_modules_uris():
                    for uri in runtime_env.py_modules_uris():
                        record_env_for(uri)
                if runtime_env.conda_uri():
                    record_env_for(runtime_env.conda_uri())
                if runtime_env.pip_uri():
                    record_env_for(runtime_env.pip_uri())
                if runtime_env.plugin_uris():
                    for uri in runtime_env.plugin_uris():
                        record_env_for(uri)

                # Run setup function from all the plugins
                for plugin_class_path, config in runtime_env.plugins():
                    job_logger.debug(
                        f"Setting up runtime env plugin {plugin_class_path}"
                    )
                    plugin_class = import_attr(plugin_class_path)
                    # TODO(simon): implement uri support
                    # The config is parsed separately for each call, so the
                    # two calls never share one dict.
                    plugin_class.create(
                        "uri not implemented", json.loads(config), context
                    )
                    plugin_class.modify_context(
                        "uri not implemented", json.loads(config), context
                    )

                return context
Exemplo n.º 27
0
        async def _setup_runtime_env(runtime_env, serialized_runtime_env,
                                     serialized_allocated_resource_instances):
            """Build the RuntimeEnvContext for ``runtime_env``.

            Steps, in order: container setup; working_dir, conda and pip
            (one URI each); java_jars and py_modules (several URIs each);
            and finally every plugin, run in an executor thread. Returns the
            populated context. ``serialized_runtime_env`` is not read here.
            """
            allocated_resource: dict = json.loads(
                serialized_allocated_resource_instances or "{}")
            # Use a separate logger for each job.
            job_logger = self.get_or_create_logger(request.job_id)
            # TODO(chenk008): Add log about allocated_resource to
            # avoid lint error. That will be moved to cgroup plugin.
            job_logger.debug(f"Worker has resource :{allocated_resource}")
            context = RuntimeEnvContext(env_vars=runtime_env.env_vars())
            await self._container_manager.setup(
                runtime_env, context, logger=job_logger)

            async def create_or_mark_used(manager, uri_cache, uri):
                # On a cache hit only mark the URI as used; on a miss create
                # it and store the returned size in the cache.
                if uri in uri_cache:
                    job_logger.debug(f"Cache hit for URI {uri}.")
                    uri_cache.mark_used(uri, logger=job_logger)
                    return
                job_logger.debug(f"Cache miss for URI {uri}.")
                size_bytes = await manager.create(
                    uri, runtime_env, context, logger=job_logger)
                uri_cache.add(uri, size_bytes, logger=job_logger)

            # Managers that deal with at most one URI per runtime env.
            single_uri_managers = (
                (self._working_dir_manager, self._working_dir_uri_cache),
                (self._conda_manager, self._conda_uri_cache),
                (self._pip_manager, self._pip_uri_cache),
            )
            for manager, uri_cache in single_uri_managers:
                uri = manager.get_uri(runtime_env)
                if uri is not None:
                    await create_or_mark_used(manager, uri_cache, uri)
                # modify_context is called even when there is no URI.
                manager.modify_context(uri, runtime_env, context)

            # java_jars and py_modules use multiple URIs, so the logic is
            # slightly different from working_dir, conda, and pip above.
            multi_uri_managers = (
                (self._java_jars_manager, self._java_jars_uri_cache),
                (self._py_modules_manager, self._py_modules_uri_cache),
            )
            for manager, uri_cache in multi_uri_managers:
                uris = manager.get_uris(runtime_env)
                if uris is not None:
                    job_logger.debug(f"URIs is not None, URI {uris}.")
                    for uri in uris:
                        await create_or_mark_used(manager, uri_cache, uri)
                manager.modify_context(uris, runtime_env, context)

            def setup_plugins():
                # Run setup function from all the plugins
                for plugin_class_path, config in runtime_env.plugins():
                    job_logger.debug(
                        f"Setting up runtime env plugin {plugin_class_path}")
                    plugin_class = import_attr(plugin_class_path)
                    # TODO(simon): implement uri support
                    # The config is parsed separately for each call, so the
                    # two calls never share one dict.
                    plugin_class.create("uri not implemented",
                                        json.loads(config), context)
                    plugin_class.modify_context("uri not implemented",
                                                json.loads(config), context)

            # Plugin setup is synchronous, so it runs in the loop's default
            # executor to avoid blocking the asyncio event loop.
            await asyncio.get_event_loop().run_in_executor(
                None, setup_plugins)

            return context