def setup(self,
          runtime_env: RuntimeEnv,
          context: RuntimeEnvContext,
          logger: Optional[logging.Logger] = default_logger):
    """Install the runtime_env's pip packages and expose them via PYTHONPATH.

    Does nothing when ``runtime_env`` has no pip section.

    Args:
        runtime_env: The runtime environment whose pip packages are installed.
        context: Context whose ``env_vars["PYTHONPATH"]`` is updated in place.
        logger: Logger used for progress messages and passed to the installer.
    """
    if not runtime_env.has_pip():
        return

    logger.debug(f"Setting up pip for runtime_env: {runtime_env}")
    pip_packages: List[str] = runtime_env.pip_packages()
    target_dir = self._get_path_from_hash(_get_pip_hash(pip_packages))

    _install_pip_list_to_dir(pip_packages, target_dir, logger=logger)

    # Despite Ray being removed from the input pip list during validation,
    # other packages in the pip list (for example, xgboost_ray) may
    # themselves include Ray as a dependency. In this case, we will have
    # inadvertently installed the latest Ray version in the target_dir,
    # which may cause Ray version mismatch issues. Uninstall it here, if it
    # exists, to make the workers use the Ray that is already
    # installed in the cluster.
    #
    # In the case where the user explicitly wants to include Ray in their
    # pip list (and signals this by setting the environment variable below)
    # then we don't want this deletion logic, so we skip it.
    #
    # Environment variable values are always strings, so the opt-out flag
    # must be compared against "1"; comparing against the integer 1 is
    # always unequal and would make the opt-out impossible.
    if os.environ.get(RAY_RUNTIME_ENV_ALLOW_RAY_IN_PIP) != "1":
        ray_path = Path(target_dir) / "ray"
        if ray_path.exists() and ray_path.is_dir():
            shutil.rmtree(ray_path)

    # Insert the target directory into the PYTHONPATH.
    python_path = target_dir
    if "PYTHONPATH" in context.env_vars:
        python_path += os.pathsep + context.env_vars["PYTHONPATH"]
    context.env_vars["PYTHONPATH"] = python_path
def setup(
    self,
    runtime_env: RuntimeEnv,
    context: RuntimeEnvContext,
    logger: Optional[logging.Logger] = default_logger,
):
    """Point ``context.py_executable`` at a ``podman run`` command prefix.

    Does nothing unless the runtime_env has a py_container section with a
    non-empty image.

    Args:
        runtime_env: Supplies the container image and optional run options.
        context: Context whose ``py_executable`` is overwritten.
        logger: Logger used to report the resulting command prefix.
    """
    if not (runtime_env.has_py_container()
            and runtime_env.py_container_image()):
        return

    # Mount the Ray temp dir at the same path inside the container.
    tmp_dir_mount = self._ray_tmp_dir + ":" + self._ray_tmp_dir
    # NOTE(review): os.getenv returns None when RAY_RAYLET_PID is unset, in
    # which case this concatenation raises TypeError -- confirm the variable
    # is always set by the caller.
    raylet_pid_env = "RAY_RAYLET_PID=" + os.getenv("RAY_RAYLET_PID")

    container_command = [
        "podman",
        "run",
        "-v",
        tmp_dir_mount,
        "--cgroup-manager=cgroupfs",
        "--network=host",
        "--pid=host",
        "--ipc=host",
        "--env-host",
        "--env",
        raylet_pid_env,
    ]

    if runtime_env.py_container_run_options():
        container_command += runtime_env.py_container_run_options()

    # TODO(chenk008): add resource limit
    container_command += [
        "--entrypoint",
        "python",
        runtime_env.py_container_image(),
    ]

    context.py_executable = " ".join(container_command)
    logger.info("start worker in container with prefix: {}".format(
        context.py_executable))
def setup(
    self,
    runtime_env: RuntimeEnv,
    context: RuntimeEnvContext,
    logger: Optional[logging.Logger] = default_logger,
):
    """Create the conda environment if needed and activate it in ``context``.

    Does nothing when ``runtime_env`` has no conda section. When the
    runtime_env names an existing conda environment, that name is used
    directly; otherwise an environment is created under a path derived from
    the conda URI's hash.

    Args:
        runtime_env: The runtime environment describing the conda setup.
        context: Context whose ``py_executable`` and ``command_prefix`` are
            updated so the worker runs inside the conda environment.
        logger: Logger used for progress messages and passed to helpers.
    """
    if not runtime_env.has_conda():
        return

    logger.debug("Setting up conda or pip for runtime_env: "
                 f"{runtime_env.serialize()}")

    if runtime_env.conda_env_name():
        conda_env_name = runtime_env.conda_env_name()
    else:
        _, env_hash = parse_uri(runtime_env.conda_uri())
        conda_env_name = self._get_path_from_hash(env_hash)
        conda_dict = _get_conda_dict_with_ray_inserted(
            runtime_env, logger=logger)

        # It is not safe for multiple processes to install conda envs
        # concurrently, even if the envs are different, so use a global
        # lock for all conda installs.
        # See https://github.com/ray-project/ray/issues/17086
        file_lock_name = "ray-conda-install.lock"
        conda_yaml_file = os.path.join(self._resources_dir,
                                       "environment.yml")
        with FileLock(os.path.join(self._resources_dir, file_lock_name)):
            try:
                with open(conda_yaml_file, "w") as file:
                    yaml.dump(conda_dict, file)

                if conda_env_name in self._created_envs:
                    logger.debug(f"Conda env {conda_env_name} already "
                                 "created, skipping creation.")
                else:
                    create_conda_env(
                        conda_yaml_file,
                        prefix=conda_env_name,
                        logger=logger)
                    self._created_envs.add(conda_env_name)
            finally:
                # If writing the YAML file failed, there is nothing to
                # remove; do not let the cleanup replace the real error.
                try:
                    os.remove(conda_yaml_file)
                except FileNotFoundError:
                    pass

            if runtime_env.get_extension("_inject_current_ray"):
                _inject_ray_to_conda_site(
                    conda_path=conda_env_name, logger=logger)

    context.py_executable = "python"
    context.command_prefix += get_conda_activate_commands(conda_env_name)
    logger.info(
        f"Finished setting up runtime environment at {conda_env_name}")
def create(
    self,
    uri: Optional[str],
    runtime_env: RuntimeEnv,
    context: RuntimeEnvContext,
    logger: Optional[logging.Logger] = default_logger,
) -> int:
    """Create the conda environment for ``uri`` and return its size in bytes.

    Args:
        uri: The conda URI; its hash determines the environment's path.
        runtime_env: The runtime environment providing the conda spec.
        context: Unused here; part of the common manager interface.
        logger: Logger used for progress messages and passed to helpers.

    Returns:
        The value of ``get_directory_size_bytes`` for the environment path.
    """
    logger.debug("Setting up conda for runtime_env: "
                 f"{runtime_env.serialize()}")
    _, env_hash = parse_uri(uri)
    conda_env_name = self._get_path_from_hash(env_hash)

    conda_dict = _get_conda_dict_with_ray_inserted(runtime_env, logger=logger)

    logger.info(f"Setting up conda environment with {runtime_env}")
    conda_yaml_file = os.path.join(self._resources_dir, "environment.yml")
    with FileLock(self._installs_and_deletions_file_lock):
        try:
            with open(conda_yaml_file, "w") as file:
                yaml.dump(conda_dict, file)
            create_conda_env_if_needed(
                conda_yaml_file, prefix=conda_env_name, logger=logger
            )
        finally:
            # If writing the YAML file failed, there is nothing to remove;
            # do not let the cleanup replace the real error.
            try:
                os.remove(conda_yaml_file)
            except FileNotFoundError:
                pass

        if runtime_env.get_extension("_inject_current_ray") == "True":
            _inject_ray_to_conda_site(conda_path=conda_env_name, logger=logger)

    logger.info(f"Finished creating conda environment at {conda_env_name}")
    return get_directory_size_bytes(conda_env_name)
def create(
    self,
    uri: str,
    runtime_env: RuntimeEnv,
    context: RuntimeEnvContext,
    logger: Optional[logging.Logger] = default_logger,
) -> int:
    """Install the pip packages for ``uri`` and return the directory size.

    Args:
        uri: The pip URI; its hash determines the installation directory.
        runtime_env: The runtime environment providing the pip package list.
        context: Unused here; part of the common manager interface.
        logger: Logger used for progress messages and passed to the installer.

    Returns:
        The value of ``get_directory_size_bytes`` for the target directory.
    """
    logger.debug("Setting up pip for runtime_env: "
                 f"{runtime_env.serialize()}")
    _, uri_hash = parse_uri(uri)
    target_dir = self._get_path_from_hash(uri_hash)

    pip_packages: List[str] = runtime_env.pip_packages()
    with FileLock(self._installs_and_deletions_file_lock):
        _install_pip_list_to_dir(pip_packages, target_dir, logger=logger)

        # Despite Ray being removed from the input pip list during
        # validation, other packages in the pip list (for example,
        # xgboost_ray) may themselves include Ray as a dependency. In this
        # case, we will have inadvertently installed the latest Ray version
        # in the target_dir, which may cause Ray version mismatch issues.
        # Uninstall it here, if it exists, to make the workers use the Ray
        # that is already installed in the cluster.
        #
        # In the case where the user explicitly wants to include Ray in
        # their pip list (and signals this by setting the environment
        # variable below) then we don't want this deletion logic, so we
        # skip it.
        #
        # Environment variable values are always strings, so the opt-out
        # flag must be compared against "1"; comparing against the integer
        # 1 is always unequal and would make the opt-out impossible.
        if os.environ.get(RAY_RUNTIME_ENV_ALLOW_RAY_IN_PIP) != "1":
            ray_path = Path(target_dir) / "ray"
            if ray_path.exists() and ray_path.is_dir():
                shutil.rmtree(ray_path)

    return get_directory_size_bytes(target_dir)
def test_get_conda_dict_with_ray_inserted_m1_wheel(monkeypatch):
    """On a simulated M1 Mac, Ray is inserted as ``ray==<version>``."""
    # Disable dev mode to prevent Ray dependencies being automatically inserted
    # into the conda dict.
    for env_flag in ("RAY_RUNTIME_ENV_LOCAL_DEV_MODE",
                     "RAY_CI_POST_WHEEL_TESTS"):
        monkeypatch.delenv(env_flag, raising=False)

    monkeypatch.setattr(ray, "__version__", "1.9.0")
    monkeypatch.setattr(ray, "__commit__",
                        "92599d9127e228fe8d0a2d94ca75754ec21c4ae4")
    monkeypatch.setattr(sys, "version_info", (3, 9, 7, "final", 0))

    # Simulate running on an M1 Mac.
    monkeypatch.setattr(sys, "platform", "darwin")
    monkeypatch.setattr(platform, "machine", lambda: "arm64")

    conda_spec = {"dependencies": ["blah", "pip", {"pip": ["pip_pkg"]}]}
    serialized_env = ParsedRuntimeEnv({"conda": conda_spec}).serialize()
    output_conda = _get_conda_dict_with_ray_inserted(RuntimeEnv(serialized_env))

    # M1 wheels are not uploaded to AWS S3. So rather than have an S3 URL
    # inserted as a dependency, we should just have the string "ray==1.9.0".
    expected_conda = {
        "dependencies": [
            "blah",
            "pip",
            {"pip": ["ray==1.9.0", "ray[default]", "pip_pkg"]},
            "python=3.9.7",
        ]
    }
    assert output_conda == expected_conda
def get_proto_runtime_env(self):
    """Return the protobuf structure of runtime env.

    The structure is built on first use and stored in ``self._cached_pb``.
    """
    if self._cached_pb is not None:
        return self._cached_pb

    self._cached_pb = RuntimeEnv.from_dict(self, get_conda_uri, get_pip_uri)
    return self._cached_pb
def modify_context(
    self,
    uri: str,
    runtime_env: RuntimeEnv,
    context: RuntimeEnvContext,
    logger: Optional[logging.Logger] = default_logger,
):
    """Make ``context`` run the worker inside the conda environment.

    Does nothing when ``runtime_env`` has no conda section. A named conda
    environment is used as-is; otherwise the environment path is derived
    from the hash of the runtime_env's conda URI.
    """
    if not runtime_env.has_conda():
        return

    conda_env_name = runtime_env.conda_env_name()
    if not conda_env_name:
        _, env_hash = parse_uri(runtime_env.conda_uri())
        conda_env_name = self._get_path_from_hash(env_hash)

    context.py_executable = "python"
    context.command_prefix += get_conda_activate_commands(conda_env_name)
def _get_conda_dict_with_ray_inserted(
    runtime_env: RuntimeEnv, logger: Optional[logging.Logger] = default_logger
) -> Dict[str, Any]:
    """Returns the conda spec with the Ray and `python` dependency inserted."""
    parsed_conda = json.loads(runtime_env.conda_config())
    assert parsed_conda is not None

    # Decide which extra pip requirements accompany the user's conda spec.
    pip_extras = []
    ray_pip = current_ray_pip_specifier(logger=logger)
    if ray_pip:
        pip_extras = [ray_pip, "ray[default]"]
    elif runtime_env.get_extension("_inject_current_ray") == "True":
        pip_extras = _resolve_install_from_source_ray_dependencies()

    return inject_dependencies(parsed_conda, _current_py_version(), pip_extras)
def setup(self,
          runtime_env: RuntimeEnv,
          context: RuntimeEnvContext,
          logger: Optional[logging.Logger] = default_logger):
    """Download the runtime_env's py_modules and prepend them to PYTHONPATH.

    Does nothing when ``runtime_env`` has no py_modules.
    """
    if not runtime_env.py_modules():
        return

    module_dirs = [
        download_and_unpack_package(uri, self._resources_dir, logger=logger)
        for uri in runtime_env.py_modules()
    ]

    # Insert the py_modules directories into the PYTHONPATH.
    python_path = os.pathsep.join(module_dirs)
    if "PYTHONPATH" in context.env_vars:
        python_path = os.pathsep.join(
            [python_path, context.env_vars["PYTHONPATH"]])
    context.env_vars["PYTHONPATH"] = python_path
def setup(self,
          runtime_env: RuntimeEnv,
          context: RuntimeEnvContext,
          logger: Optional[logging.Logger] = default_logger):
    """Download the working_dir package and make the worker start inside it.

    Does nothing when ``runtime_env`` has no working_dir. Otherwise a ``cd``
    command is added to ``context.command_prefix`` and the directory is put
    at the front of ``PYTHONPATH``.
    """
    if not runtime_env.working_dir():
        return

    logger.info(f"Setup working dir for {runtime_env.working_dir()}")
    working_dir = download_and_unpack_package(
        runtime_env.working_dir(), self._resources_dir, logger=logger)
    context.command_prefix += [f"cd {working_dir}"]

    # Insert the working_dir as the first entry in PYTHONPATH. This is
    # compatible with users providing their own PYTHONPATH in env_vars.
    env_vars = context.env_vars
    if "PYTHONPATH" in env_vars:
        env_vars["PYTHONPATH"] = (
            working_dir + os.pathsep + env_vars["PYTHONPATH"])
    else:
        env_vars["PYTHONPATH"] = working_dir
def test_sync_job_config(shutdown_only):
    """The job config set at ``ray.init`` is visible on driver and worker."""
    num_java_workers_per_process = 8
    runtime_env = {"env_vars": {"key": "value"}}

    ray.init(job_config=ray.job_config.JobConfig(
        num_java_workers_per_process=num_java_workers_per_process,
        runtime_env=runtime_env))

    def check_synchronized(job_config):
        # Shared checks for the driver-side and worker-side job configs.
        assert (job_config.num_java_workers_per_process ==
                num_java_workers_per_process)
        job_runtime_env = RuntimeEnv(
            serialized_runtime_env=job_config.runtime_env_info.
            serialized_runtime_env)
        assert job_runtime_env.env_vars() == runtime_env["env_vars"]

    # Check that the job config is synchronized at the driver side.
    check_synchronized(ray.worker.global_worker.core_worker.get_job_config())

    @ray.remote
    def get_job_config():
        job_config = ray.worker.global_worker.core_worker.get_job_config()
        return job_config.SerializeToString()

    # Check that the job config is synchronized at the worker side.
    worker_job_config = gcs_utils.JobConfig()
    worker_job_config.ParseFromString(ray.get(get_job_config.remote()))
    check_synchronized(worker_job_config)
async def create(
    self,
    uri: str,
    runtime_env: RuntimeEnv,
    context: RuntimeEnvContext,
    logger: Optional[logging.Logger] = default_logger,
) -> int:
    """Run the pip processor for ``uri`` and return the directory size.

    Returns 0 without doing any work when ``runtime_env`` has no pip section.
    """
    if not runtime_env.has_pip():
        return 0

    _, uri_hash = parse_uri(uri)
    target_dir = self._get_path_from_hash(uri_hash)

    # The pip processor runs while holding the installs-and-deletions lock.
    with FileLock(self._installs_and_deletions_file_lock):
        PipProcessor(target_dir, runtime_env, logger).run()

    return get_directory_size_bytes(target_dir)
def modify_context(
    self,
    uri: str,
    runtime_env: RuntimeEnv,
    context: RuntimeEnvContext,
    logger: Optional[logging.Logger] = default_logger,
):
    """Set ``context.py_executable`` to the virtualenv's Python for ``uri``.

    Does nothing when ``runtime_env`` has no pip section.

    Raises:
        ValueError: If the virtualenv Python path does not exist.
    """
    if not runtime_env.has_pip():
        return

    # Update py_executable.
    _, uri_hash = parse_uri(uri)
    target_dir = self._get_path_from_hash(uri_hash)
    virtualenv_python = _PathHelper.get_virtualenv_python(target_dir)

    if os.path.exists(virtualenv_python):
        context.py_executable = virtualenv_python
        return

    raise ValueError(
        f"Local directory {target_dir} for URI {uri} does "
        "not exist on the cluster. Something may have gone wrong while "
        "installing the runtime_env `pip` packages.")
def modify_context(
    self,
    uri: str,
    runtime_env: RuntimeEnv,
    context: RuntimeEnvContext,
    logger: Optional[logging.Logger] = default_logger,
):
    """Prepend the pip install directory for ``uri`` to ``PYTHONPATH``.

    Does nothing when ``runtime_env`` has no pip section.

    Raises:
        ValueError: If the local directory for ``uri`` does not exist.
    """
    if not runtime_env.has_pip():
        return

    # Insert the target directory into the PYTHONPATH.
    # The parsed parts are not used below; the call is kept so that any
    # error parse_uri raises for this URI still surfaces here.
    _protocol, _uri_hash = parse_uri(uri)
    target_dir = get_local_dir_from_uri(uri, self._resources_dir)
    if not target_dir.exists():
        raise ValueError(
            f"Local directory {target_dir} for URI {uri} does "
            "not exist on the cluster. Something may have gone wrong while "
            "installing the runtime_env `pip` packages.")

    env_vars = context.env_vars
    if "PYTHONPATH" in env_vars:
        env_vars["PYTHONPATH"] = (
            str(target_dir) + os.pathsep + env_vars["PYTHONPATH"])
    else:
        env_vars["PYTHONPATH"] = str(target_dir)
def run_setup_with_logger():
    """Build and return the RuntimeEnvContext for the current request.

    Runs the conda, py_modules, working_dir and container managers against a
    fresh context, records which URIs the serialized environment uses, and
    then runs every configured plugin. All log output goes to the job's own
    logger.
    """
    runtime_env = RuntimeEnv(
        serialized_runtime_env=serialized_runtime_env)
    allocated_resource: dict = json.loads(
        serialized_allocated_resource_instances or "{}")

    # Use a separate logger for each job.
    per_job_logger = self.get_or_create_logger(request.job_id)
    # TODO(chenk008): Add log about allocated_resource to
    # avoid lint error. That will be moved to cgroup plugin.
    per_job_logger.debug(f"Worker has resource :"
                         f"{allocated_resource}")
    context = RuntimeEnvContext(env_vars=runtime_env.env_vars())
    self._conda_manager.setup(runtime_env, context, logger=per_job_logger)
    self._py_modules_manager.setup(
        runtime_env, context, logger=per_job_logger)
    self._working_dir_manager.setup(
        runtime_env, context, logger=per_job_logger)
    self._container_manager.setup(
        runtime_env, context, logger=per_job_logger)

    # Add the mapping of URIs -> the serialized environment to be
    # used for cache invalidation.
    if runtime_env.working_dir_uri():
        uri = runtime_env.working_dir_uri()
        self._uris_to_envs[uri].add(serialized_runtime_env)
    if runtime_env.py_modules_uris():
        for uri in runtime_env.py_modules_uris():
            self._uris_to_envs[uri].add(serialized_runtime_env)
    if runtime_env.conda_uri():
        uri = runtime_env.conda_uri()
        self._uris_to_envs[uri].add(serialized_runtime_env)
    if runtime_env.plugin_uris():
        for uri in runtime_env.plugin_uris():
            self._uris_to_envs[uri].add(serialized_runtime_env)

    # Run setup function from all the plugins
    for plugin_class_path, config in runtime_env.plugins():
        # Log through the per-job logger so plugin setup messages end up
        # with the rest of this job's setup output.
        per_job_logger.debug(
            f"Setting up runtime env plugin {plugin_class_path}")
        plugin_class = import_attr(plugin_class_path)
        # TODO(simon): implement uri support
        plugin_class.create("uri not implemented",
                            json.loads(config), context)
        plugin_class.modify_context("uri not implemented",
                                    json.loads(config), context)

    return context
def get_uri(self, runtime_env: RuntimeEnv) -> Optional[str]:
    """Return the runtime_env's working_dir value, or None if it is ""."""
    working_dir_uri = runtime_env.working_dir()
    return working_dir_uri if working_dir_uri != "" else None
def get_uri(self, runtime_env: RuntimeEnv) -> Optional[str]:
    """Return the conda URI from the RuntimeEnv if it exists, else None."""
    conda_uri = runtime_env.conda_uri()
    if conda_uri == "":
        # An empty string means no conda URI is set.
        return None
    return conda_uri
def deserialize(cls, serialized: str) -> "ParsedRuntimeEnv":
    """Rebuild an instance from a serialized runtime env, skipping validation."""
    env_dict = RuntimeEnv(serialized_runtime_env=serialized).to_dict()
    return cls(env_dict, _validate=False)
def run_setup_with_logger():
    """Build and return the RuntimeEnvContext for the current request.

    Sets up the container, then creates (or reuses from the URI caches) the
    working_dir, conda, pip and py_modules resources, records which URIs the
    serialized environment uses, and finally runs every configured plugin.
    """
    runtime_env = RuntimeEnv(serialized_runtime_env=serialized_runtime_env)
    allocated_resource: dict = json.loads(
        serialized_allocated_resource_instances or "{}"
    )

    # Use a separate logger for each job.
    per_job_logger = self.get_or_create_logger(request.job_id)
    # TODO(chenk008): Add log about allocated_resource to
    # avoid lint error. That will be moved to cgroup plugin.
    per_job_logger.debug(f"Worker has resource :" f"{allocated_resource}")

    context = RuntimeEnvContext(env_vars=runtime_env.env_vars())
    self._container_manager.setup(runtime_env, context, logger=per_job_logger)

    # Managers whose runtime_env field maps to at most one URI.
    single_uri_managers = [
        (self._working_dir_manager, self._working_dir_uri_cache),
        (self._conda_manager, self._conda_uri_cache),
        (self._pip_manager, self._pip_uri_cache),
    ]
    for manager, uri_cache in single_uri_managers:
        uri = manager.get_uri(runtime_env)
        if uri is not None:
            if uri in uri_cache:
                per_job_logger.debug(f"Cache hit for URI {uri}.")
                uri_cache.mark_used(uri, logger=per_job_logger)
            else:
                per_job_logger.debug(f"Cache miss for URI {uri}.")
                size_bytes = manager.create(
                    uri, runtime_env, context, logger=per_job_logger
                )
                uri_cache.add(uri, size_bytes, logger=per_job_logger)
        # Called even without a URI; each manager decides whether the
        # runtime_env concerns it.
        manager.modify_context(uri, runtime_env, context)

    # Set up py_modules. For now, py_modules uses multiple URIs so
    # the logic is slightly different from working_dir, conda, and
    # pip above.
    py_modules_manager = self._py_modules_manager
    py_modules_cache = self._py_modules_uri_cache
    py_modules_uris = py_modules_manager.get_uris(runtime_env)
    if py_modules_uris is not None:
        for uri in py_modules_uris:
            if uri in py_modules_cache:
                per_job_logger.debug(f"Cache hit for URI {uri}.")
                py_modules_cache.mark_used(uri, logger=per_job_logger)
            else:
                per_job_logger.debug(f"Cache miss for URI {uri}.")
                size_bytes = py_modules_manager.create(
                    uri, runtime_env, context, logger=per_job_logger
                )
                py_modules_cache.add(uri, size_bytes, logger=per_job_logger)
    py_modules_manager.modify_context(py_modules_uris, runtime_env, context)

    # Add the mapping of URIs -> the serialized environment to be
    # used for cache invalidation.
    working_dir_uri = runtime_env.working_dir_uri()
    if working_dir_uri:
        self._uris_to_envs[working_dir_uri].add(serialized_runtime_env)
    for module_uri in runtime_env.py_modules_uris() or ():
        self._uris_to_envs[module_uri].add(serialized_runtime_env)
    conda_uri = runtime_env.conda_uri()
    if conda_uri:
        self._uris_to_envs[conda_uri].add(serialized_runtime_env)
    pip_uri = runtime_env.pip_uri()
    if pip_uri:
        self._uris_to_envs[pip_uri].add(serialized_runtime_env)
    for plugin_uri in runtime_env.plugin_uris() or ():
        self._uris_to_envs[plugin_uri].add(serialized_runtime_env)

    # Run setup function from all the plugins
    for plugin_class_path, config in runtime_env.plugins():
        per_job_logger.debug(
            f"Setting up runtime env plugin {plugin_class_path}"
        )
        plugin_class = import_attr(plugin_class_path)
        # TODO(simon): implement uri support
        plugin_class.create("uri not implemented", json.loads(config), context)
        plugin_class.modify_context(
            "uri not implemented", json.loads(config), context
        )

    return context
def get_uri(self, runtime_env: RuntimeEnv) -> Optional[str]:
    """Return the pip URI from the RuntimeEnv if it exists, else None."""
    pip_uri = runtime_env.pip_uri()
    return None if pip_uri == "" else pip_uri