Example #1
            def run_setup_with_logger():
                runtime_env = RuntimeEnv(
                    serialized_runtime_env=serialized_runtime_env)
                allocated_resource: dict = json.loads(
                    serialized_allocated_resource_instances or "{}")

                # Use a separate logger for each job.
                per_job_logger = self.get_or_create_logger(request.job_id)
                # TODO(chenk008): Add log about allocated_resource to
                # avoid lint error. That will be moved to cgroup plugin.
                per_job_logger.debug(f"Worker has resource :"
                                     f"{allocated_resource}")
                context = RuntimeEnvContext(env_vars=runtime_env.env_vars())
                self._pip_manager.setup(
                    runtime_env, context, logger=per_job_logger)
                self._conda_manager.setup(
                    runtime_env, context, logger=per_job_logger)
                self._py_modules_manager.setup(
                    runtime_env, context, logger=per_job_logger)
                self._working_dir_manager.setup(
                    runtime_env, context, logger=per_job_logger)
                self._container_manager.setup(
                    runtime_env, context, logger=per_job_logger)

                # Add the mapping of URIs -> the serialized environment to be
                # used for cache invalidation.
                if runtime_env.working_dir_uri():
                    uri = runtime_env.working_dir_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.py_modules_uris():
                    for uri in runtime_env.py_modules_uris():
                        self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.conda_uri():
                    uri = runtime_env.conda_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.pip_uri():
                    uri = runtime_env.pip_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.plugin_uris():
                    for uri in runtime_env.plugin_uris():
                        self._uris_to_envs[uri].add(serialized_runtime_env)

                # Run setup function from all the plugins
                for plugin_class_path, config in runtime_env.plugins():
                    per_job_logger.debug(
                        f"Setting up runtime env plugin {plugin_class_path}")
                    plugin_class = import_attr(plugin_class_path)
                    # TODO(simon): implement uri support
                    plugin_class.create("uri not implemented",
                                        json.loads(config), context)
                    plugin_class.modify_context("uri not implemented",
                                                json.loads(config), context)

                return context
Example #2
    def setup(self,
              runtime_env: RuntimeEnv,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        if not runtime_env.has_conda() and not runtime_env.has_pip():
            return

        logger.debug("Setting up conda or pip for runtime_env: "
                     f"{runtime_env.serialize()}")

        if runtime_env.conda_env_name():
            conda_env_name = runtime_env.conda_env_name()
        else:
            conda_dict = get_conda_dict(runtime_env, self._resources_dir)
            protocol, hash = parse_uri(runtime_env.conda_uri())
            conda_env_name = self._get_path_from_hash(hash)
            assert conda_dict is not None

            ray_pip = current_ray_pip_specifier(logger=logger)
            if ray_pip:
                extra_pip_dependencies = [ray_pip, "ray[default]"]
            elif runtime_env.get_extension("_inject_current_ray") == "True":
                extra_pip_dependencies = (
                    _resolve_install_from_source_ray_dependencies())
            else:
                extra_pip_dependencies = []
            conda_dict = inject_dependencies(conda_dict, _current_py_version(),
                                             extra_pip_dependencies)

            # It is not safe for multiple processes to install conda envs
            # concurrently, even if the envs are different, so use a global
            # lock for all conda installs.
            # See https://github.com/ray-project/ray/issues/17086
            file_lock_name = "ray-conda-install.lock"
            with FileLock(os.path.join(self._resources_dir, file_lock_name)):
                try:
                    conda_yaml_file = os.path.join(self._resources_dir,
                                                   "environment.yml")
                    with open(conda_yaml_file, "w") as file:
                        yaml.dump(conda_dict, file)

                    if conda_env_name in self._created_envs:
                        logger.debug(f"Conda env {conda_env_name} already "
                                     "created, skipping creation.")
                    else:
                        create_conda_env(conda_yaml_file,
                                         prefix=conda_env_name,
                                         logger=logger)
                        self._created_envs.add(conda_env_name)
                finally:
                    os.remove(conda_yaml_file)

                if runtime_env.get_extension("_inject_current_ray"):
                    _inject_ray_to_conda_site(conda_path=conda_env_name,
                                              logger=logger)

        context.py_executable = "python"
        context.command_prefix += get_conda_activate_commands(conda_env_name)
        logger.info(
            f"Finished setting up runtime environment at {conda_env_name}")
Example #3
    def modify_context(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        if not runtime_env.has_conda():
            return

        if runtime_env.conda_env_name():
            conda_env_name = runtime_env.conda_env_name()
        else:
            protocol, hash = parse_uri(runtime_env.conda_uri())
            conda_env_name = self._get_path_from_hash(hash)
        context.py_executable = "python"
        context.command_prefix += get_conda_activate_commands(conda_env_name)
Example #4
            def run_setup_with_logger():
                runtime_env = RuntimeEnv(serialized_runtime_env=serialized_runtime_env)
                allocated_resource: dict = json.loads(
                    serialized_allocated_resource_instances or "{}"
                )

                # Use a separate logger for each job.
                per_job_logger = self.get_or_create_logger(request.job_id)
                # TODO(chenk008): Add log about allocated_resource to
                # avoid lint error. That will be moved to cgroup plugin.
                per_job_logger.debug(f"Worker has resource :" f"{allocated_resource}")
                context = RuntimeEnvContext(env_vars=runtime_env.env_vars())
                self._container_manager.setup(
                    runtime_env, context, logger=per_job_logger
                )

                for (manager, uri_cache) in [
                    (self._working_dir_manager, self._working_dir_uri_cache),
                    (self._conda_manager, self._conda_uri_cache),
                    (self._pip_manager, self._pip_uri_cache),
                ]:
                    uri = manager.get_uri(runtime_env)
                    if uri is not None:
                        if uri not in uri_cache:
                            per_job_logger.debug(f"Cache miss for URI {uri}.")
                            size_bytes = manager.create(
                                uri, runtime_env, context, logger=per_job_logger
                            )
                            uri_cache.add(uri, size_bytes, logger=per_job_logger)
                        else:
                            per_job_logger.debug(f"Cache hit for URI {uri}.")
                            uri_cache.mark_used(uri, logger=per_job_logger)
                    manager.modify_context(uri, runtime_env, context)

                # Set up py_modules. For now, py_modules uses multiple URIs so
                # the logic is slightly different from working_dir, conda, and
                # pip above.
                py_modules_uris = self._py_modules_manager.get_uris(runtime_env)
                if py_modules_uris is not None:
                    for uri in py_modules_uris:
                        if uri not in self._py_modules_uri_cache:
                            per_job_logger.debug(f"Cache miss for URI {uri}.")
                            size_bytes = self._py_modules_manager.create(
                                uri, runtime_env, context, logger=per_job_logger
                            )
                            self._py_modules_uri_cache.add(
                                uri, size_bytes, logger=per_job_logger
                            )
                        else:
                            per_job_logger.debug(f"Cache hit for URI {uri}.")
                            self._py_modules_uri_cache.mark_used(
                                uri, logger=per_job_logger
                            )
                self._py_modules_manager.modify_context(
                    py_modules_uris, runtime_env, context
                )

                # Add the mapping of URIs -> the serialized environment to be
                # used for cache invalidation.
                if runtime_env.working_dir_uri():
                    uri = runtime_env.working_dir_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.py_modules_uris():
                    for uri in runtime_env.py_modules_uris():
                        self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.conda_uri():
                    uri = runtime_env.conda_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.pip_uri():
                    uri = runtime_env.pip_uri()
                    self._uris_to_envs[uri].add(serialized_runtime_env)
                if runtime_env.plugin_uris():
                    for uri in runtime_env.plugin_uris():
                        self._uris_to_envs[uri].add(serialized_runtime_env)

                # Run setup function from all the plugins
                for plugin_class_path, config in runtime_env.plugins():
                    per_job_logger.debug(
                        f"Setting up runtime env plugin {plugin_class_path}"
                    )
                    plugin_class = import_attr(plugin_class_path)
                    # TODO(simon): implement uri support
                    plugin_class.create(
                        "uri not implemented", json.loads(config), context
                    )
                    plugin_class.modify_context(
                        "uri not implemented", json.loads(config), context
                    )

                return context
Example #5
    def get_uri(self, runtime_env: RuntimeEnv) -> Optional[str]:
        """Return the conda URI from the RuntimeEnv if it exists, else None."""
        conda_uri = runtime_env.conda_uri()
        if conda_uri != "":
            return conda_uri
        return None
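
In Example #4, get_uri() is what the agent's per-manager cache loop calls to decide whether the conda environment still needs to be created. Below is a minimal sketch of that caller shape; the standalone function and its conda_manager / conda_uri_cache parameters are illustrative stand-ins for the agent's self._conda_manager and self._conda_uri_cache from Example #4, not part of the Ray API.

    def setup_conda_with_cache(conda_manager, conda_uri_cache, runtime_env,
                               context, logger):
        # Ask the manager for its URI; None means there is nothing to set up.
        uri = conda_manager.get_uri(runtime_env)
        if uri is not None:
            if uri not in conda_uri_cache:
                # Cache miss: build the conda env and record its size in the
                # cache, mirroring Example #4.
                size_bytes = conda_manager.create(
                    uri, runtime_env, context, logger=logger)
                conda_uri_cache.add(uri, size_bytes, logger=logger)
            else:
                # Cache hit: mark the entry as used so it is not evicted.
                conda_uri_cache.mark_used(uri, logger=logger)
        # modify_context runs either way; for conda it appends the activate
        # commands to context.command_prefix (see Example #3).
        conda_manager.modify_context(uri, runtime_env, context)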