Пример #1
0
    def _upload_working_dir_if_needed(self, runtime_env: Dict[str, Any]):
        if "working_dir" in runtime_env:
            working_dir = runtime_env["working_dir"]
            try:
                parse_uri(working_dir)
                is_uri = True
                logger.debug("working_dir is already a valid URI.")
            except ValueError:
                is_uri = False

            if not is_uri:
                logger.debug("working_dir is not a URI, attempting to upload.")
                package_uri = self._upload_package_if_needed(
                    working_dir, excludes=runtime_env.get("excludes", None))
                runtime_env["working_dir"] = package_uri
Пример #2
0
    def runtime_env_contains_remote_uris(cls, v):
        # Ensure that all uris in py_modules and working_dir are remote

        if v is None:
            return

        uris = v.get("py_modules", [])
        if "working_dir" in v:
            uris.append(v["working_dir"])

        for uri in uris:
            if uri is not None:
                parse_uri(uri)

        return v
Пример #3
0
    def modify_context(
        self,
        uris: List[str],
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        if not runtime_env.has_pip():
            return
        # PipPlugin only uses a single URI.
        uri = uris[0]
        # Update py_executable.
        protocol, hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(hash)
        virtualenv_python = _PathHelper.get_virtualenv_python(target_dir)

        if not os.path.exists(virtualenv_python):
            raise ValueError(
                f"Local directory {target_dir} for URI {uri} does "
                "not exist on the cluster. Something may have gone wrong while "
                "installing the runtime_env `pip` packages.")
        context.py_executable = virtualenv_python
        context.command_prefix += [
            _PathHelper.get_virtualenv_activate_command(target_dir)
        ]
Пример #4
0
    def create(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ) -> int:
        logger.debug("Setting up pip for runtime_env: "
                     f"{runtime_env.serialize()}")
        protocol, hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(hash)

        pip_packages: List[str] = runtime_env.pip_packages()
        with FileLock(self._installs_and_deletions_file_lock):
            _install_pip_list_to_dir(pip_packages, target_dir, logger=logger)

            # Despite Ray being removed from the input pip list during
            # validation, other packages in the pip list (for example,
            # xgboost_ray) may themselves include Ray as a dependency.  In this
            # case, we will have inadvertently installed the latest Ray version
            # in the target_dir, which may cause Ray version mismatch issues.
            # Uninstall it here, if it exists, to make the workers use the Ray
            # that is already installed in the cluster.
            #
            # In the case where the user explicitly wants to include Ray in
            # their pip list (and signals this by setting the environment
            # variable below) then we don't want this deletion logic, so we
            # skip it.
            if os.environ.get(RAY_RUNTIME_ENV_ALLOW_RAY_IN_PIP) != 1:
                ray_path = Path(target_dir) / "ray"
                if ray_path.exists() and ray_path.is_dir():
                    shutil.rmtree(ray_path)
        return get_directory_size_bytes(target_dir)
Пример #5
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> int:
        """Delete URI and return the number of bytes deleted."""
        logger.info("Got request to delete pip URI %s", uri)
        protocol, hash = parse_uri(uri)
        if protocol != Protocol.PIP:
            raise ValueError("PipManager can only delete URIs with protocol "
                             f"pip. Received protocol {protocol}, URI {uri}")

        # Cancel running create task.
        task = self._creating_task.pop(hash, None)
        if task is not None:
            task.cancel()

        pip_env_path = self._get_path_from_hash(hash)
        local_dir_size = get_directory_size_bytes(pip_env_path)
        del self._create_locks[uri]
        try:
            shutil.rmtree(pip_env_path)
        except OSError as e:
            logger.warning(
                f"Error when deleting pip env {pip_env_path}: {str(e)}")
            return 0

        return local_dir_size
Пример #6
0
def uri_to_http_components(package_uri: str) -> Tuple[str, str]:
    if not package_uri.endswith(".zip"):
        raise ValueError(f"package_uri ({package_uri}) does not end in .zip")
    # We need to strip the gcs:// prefix and .zip suffix to make it
    # possible to pass the package_uri over HTTP.
    protocol, package_name = parse_uri(package_uri)
    return protocol.value, package_name[:-len(".zip")]
Пример #7
0
        def _create():
            logger.debug("Setting up conda for runtime_env: "
                         f"{runtime_env.serialize()}")
            protocol, hash = parse_uri(uri)
            conda_env_name = self._get_path_from_hash(hash)

            conda_dict = _get_conda_dict_with_ray_inserted(runtime_env,
                                                           logger=logger)

            logger.info(f"Setting up conda environment with {runtime_env}")
            with FileLock(self._installs_and_deletions_file_lock):
                try:
                    conda_yaml_file = os.path.join(self._resources_dir,
                                                   "environment.yml")
                    with open(conda_yaml_file, "w") as file:
                        yaml.dump(conda_dict, file)
                    create_conda_env_if_needed(conda_yaml_file,
                                               prefix=conda_env_name,
                                               logger=logger)
                finally:
                    os.remove(conda_yaml_file)

                if runtime_env.get_extension("_inject_current_ray") == "True":
                    _inject_ray_to_conda_site(conda_path=conda_env_name,
                                              logger=logger)
            logger.info(
                f"Finished creating conda environment at {conda_env_name}")
            return get_directory_size_bytes(conda_env_name)
Пример #8
0
    def setup(self,
              runtime_env: RuntimeEnv,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        if not runtime_env.has_conda() and not runtime_env.has_pip():
            return

        logger.debug("Setting up conda or pip for runtime_env: "
                     f"{runtime_env.serialize()}")

        if runtime_env.conda_env_name():
            conda_env_name = runtime_env.conda_env_name()
        else:
            conda_dict = get_conda_dict(runtime_env, self._resources_dir)
            protocol, hash = parse_uri(runtime_env.conda_uri())
            conda_env_name = self._get_path_from_hash(hash)
            assert conda_dict is not None

            ray_pip = current_ray_pip_specifier(logger=logger)
            if ray_pip:
                extra_pip_dependencies = [ray_pip, "ray[default]"]
            elif runtime_env.get_extension("_inject_current_ray") == "True":
                extra_pip_dependencies = (
                    _resolve_install_from_source_ray_dependencies())
            else:
                extra_pip_dependencies = []
            conda_dict = inject_dependencies(conda_dict, _current_py_version(),
                                             extra_pip_dependencies)

            # It is not safe for multiple processes to install conda envs
            # concurrently, even if the envs are different, so use a global
            # lock for all conda installs.
            # See https://github.com/ray-project/ray/issues/17086
            file_lock_name = "ray-conda-install.lock"
            with FileLock(os.path.join(self._resources_dir, file_lock_name)):
                try:
                    conda_yaml_file = os.path.join(self._resources_dir,
                                                   "environment.yml")
                    with open(conda_yaml_file, "w") as file:
                        yaml.dump(conda_dict, file)

                    if conda_env_name in self._created_envs:
                        logger.debug(f"Conda env {conda_env_name} already "
                                     "created, skipping creation.")
                    else:
                        create_conda_env(conda_yaml_file,
                                         prefix=conda_env_name,
                                         logger=logger)
                        self._created_envs.add(conda_env_name)
                finally:
                    os.remove(conda_yaml_file)

                if runtime_env.get_extension("_inject_current_ray"):
                    _inject_ray_to_conda_site(conda_path=conda_env_name,
                                              logger=logger)

        context.py_executable = "python"
        context.command_prefix += get_conda_activate_commands(conda_env_name)
        logger.info(
            f"Finished setting up runtime environment at {conda_env_name}")
Пример #9
0
    async def create(
        self,
        uri: str,
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ) -> int:
        if not runtime_env.has_pip():
            return 0

        protocol, hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(hash)

        async def _create_for_hash():
            await PipProcessor(
                target_dir,
                runtime_env,
                logger,
            )

            loop = get_running_loop()
            return await loop.run_in_executor(None, get_directory_size_bytes,
                                              target_dir)

        if uri not in self._create_locks:
            # async lock to prevent the same virtualenv being concurrently installed
            self._create_locks[uri] = asyncio.Lock()

        async with self._create_locks[uri]:
            self._creating_task[hash] = task = create_task(_create_for_hash())
            task.add_done_callback(
                lambda _: self._creating_task.pop(hash, None))
            return await task
Пример #10
0
def upload_working_dir_if_needed(
    runtime_env: Dict[str, Any],
    scratch_dir: str,
    logger: Optional[logging.Logger] = default_logger,
) -> Dict[str, Any]:
    """Uploads the working_dir and replaces it with a URI.

    If the working_dir is already a URI, this is a no-op.
    """
    working_dir = runtime_env.get("working_dir")
    if working_dir is None:
        return runtime_env

    if not isinstance(working_dir, str) and not isinstance(working_dir, Path):
        raise TypeError(
            "working_dir must be a string or Path (either a local path "
            f"or remote URI), got {type(working_dir)}.")

    if isinstance(working_dir, Path):
        working_dir = str(working_dir)

    # working_dir is already a URI -- just pass it through.
    try:
        protocol, path = parse_uri(working_dir)
    except ValueError:
        protocol, path = None, None

    if protocol is not None:
        if protocol in Protocol.remote_protocols(
        ) and not path.endswith(".zip"):
            raise ValueError("Only .zip files supported for remote URIs.")
        return runtime_env

    excludes = runtime_env.get("excludes", None)
    try:
        working_dir_uri = get_uri_for_directory(working_dir, excludes=excludes)
    except ValueError:  # working_dir is not a directory
        package_path = Path(working_dir)
        if not package_path.exists() or package_path.suffix != ".zip":
            raise ValueError(f"directory {package_path} must be an existing "
                             "directory or a zip package")

        pkg_uri = get_uri_for_package(package_path)
        upload_package_to_gcs(pkg_uri, package_path.read_bytes())
        runtime_env["working_dir"] = pkg_uri
        return runtime_env

    upload_package_if_needed(
        working_dir_uri,
        scratch_dir,
        working_dir,
        include_parent_dir=False,
        excludes=excludes,
        logger=logger,
    )
    runtime_env["working_dir"] = working_dir_uri
    return runtime_env
Пример #11
0
def uri_to_http_components(package_uri: str) -> Tuple[str, str]:
    suffix = Path(package_uri).suffix
    if suffix not in {".zip", ".whl"}:
        raise ValueError(
            f"package_uri ({package_uri}) does not end in .zip or .whl")
    # We need to strip the <protocol>:// prefix to make it possible to pass
    # the package_uri over HTTP.
    protocol, package_name = parse_uri(package_uri)
    return protocol.value, package_name
Пример #12
0
def _check_is_uri(s: str) -> bool:
    try:
        protocol, path = parse_uri(s)
    except ValueError:
        protocol, path = None, None

    if protocol in Protocol.remote_protocols() and not path.endswith(".zip"):
        raise ValueError("Only .zip files supported for remote URIs.")

    return protocol is not None
Пример #13
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> bool:
        logger.error(f"Got request to delete URI {uri}")
        protocol, hash = parse_uri(uri)
        if protocol != Protocol.CONDA:
            raise ValueError(
                "CondaManager can only delete URIs with protocol "
                f"conda.  Received protocol {protocol}, URI {uri}")

        conda_env_path = self._get_path_from_hash(hash)
        self._created_envs.remove(conda_env_path)
        successful = delete_conda_env(prefix=conda_env_path, logger=logger)
        if not successful:
            logger.debug(f"Error when deleting conda env {conda_env_path}. ")
        return successful
Пример #14
0
def validate_uri(uri: str):
    if not isinstance(uri, str):
        raise TypeError("URIs for working_dir and py_modules must be "
                        f"strings, got {type(uri)}.")

    try:
        from ray._private.runtime_env.packaging import parse_uri, Protocol
        protocol, path = parse_uri(uri)
    except ValueError:
        raise ValueError(
            f"{uri} is not a valid URI. Passing directories or modules to "
            "be dynamically uploaded is only supported at the job level "
            "(i.e., passed to `ray.init`).")

    if protocol == Protocol.S3 and not path.endswith(".zip"):
        raise ValueError("Only .zip files supported for S3 URIs.")
Пример #15
0
    def modify_context(
        self,
        uri: str,
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        if not runtime_env.has_conda():
            return

        if runtime_env.conda_env_name():
            conda_env_name = runtime_env.conda_env_name()
        else:
            protocol, hash = parse_uri(runtime_env.conda_uri())
            conda_env_name = self._get_path_from_hash(hash)
        context.py_executable = "python"
        context.command_prefix += get_conda_activate_commands(conda_env_name)
Пример #16
0
 def _upload_package(self,
                     package_uri: str,
                     package_path: str,
                     include_parent_dir: Optional[bool] = False,
                     excludes: Optional[List[str]] = None) -> bool:
     with tempfile.TemporaryDirectory() as tmp_dir:
         package_name = parse_uri(package_uri)[1]
         package_file = Path(tmp_dir) / package_name
         create_package(package_path,
                        package_file,
                        include_parent_dir=include_parent_dir,
                        excludes=excludes)
         self._do_request("PUT",
                          JOBS_API_ROUTE_PACKAGE,
                          data=package_file.read_bytes(),
                          params={"package_uri": package_uri})
         package_file.unlink()
Пример #17
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> bool:
        logger.info(f"Got request to delete URI {uri}")
        protocol, hash = parse_uri(uri)
        if protocol != Protocol.PIP:
            raise ValueError("PipManager can only delete URIs with protocol "
                             f"pip. Received protocol {protocol}, URI {uri}")

        pip_env_path = self._get_path_from_hash(hash)
        try:
            shutil.rmtree(pip_env_path)
            successful = True
        except OSError:
            successful = False
            logger.warning(f"Error when deleting pip env {pip_env_path}.")
        return successful
Пример #18
0
    async def create(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ) -> int:
        if not runtime_env.has_pip():
            return 0

        protocol, hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(hash)

        with FileLock(self._installs_and_deletions_file_lock):
            pip_processor = PipProcessor(target_dir, runtime_env, logger)
            pip_processor.run()

        return get_directory_size_bytes(target_dir)
Пример #19
0
    def modify_context(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        if not runtime_env.has_pip():
            return
        # Update py_executable.
        protocol, hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(hash)
        virtualenv_python = _PathHelper.get_virtualenv_python(target_dir)

        if not os.path.exists(virtualenv_python):
            raise ValueError(
                f"Local directory {target_dir} for URI {uri} does "
                "not exist on the cluster. Something may have gone wrong while "
                "installing the runtime_env `pip` packages.")
        context.py_executable = virtualenv_python
Пример #20
0
 def modify_context(
     self,
     uri: str,
     runtime_env: RuntimeEnv,
     context: RuntimeEnvContext,
     logger: Optional[logging.Logger] = default_logger,
 ):
     if not runtime_env.has_pip():
         return
     # Insert the target directory into the PYTHONPATH.
     protocol, hash = parse_uri(uri)
     target_dir = get_local_dir_from_uri(uri, self._resources_dir)
     if not target_dir.exists():
         raise ValueError(
             f"Local directory {target_dir} for URI {uri} does "
             "not exist on the cluster. Something may have gone wrong while "
             "installing the runtime_env `pip` packages.")
     python_path = str(target_dir)
     if "PYTHONPATH" in context.env_vars:
         python_path += os.pathsep + context.env_vars["PYTHONPATH"]
     context.env_vars["PYTHONPATH"] = python_path
Пример #21
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> int:
        """Delete URI and return the number of bytes deleted."""
        logger.info(f"Got request to delete pip URI {uri}")
        protocol, hash = parse_uri(uri)
        if protocol != Protocol.PIP:
            raise ValueError("PipManager can only delete URIs with protocol "
                             f"pip. Received protocol {protocol}, URI {uri}")

        pip_env_path = self._get_path_from_hash(hash)
        local_dir_size = get_directory_size_bytes(pip_env_path)
        try:
            with FileLock(self._installs_and_deletions_file_lock):
                shutil.rmtree(pip_env_path)
        except OSError as e:
            logger.warning(
                f"Error when deleting pip env {pip_env_path}: {str(e)}")
            return 0

        return local_dir_size
Пример #22
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> int:
        """Delete URI and return the number of bytes deleted."""
        logger.info(f"Got request to delete URI {uri}")
        protocol, hash = parse_uri(uri)
        if protocol != Protocol.CONDA:
            raise ValueError(
                "CondaManager can only delete URIs with protocol "
                f"conda.  Received protocol {protocol}, URI {uri}")

        conda_env_path = self._get_path_from_hash(hash)
        local_dir_size = get_directory_size_bytes(conda_env_path)

        with FileLock(self._installs_and_deletions_file_lock):
            successful = delete_conda_env(prefix=conda_env_path, logger=logger)
        if not successful:
            logger.warning(f"Error when deleting conda env {conda_env_path}. ")
            return 0

        return local_dir_size
Пример #23
0
def upload_working_dir_if_needed(
        runtime_env: Dict[str, Any],
        scratch_dir: str,
        logger: Optional[logging.Logger] = default_logger) -> Dict[str, Any]:
    """Uploads the working_dir and replaces it with a URI.

    If the working_dir is already a URI, this is a no-op.
    """
    working_dir = runtime_env.get("working_dir")
    if working_dir is None:
        return runtime_env

    if not isinstance(working_dir, str):
        raise TypeError(
            "working_dir must be a string (either a local path or remote "
            f"URI), got {type(working_dir)}.")

    # working_dir is already a URI -- just pass it through.
    try:
        protocol, path = parse_uri(working_dir)
    except ValueError:
        protocol, path = None, None

    if protocol is not None:
        if protocol in Protocol.remote_protocols(
        ) and not path.endswith(".zip"):
            raise ValueError("Only .zip files supported for remote URIs.")
        return runtime_env

    excludes = runtime_env.get("excludes", None)
    working_dir_uri = get_uri_for_directory(working_dir, excludes=excludes)
    upload_package_if_needed(working_dir_uri,
                             scratch_dir,
                             working_dir,
                             include_parent_dir=False,
                             excludes=excludes,
                             logger=logger)
    runtime_env["working_dir"] = working_dir_uri
    return runtime_env
Пример #24
0
    async def create(
        self,
        uri: str,
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ) -> int:
        if not runtime_env.has_pip():
            return 0

        protocol, hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(hash)

        async def _create_for_hash():
            await PipProcessor(target_dir, runtime_env, logger)

            loop = get_running_loop()
            return await loop.run_in_executor(None, get_directory_size_bytes,
                                              target_dir)

        self._creating_task[hash] = task = create_task(_create_for_hash())
        task.add_done_callback(lambda _: self._creating_task.pop(hash, None))
        return await task
Пример #25
0
def test_parsing(parsing_tuple):
    uri, protocol, package_name = parsing_tuple
    parsed_protocol, parsed_package_name = parse_uri(uri)

    assert protocol == parsed_protocol
    assert package_name == parsed_package_name