def _upload_package_if_needed(
    self,
    package_path: str,
    include_parent_dir: bool = False,
    excludes: Optional[List[str]] = None,
    is_file: bool = False,
) -> str:
    """Upload the package at ``package_path`` unless its URI already exists.

    Args:
        package_path: Local path of the file or directory to package.
        include_parent_dir: Forwarded unchanged to ``self._upload_package``.
        excludes: Passed to ``get_uri_for_directory`` when deriving the URI
            of a directory, and forwarded to ``self._upload_package``.
        is_file: When true the URI is derived with ``get_uri_for_package``;
            otherwise with ``get_uri_for_directory``.

    Returns:
        The package URI, whether or not an upload was performed.
    """
    package_uri = (
        get_uri_for_package(Path(package_path))
        if is_file
        else get_uri_for_directory(package_path, excludes=excludes)
    )

    # Guard clause: nothing to do when the package is already present.
    if self._package_exists(package_uri):
        logger.info(
            f"Package {package_uri} already exists, skipping upload.")
        return package_uri

    self._upload_package(
        package_uri,
        package_path,
        include_parent_dir=include_parent_dir,
        excludes=excludes,
        is_file=is_file,
    )
    return package_uri
def upload_working_dir_if_needed(
    runtime_env: Dict[str, Any],
    scratch_dir: str,
    logger: Optional[logging.Logger] = default_logger,
) -> Dict[str, Any]:
    """Upload ``runtime_env["working_dir"]`` and swap it for its URI.

    A missing ``working_dir`` or one that already parses as a URI leaves
    ``runtime_env`` untouched. A local directory is uploaded through
    ``upload_package_if_needed``; a local ``.zip`` file is uploaded as-is
    through ``upload_package_to_gcs``.

    Args:
        runtime_env: Runtime environment dict; updated in place.
        scratch_dir: Forwarded to ``upload_package_if_needed``.
        logger: Forwarded to ``upload_package_if_needed``.

    Returns:
        The same ``runtime_env`` object that was passed in.

    Raises:
        TypeError: ``working_dir`` is neither a ``str`` nor a ``Path``.
        ValueError: A remote URI does not end in ``.zip``, or the local
            path is neither a directory nor an existing ``.zip`` file.
    """
    working_dir = runtime_env.get("working_dir")
    if working_dir is None:
        return runtime_env

    if not isinstance(working_dir, (str, Path)):
        raise TypeError(
            "working_dir must be a string or Path (either a local path "
            f"or remote URI), got {type(working_dir)}.")

    if isinstance(working_dir, Path):
        working_dir = str(working_dir)

    # A value that parses as a URI is passed through without uploading.
    try:
        protocol, path = parse_uri(working_dir)
    except ValueError:
        protocol = path = None

    if protocol is not None:
        is_remote = protocol in Protocol.remote_protocols()
        if is_remote and not path.endswith(".zip"):
            raise ValueError("Only .zip files supported for remote URIs.")
        return runtime_env

    excludes = runtime_env.get("excludes")
    try:
        directory_uri = get_uri_for_directory(working_dir, excludes=excludes)
    except ValueError:
        # working_dir is not a directory; the only other accepted form is
        # an existing .zip archive, which is uploaded verbatim.
        zip_path = Path(working_dir)
        if not zip_path.exists() or zip_path.suffix != ".zip":
            raise ValueError(f"directory {zip_path} must be an existing "
                             "directory or a zip package")
        zip_uri = get_uri_for_package(zip_path)
        upload_package_to_gcs(zip_uri, zip_path.read_bytes())
        runtime_env["working_dir"] = zip_uri
    else:
        upload_package_if_needed(
            directory_uri,
            scratch_dir,
            working_dir,
            include_parent_dir=False,
            excludes=excludes,
            logger=logger,
        )
        runtime_env["working_dir"] = directory_uri

    return runtime_env
def test_get_uri_for_package():
    """The URI for ``/tmp/my-pkg.whl`` is ``gcs://my-pkg.whl``."""
    wheel_path = Path("/tmp/my-pkg.whl")
    expected_uri = "gcs://my-pkg.whl"
    assert get_uri_for_package(wheel_path) == expected_uri
def upload_py_modules_if_needed(
    runtime_env: Dict[str, Any],
    scratch_dir: Optional[str] = None,
    logger: Optional[logging.Logger] = default_logger,
    upload_fn=None,
) -> Dict[str, Any]:
    """Uploads the entries in py_modules and replaces them with a list of URIs.

    For each entry that is already a URI, this is a no-op.

    Args:
        runtime_env: Runtime environment dict; its ``"py_modules"`` entry is
            replaced in place with the list of URIs.
        scratch_dir: Forwarded to ``upload_package_if_needed``. ``None``
            (the default) means the current working directory at call time.
        logger: Forwarded to ``upload_package_if_needed``.
        upload_fn: Optional callable used instead of the built-in upload.
            It is called as ``upload_fn(path, excludes=...)`` for
            directories and ``upload_fn(path, excludes=None, is_file=True)``
            for ``.whl`` files.

    Returns:
        The same ``runtime_env`` object that was passed in.

    Raises:
        TypeError: ``py_modules`` is not a list, or an entry is not a
            ``str``, ``Path`` or imported module.
        ValueError: A module's ``__path__`` does not contain exactly one
            entry, or a local path is neither a directory nor a ``.whl``
            file.
    """
    py_modules = runtime_env.get("py_modules")
    if py_modules is None:
        return runtime_env

    if not isinstance(py_modules, list):
        raise TypeError(
            "py_modules must be a List of local paths, imported modules, or "
            f"URIs, got {type(py_modules)}.")

    # Resolve the default here rather than in the signature: a signature
    # default of os.getcwd() is evaluated once at import time and goes stale
    # if the process later changes directory.
    if scratch_dir is None:
        scratch_dir = os.getcwd()

    py_modules_uris = []
    for module in py_modules:
        if isinstance(module, str):
            # module_path is a local path or a URI.
            module_path = module
        elif isinstance(module, Path):
            module_path = str(module)
        elif isinstance(module, ModuleType):
            # NOTE(edoakes): Python allows some installed Python packages to
            # be split into multiple directories. We could probably handle
            # this, but it seems tricky & uncommon. If it's a problem for
            # users, we can add this support on demand.
            # Single-file modules have no __path__ at all; report them with
            # the same error instead of leaking an AttributeError.
            module_dirs = list(getattr(module, "__path__", ()))
            if len(module_dirs) != 1:
                raise ValueError(
                    "py_modules only supports modules whose __path__ has length 1."
                )
            [module_path] = module_dirs
        else:
            raise TypeError("py_modules must be a list of file paths, URIs, "
                            f"or imported modules, got {type(module)}.")

        if _check_is_uri(module_path):
            module_uri = module_path
        else:
            # module_path is a local path.
            local_path = Path(module_path)
            if local_path.is_dir():
                excludes = runtime_env.get("excludes", None)
                module_uri = get_uri_for_directory(module_path,
                                                   excludes=excludes)
                if upload_fn is None:
                    upload_package_if_needed(
                        module_uri,
                        scratch_dir,
                        module_path,
                        excludes=excludes,
                        include_parent_dir=True,
                        logger=logger,
                    )
                else:
                    upload_fn(module_path, excludes=excludes)
            elif local_path.suffix == ".whl":
                module_uri = get_uri_for_package(local_path)
                if upload_fn is None:
                    # Wheels are uploaded verbatim, and only when absent.
                    if not package_exists(module_uri):
                        upload_package_to_gcs(module_uri,
                                              local_path.read_bytes())
                else:
                    upload_fn(module_path, excludes=None, is_file=True)
            else:
                raise ValueError(
                    "py_modules entry must be a directory or a .whl file; "
                    f"got {module_path}")

        py_modules_uris.append(module_uri)

    # TODO(architkulkarni): Expose a single URI for py_modules. This plugin
    # should internally handle the "sub-URIs", the individual modules.

    runtime_env["py_modules"] = py_modules_uris
    return runtime_env