def write_zipped_internal_cache(cls, pex, pex_info):
    """Materialize the distributions bundled inside a zipped PEX into the install cache.

    Scans entries under ``pex_info.internal_cache`` (the ``.deps/`` prefix) in the PEX zip,
    reusing distributions already present in ``pex_info.install_cache`` and extracting the rest.

    :param pex: Path to the zipped PEX file.
    :param pex_info: The PexInfo metadata describing the PEX.
    :returns: A 3-tuple of lists: (existing cached, newly cached, zip-safe) distributions.
    """
    # +1 skips the '/' separator that follows the internal cache prefix in zip entry names.
    prefix_length = len(pex_info.internal_cache) + 1
    existing_cached_distributions = []
    newly_cached_distributions = []
    zip_safe_distributions = []
    with open_zip(pex) as zf:
        # Distribution names are the first element after ".deps/" and before the next "/"
        distribution_names = set(filter(None, (filename[prefix_length:].split('/')[0]
            for filename in zf.namelist() if filename.startswith(pex_info.internal_cache))))
        # Create Distribution objects from these, and possibly write to disk if necessary.
        for distribution_name in distribution_names:
            internal_dist_path = '/'.join([pex_info.internal_cache, distribution_name])
            # First check if this is already cached
            dist_digest = pex_info.distributions.get(distribution_name) or CacheHelper.zip_hash(
                zf, internal_dist_path)
            cached_location = os.path.join(pex_info.install_cache, '%s.%s' % (
                distribution_name, dist_digest))
            if os.path.exists(cached_location):
                dist = DistributionHelper.distribution_from_path(cached_location)
                if dist is not None:
                    existing_cached_distributions.append(dist)
                    continue
            else:
                dist = DistributionHelper.distribution_from_path(os.path.join(pex, internal_dist_path))
                if dist is not None:
                    if DistributionHelper.zipsafe(dist) and not pex_info.always_write_cache:
                        zip_safe_distributions.append(dist)
                        continue
            # Fall through: the distribution could not be reused in place, so (re)extract it
            # from the zip into the install cache.
            with TRACER.timed('Caching %s' % dist):
                newly_cached_distributions.append(
                    CacheHelper.cache_distribution(zf, internal_dist_path, cached_location))
    return existing_cached_distributions, newly_cached_distributions, zip_safe_distributions
def _add_dist_zip(self, path, dist_name):
    """Add a zipped distribution to the chroot's internal cache and return its content hash.

    Wheels are installed into a temporary wheel dir first (to force an importable layout);
    other zips are copied in entry-by-entry as-is.
    """
    # We need to distinguish between wheels and other zips. Most of the time,
    # when we have a zip, it contains its contents in an importable form.
    # But wheels don't have to be importable, so we need to force them
    # into an importable shape. We can do that by installing it into its own
    # wheel dir.
    if dist_name.endswith("whl"):
        from pex.third_party.wheel.install import WheelFile
        tmp = safe_mkdtemp()
        whltmp = os.path.join(tmp, dist_name)
        os.mkdir(whltmp)
        wf = WheelFile(path)
        wf.install(overrides=self._get_installer_paths(whltmp), force=True)
        for root, _, files in os.walk(whltmp):
            # Relativizing against tmp (not whltmp) keeps dist_name as the leading path component.
            pruned_dir = os.path.relpath(root, tmp)
            for f in files:
                fullpath = os.path.join(root, f)
                target = os.path.join(self._pex_info.internal_cache, pruned_dir, f)
                self._copy_or_link(fullpath, target)
        return CacheHelper.dir_hash(whltmp)
    with open_zip(path) as zf:
        for name in zf.namelist():
            # Skip directory entries; only file entries are written into the chroot.
            if name.endswith('/'):
                continue
            target = os.path.join(self._pex_info.internal_cache, dist_name, name)
            self._chroot.write(zf.read(name), target)
        return CacheHelper.zip_hash(zf)
def test_hash_consistency():
    """A directory's dir_hash matches the zip_hash of a zip of that directory.

    The check is repeated with entries written to the zip in both orders to prove the
    hash is insensitive to zip entry ordering.
    """
    for reverse in (False, True):
        with temporary_content(CONTENT) as td:
            dir_hash = CacheHelper.dir_hash(td)
            with named_temporary_file() as tf:
                write_zipfile(td, tf.name, reverse=reverse)
                # ZipFile is itself a context manager; contextlib.closing is redundant here.
                with zipfile.ZipFile(tf.name, 'r') as zf:
                    zip_hash = CacheHelper.zip_hash(zf)
                    assert zip_hash == dir_hash
                    assert zip_hash != sha1().hexdigest()  # make sure it's not an empty hash
def _add_dist_dir(self, path, dist_name):
    """Copy an exploded distribution directory into the internal cache as `dist_name`.

    Returns the content hash of the source directory.
    """
    cache_root = os.path.join(self._pex_info.internal_cache, dist_name)
    for dirpath, _, filenames in os.walk(path):
        for name in filenames:
            src = os.path.join(dirpath, name)
            dst = os.path.join(cache_root, os.path.relpath(src, path))
            self._copy_or_link(src, dst)
    return CacheHelper.dir_hash(path)
def test_hash():
    """CacheHelper.hash agrees with hashlib.sha1 for empty, multi-block, and caller-digest inputs."""
    with temporary_file() as fp:
        fp.flush()
        # An empty file hashes to the empty sha1 digest.
        assert CacheHelper.hash(fp.name) == sha1().hexdigest()
    with temporary_file() as fp:
        # Payload spans many hash blocks plus a ragged tail.
        payload = b'asdf' * 1024 * sha1().block_size + b'extra padding'
        fp.write(payload)
        fp.flush()
        assert CacheHelper.hash(fp.name) == sha1(payload).hexdigest()
    with temporary_file() as fp:
        digest = sha1()
        fp.write(b'asdf')
        fp.flush()
        # A caller-supplied digest object is updated in place.
        result = CacheHelper.hash(fp.name, digest=digest)
        assert result == digest.hexdigest()
def test_hash():
    """CacheHelper.hash agrees with hashlib.sha1 for empty, multi-block, and caller-digest inputs."""
    with named_temporary_file() as fp:
        fp.flush()
        # An empty file hashes to the empty sha1 digest.
        assert CacheHelper.hash(fp.name) == sha1().hexdigest()
    with named_temporary_file() as fp:
        # Payload spans many hash blocks plus a ragged tail.
        payload = b'asdf' * 1024 * sha1().block_size + b'extra padding'
        fp.write(payload)
        fp.flush()
        assert CacheHelper.hash(fp.name) == sha1(payload).hexdigest()
    with named_temporary_file() as fp:
        digest = sha1()
        fp.write(b'asdf')
        fp.flush()
        # A caller-supplied digest object is updated in place.
        result = CacheHelper.hash(fp.name, digest=digest)
        assert result == digest.hexdigest()
def test_hash():
    # type: () -> None
    """CacheHelper.hash agrees with hashlib.sha1 for empty, multi-block, and caller-digest inputs."""
    with named_temporary_file() as fp:
        fp.flush()
        # An empty file hashes to the empty sha1 digest.
        assert CacheHelper.hash(fp.name) == sha1().hexdigest()
    with named_temporary_file() as fp:
        # Payload spans many hash blocks plus a ragged tail.
        payload = b"asdf" * 1024 * sha1().block_size + b"extra padding"
        fp.write(payload)
        fp.flush()
        assert CacheHelper.hash(fp.name) == sha1(payload).hexdigest()
    with named_temporary_file() as fp:
        digest = sha1()
        fp.write(b"asdf")
        fp.flush()
        # A caller-supplied digest object is updated in place.
        result = CacheHelper.hash(fp.name, digest=digest)
        assert result == digest.hexdigest()
def _write_zipped_internal_cache(cls, zf, pex_info):
    """Extract every distribution recorded in `pex_info` from the zip into its install cache slot.

    Each distribution lands at ``<install_cache>/<digest>/<name>``; returns the resulting
    Distribution objects.
    """
    return [
        CacheHelper.cache_distribution(
            zf,
            '/'.join([pex_info.internal_cache, name]),
            os.path.join(pex_info.install_cache, digest, name))
        for name, digest in pex_info.distributions.items()
    ]
def _add_dist_dir(self, path, dist_name):
    """Add an exploded distribution at `path` to the internal cache as `dist_name`.

    In SYMLINK copy mode a single directory symlink is used; otherwise files are
    copied/linked individually. Returns the content hash of the source directory.
    """
    dest_root = os.path.join(self._pex_info.internal_cache, dist_name)
    if self._copy_mode == CopyMode.SYMLINK:
        # One symlink covers the entire directory tree.
        self._copy_or_link(path, dest_root)
        return CacheHelper.dir_hash(path)
    for dirpath, _, filenames in os.walk(path):
        for name in filenames:
            src = os.path.join(dirpath, name)
            self._copy_or_link(src, os.path.join(dest_root, os.path.relpath(src, path)))
    return CacheHelper.dir_hash(path)
def isolated():
    """Returns a chroot for third_party isolated from the ``sys.path``.

    PEX will typically be installed in site-packages flat alongside many other distributions;
    as such, adding the location of the pex distribution to the ``sys.path`` will typically
    expose many other distributions. An isolated chroot can be used as a ``sys.path`` entry
    to effect only the exposure of pex.

    :return: An isolation result.
    :rtype: :class:`IsolationResult`
    """
    global _ISOLATED
    # Memoized: the chroot is created at most once per process.
    if _ISOLATED is None:
        from pex import vendor
        from pex.common import atomic_directory
        from pex.util import CacheHelper
        from pex.variables import ENV
        from pex.third_party.pkg_resources import resource_isdir, resource_listdir, resource_stream

        module = 'pex'

        def recursive_copy(srcdir, dstdir):
            # Copies the `pex` package resources into dstdir, skipping .pyc files.
            os.mkdir(dstdir)
            for entry_name in resource_listdir(module, srcdir):
                # NB: Resource path components are always separated by /, on all systems.
                src_entry = '{}/{}'.format(srcdir, entry_name) if srcdir else entry_name
                dst_entry = os.path.join(dstdir, entry_name)
                if resource_isdir(module, src_entry):
                    recursive_copy(src_entry, dst_entry)
                elif not entry_name.endswith('.pyc'):
                    with open(dst_entry, 'wb') as fp:
                        shutil.copyfileobj(resource_stream(module, src_entry), fp)

        pex_path = os.path.join(vendor.VendorSpec.ROOT, 'pex')
        with _tracer().timed('Hashing pex'):
            dir_hash = CacheHelper.dir_hash(pex_path)
        # The chroot is keyed by content hash, so a new pex version gets a fresh isolation dir.
        isolated_dir = os.path.join(ENV.PEX_ROOT, 'isolated', dir_hash)
        with _tracer().timed('Isolating pex'):
            # NOTE(review): atomic_directory appears to yield a falsy value when the target
            # already exists (extraction already done) — confirm against pex.common.
            with atomic_directory(isolated_dir) as chroot:
                if chroot:
                    with _tracer().timed('Extracting pex to {}'.format(isolated_dir)):
                        recursive_copy('', os.path.join(chroot, 'pex'))

        _ISOLATED = IsolationResult(pex_hash=dir_hash, chroot_path=isolated_dir)
    return _ISOLATED
def isolated():
    """Returns a chroot for third_party isolated from the ``sys.path``.

    PEX will typically be installed in site-packages flat alongside many other distributions;
    as such, adding the location of the pex distribution to the ``sys.path`` will typically
    expose many other distributions. An isolated chroot can be used as a ``sys.path`` entry
    to effect only the exposure of pex.

    :return: The path of the chroot.
    :rtype: str
    """
    global _ISOLATED
    # Memoized: the chroot is created at most once per process.
    if _ISOLATED is None:
        from pex import vendor
        from pex.common import atomic_directory, safe_copy
        from pex.util import CacheHelper
        from pex.variables import ENV

        pex_path = os.path.join(vendor.VendorSpec.ROOT, 'pex')
        with _tracer().timed('Isolating pex'):
            # The chroot is keyed by content hash, so a new pex version gets a fresh dir.
            isolated_dir = os.path.join(ENV.PEX_ROOT, 'isolated', CacheHelper.dir_hash(pex_path))
            # NOTE(review): atomic_directory appears to yield a falsy value when the target
            # already exists (extraction already done) — confirm against pex.common.
            with atomic_directory(isolated_dir) as chroot:
                if chroot:
                    with _tracer().timed('Extracting pex to {}'.format(isolated_dir)):
                        pex_path = os.path.join(vendor.VendorSpec.ROOT, 'pex')
                        for root, dirs, files in os.walk(pex_path):
                            relroot = os.path.relpath(root, pex_path)
                            for d in dirs:
                                os.makedirs(os.path.join(chroot, 'pex', relroot, d))
                            for f in files:
                                # Skip compiled bytecode; only source files are isolated.
                                if not f.endswith('.pyc'):
                                    safe_copy(
                                        os.path.join(root, f),
                                        os.path.join(chroot, 'pex', relroot, f))
        _ISOLATED = isolated_dir
    return _ISOLATED
def _prepare_code(self):
    """Write the PEX-INFO manifest and the __main__.py bootstrap into the chroot.

    Ordering matters: code_hash is assigned before pex_hash is computed over the dumped
    PEX-INFO (which presumably includes code_hash — the dump happens after the assignment).
    """
    self._pex_info.code_hash = CacheHelper.pex_code_hash(self._chroot.path())
    self._pex_info.pex_hash = hashlib.sha1(
        self._pex_info.dump().encode("utf-8")).hexdigest()
    self._chroot.write(self._pex_info.dump().encode("utf-8"), PexInfo.PATH, label="manifest")

    # Render the bootstrap script that becomes the PEX's __main__.py entry point.
    bootstrap = BOOTSTRAP_ENVIRONMENT.format(
        bootstrap_dir=BOOTSTRAP_DIR,
        pex_root=self._pex_info.raw_pex_root,
        pex_hash=self._pex_info.pex_hash,
        interpreter_constraints=self._pex_info.interpreter_constraints,
        pex_path=self._pex_info.pex_path,
        is_unzip=self._pex_info.unzip,
        is_venv=self._pex_info.venv,
    )
    self._chroot.write(to_bytes(self._preamble + "\n" + bootstrap), "__main__.py", label="main")
def _prepare_code_hash(self):
    """Record the hash of the chroot's contents in the PEX-INFO metadata."""
    chroot_path = self._chroot.path()
    self._pex_info.code_hash = CacheHelper.pex_hash(chroot_path)
def isolated():
    """Returns a chroot for third_party isolated from the ``sys.path``.

    PEX will typically be installed in site-packages flat alongside many other distributions;
    as such, adding the location of the pex distribution to the ``sys.path`` will typically
    expose many other distributions. An isolated chroot can be used as a ``sys.path`` entry
    to effect only the exposure of pex.

    :return: An isolation result.
    :rtype: :class:`IsolationResult`
    """
    global _ISOLATED
    # Memoized: the chroot is created at most once per process.
    if _ISOLATED is None:
        from pex import vendor
        from pex.common import atomic_directory
        from pex.util import CacheHelper
        from pex.variables import ENV
        from pex.third_party.pkg_resources import resource_isdir, resource_listdir, resource_stream

        module = "pex"

        # TODO(John Sirois): Unify with `pex.util.DistributionHelper.access_zipped_assets`.
        def recursive_copy(srcdir, dstdir):
            # Copies the `pex` package resources into dstdir, skipping .pyc files.
            os.mkdir(dstdir)
            for entry_name in resource_listdir(module, srcdir):
                if not entry_name:
                    # The `resource_listdir` function returns a '' entry name for the directory
                    # entry itself if it is either present on the filesystem or present as an
                    # explicit zip entry. Since we only care about files and subdirectories at this
                    # point, skip these entries.
                    continue
                # NB: Resource path components are always separated by /, on all systems.
                src_entry = "{}/{}".format(srcdir, entry_name) if srcdir else entry_name
                dst_entry = os.path.join(dstdir, entry_name)
                if resource_isdir(module, src_entry):
                    recursive_copy(src_entry, dst_entry)
                elif not entry_name.endswith(".pyc"):
                    with open(dst_entry, "wb") as fp:
                        with closing(resource_stream(module, src_entry)) as resource:
                            shutil.copyfileobj(resource, fp)

        pex_path = os.path.join(vendor.VendorSpec.ROOT, "pex")
        with _tracer().timed("Hashing pex"):
            if os.path.isdir(pex_path):
                # Installed-distribution case: hash the on-disk package directory.
                dir_hash = CacheHelper.dir_hash(pex_path)
            else:
                # Running from a PEX zipfile: hash the `pex` subtree inside the zip (argv0).
                pex_zip = os.path.abspath(sys.argv[0])
                assert zipfile.is_zipfile(pex_zip) and pex_zip == os.path.commonprefix(
                    (pex_zip, pex_path)
                ), (
                    "Expected the `pex` module to be available via an installed distribution or "
                    "else via a PEX zipfile present as argv0. Loaded the `pex` module from {} and "
                    "argv0 is {}.".format(pex_path, sys.argv[0])
                )
                dir_hash = CacheHelper.zip_hash(pex_zip, os.path.relpath(pex_path, pex_zip))
        # The chroot is keyed by content hash, so a new pex version gets a fresh isolation dir.
        isolated_dir = os.path.join(ENV.PEX_ROOT, "isolated", dir_hash)
        with _tracer().timed("Isolating pex"):
            # NOTE(review): atomic_directory appears to yield a falsy value when the target
            # already exists (extraction already done) — confirm against pex.common.
            with atomic_directory(isolated_dir, exclusive=True) as chroot:
                if chroot:
                    with _tracer().timed("Extracting pex to {}".format(isolated_dir)):
                        recursive_copy("", os.path.join(chroot, "pex"))

        _ISOLATED = IsolationResult(pex_hash=dir_hash, chroot_path=isolated_dir)
    return _ISOLATED
def finalize_install(self, install_requests): self.atomic_dir.finalize() # The install_chroot is keyed by the hash of the wheel file (zip) we installed. Here we add a # key by the hash of the exploded wheel dir (the install_chroot). This latter key is used by # zipped PEXes at runtime to explode their wheel chroots to the filesystem. By adding the key # here we short-circuit the explode process for PEXes created and run on the same machine. # # From a clean cache after building a simple pex this looks like: # $ rm -rf ~/.pex # $ python -mpex -c pex -o /tmp/pex.pex . # $ tree -L 4 ~/.pex/ # /home/jsirois/.pex/ # ├── built_wheels # │ └── 1003685de2c3604dc6daab9540a66201c1d1f718 # │ └── cp-38-cp38 # │ └── pex-2.0.2-py2.py3-none-any.whl # └── installed_wheels # ├── 2a594cef34d2e9109bad847358d57ac4615f81f4 # │ └── pex-2.0.2-py2.py3-none-any.whl # │ ├── bin # │ ├── pex # │ └── pex-2.0.2.dist-info # └── ae13cba3a8e50262f4d730699a11a5b79536e3e1 # └── pex-2.0.2-py2.py3-none-any.whl -> /home/jsirois/.pex/installed_wheels/2a594cef34d2e9109bad847358d57ac4615f81f4/pex-2.0.2-py2.py3-none-any.whl # noqa # # 11 directories, 1 file # # And we see in the created pex, the runtime key that the layout above satisfies: # $ unzip -qc /tmp/pex.pex PEX-INFO | jq .distributions # { # "pex-2.0.2-py2.py3-none-any.whl": "ae13cba3a8e50262f4d730699a11a5b79536e3e1" # } # # When the pex is run, the runtime key is followed to the build time key, avoiding re-unpacking # the wheel: # $ PEX_VERBOSE=1 /tmp/pex.pex --version # pex: Found site-library: /usr/lib/python3.8/site-packages # pex: Tainted path element: /usr/lib/python3.8/site-packages # pex: Scrubbing from user site: /home/jsirois/.local/lib/python3.8/site-packages # pex: Scrubbing from site-packages: /usr/lib/python3.8/site-packages # pex: Activating PEX virtual environment from /tmp/pex.pex: 9.1ms # pex: Bootstrap complete, performing final sys.path modifications... 
# pex: PYTHONPATH contains: # pex: /tmp/pex.pex # pex: * /usr/lib/python38.zip # pex: /usr/lib/python3.8 # pex: /usr/lib/python3.8/lib-dynload # pex: /home/jsirois/.pex/installed_wheels/2a594cef34d2e9109bad847358d57ac4615f81f4/pex-2.0.2-py2.py3-none-any.whl # noqa # pex: * /tmp/pex.pex/.bootstrap # pex: * - paths that do not exist or will be imported via zipimport # pex.pex 2.0.2 # wheel_dir_hash = CacheHelper.dir_hash(self.install_chroot) runtime_key_dir = os.path.join(self.installation_root, wheel_dir_hash) with atomic_directory(runtime_key_dir) as work_dir: if work_dir: os.symlink(self.install_chroot, os.path.join(work_dir, self.request.wheel_file)) return self._iter_requirements_requests(install_requests)
def _spawn_from_binary_external(cls, binary):
    """Spawn identification of `binary`, serving from the interpreter cache when possible.

    :param binary: Path to the python binary to identify.
    :returns: A SpawnedJob resolving to a PythonInterpreter for `binary`.
    :raises: cls.IdentificationError via the result func if no identity can be established.
    """
    def create_interpreter(stdout):
        identity = stdout.decode('utf-8').strip()
        if not identity:
            raise cls.IdentificationError('Could not establish identity of %s' % binary)
        return cls(PythonIdentity.decode(identity))

    # Part of the PythonInterpreter data are environment markers that depend on the current OS
    # release. That data can change when the OS is upgraded but (some of) the installed interpreters
    # remain the same. As such, include the OS in the hash structure for cached interpreters.
    os_digest = hashlib.sha1()
    for os_identifier in platform.release(), platform.version():
        os_digest.update(os_identifier.encode('utf-8'))
    os_hash = os_digest.hexdigest()

    interpreter_cache_dir = os.path.join(ENV.PEX_ROOT, 'interpreters')
    os_cache_dir = os.path.join(interpreter_cache_dir, os_hash)
    # A cache dir exists but not for this OS release: the whole cache is stale; drop it.
    if os.path.isdir(interpreter_cache_dir) and not os.path.isdir(os_cache_dir):
        with TRACER.timed('GCing interpreter cache from prior OS version'):
            safe_rmtree(interpreter_cache_dir)

    interpreter_hash = CacheHelper.hash(binary)
    cache_dir = os.path.join(os_cache_dir, interpreter_hash)
    cache_file = os.path.join(cache_dir, cls.INTERP_INFO_FILE)
    if os.path.isfile(cache_file):
        try:
            with open(cache_file, 'rb') as fp:
                return SpawnedJob.completed(create_interpreter(fp.read()))
        except (IOError, OSError, cls.Error, PythonIdentity.Error):
            # The cached entry is corrupt or unreadable: purge it and re-identify.
            safe_rmtree(cache_dir)
            return cls._spawn_from_binary_external(binary)
    else:
        pythonpath = third_party.expose(['pex'])
        # The child process both prints its identity (consumed via stdout) and writes it into
        # the cache dir atomically for future runs.
        cmd, env = cls._create_isolated_cmd(binary, args=[
            '-c',
            dedent("""\
            import os
            import sys

            from pex.common import atomic_directory, safe_open
            from pex.interpreter import PythonIdentity


            encoded_identity = PythonIdentity.get().encode()
            sys.stdout.write(encoded_identity)
            with atomic_directory({cache_dir!r}) as cache_dir:
                if cache_dir:
                    with safe_open(os.path.join(cache_dir, {info_file!r}), 'w') as fp:
                        fp.write(encoded_identity)
            """.format(cache_dir=cache_dir, info_file=cls.INTERP_INFO_FILE))
        ], pythonpath=pythonpath)
        process = Executor.open_process(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        job = Job(command=cmd, process=process)
        return SpawnedJob.stdout(job, result_func=create_interpreter)
def _spawn_from_binary_external(cls, binary):
    """Spawn identification of `binary`, serving from the interpreter cache when possible.

    :param binary: Path to the python binary to identify.
    :returns: A SpawnedJob resolving to a PythonInterpreter for `binary`.
    :raises: cls.IdentificationError via the result func if no identity can be established.
    """
    def create_interpreter(stdout):
        identity = stdout.decode("utf-8").strip()
        if not identity:
            raise cls.IdentificationError("Could not establish identity of %s" % binary)
        return cls(PythonIdentity.decode(identity))

    # Part of the PythonInterpreter data are environment markers that depend on the current OS
    # release. That data can change when the OS is upgraded but (some of) the installed interpreters
    # remain the same. As such, include the OS in the hash structure for cached interpreters.
    os_digest = hashlib.sha1()
    for os_identifier in platform.release(), platform.version():
        os_digest.update(os_identifier.encode("utf-8"))
    os_hash = os_digest.hexdigest()

    interpreter_cache_dir = os.path.join(ENV.PEX_ROOT, "interpreters")
    os_cache_dir = os.path.join(interpreter_cache_dir, os_hash)
    # A cache dir exists but not for this OS release: the whole cache is stale; drop it.
    if os.path.isdir(interpreter_cache_dir) and not os.path.isdir(os_cache_dir):
        with TRACER.timed("GCing interpreter cache from prior OS version"):
            safe_rmtree(interpreter_cache_dir)

    interpreter_hash = CacheHelper.hash(binary)

    # Some distributions include more than one copy of the same interpreter via a hard link (e.g.:
    # python3.7 is a hardlink to python3.7m). To ensure a deterministic INTERP-INFO file we must
    # emit a separate INTERP-INFO for each link since INTERP-INFO contains the interpreter path and
    # would otherwise be unstable.
    #
    # See cls._REGEXEN for a related affordance.
    path_id = binary.replace(os.sep, ".").lstrip(".")

    cache_dir = os.path.join(os_cache_dir, interpreter_hash, path_id)
    cache_file = os.path.join(cache_dir, cls.INTERP_INFO_FILE)
    if os.path.isfile(cache_file):
        try:
            with open(cache_file, "rb") as fp:
                return SpawnedJob.completed(create_interpreter(fp.read()))
        except (IOError, OSError, cls.Error, PythonIdentity.Error):
            # The cached entry is corrupt or unreadable: purge it and re-identify.
            safe_rmtree(cache_dir)
            return cls._spawn_from_binary_external(binary)
    else:
        pythonpath = third_party.expose(["pex"])
        # The child process both prints its identity (consumed via stdout) and writes it into
        # the cache dir atomically for future runs.
        cmd, env = cls._create_isolated_cmd(
            binary,
            args=[
                "-c",
                dedent("""\
                import os
                import sys

                from pex.common import atomic_directory, safe_open
                from pex.interpreter import PythonIdentity


                encoded_identity = PythonIdentity.get().encode()
                sys.stdout.write(encoded_identity)
                with atomic_directory({cache_dir!r}) as cache_dir:
                    if cache_dir:
                        with safe_open(os.path.join(cache_dir, {info_file!r}), 'w') as fp:
                            fp.write(encoded_identity)
                """.format(cache_dir=cache_dir, info_file=cls.INTERP_INFO_FILE)),
            ],
            pythonpath=pythonpath,
        )
        process = Executor.open_process(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        job = Job(command=cmd, process=process)
        return SpawnedJob.stdout(job, result_func=create_interpreter)
def create(cls, target, wheel_path):
    """Build an instance for `wheel_path` against `target`, fingerprinting the wheel contents."""
    wheel_fingerprint = CacheHelper.hash(wheel_path)
    return cls(target=target, wheel_path=wheel_path, fingerprint=wheel_fingerprint)
def _spawn_from_binary_external(cls, binary):
    """Spawn identification of `binary`, serving from the interpreter cache when possible.

    :param binary: Path to the python binary to identify.
    :returns: A SpawnedJob resolving to a PythonInterpreter for `binary`.
    :raises: cls.IdentificationError / cls.InterpreterNotFound via the result func.
    """
    def create_interpreter(stdout, check_binary=False):
        identity = stdout.decode("utf-8").strip()
        if not identity:
            raise cls.IdentificationError("Could not establish identity of {}.".format(binary))
        interpreter = cls(PythonIdentity.decode(identity))
        # We should not need to check this since binary == interpreter.binary should always be
        # true, but historically this could be untrue as noted in `PythonIdentity.get`.
        if check_binary and not os.path.exists(interpreter.binary):
            raise cls.InterpreterNotFound(
                "Cached interpreter for {} reports a binary of {}, which could not be found".format(
                    binary, interpreter.binary))
        return interpreter

    # Part of the PythonInterpreter data are environment markers that depend on the current OS
    # release. That data can change when the OS is upgraded but (some of) the installed interpreters
    # remain the same. As such, include the OS in the hash structure for cached interpreters.
    os_digest = hashlib.sha1()
    for os_identifier in platform.release(), platform.version():
        os_digest.update(os_identifier.encode("utf-8"))
    os_hash = os_digest.hexdigest()

    interpreter_cache_dir = os.path.join(ENV.PEX_ROOT, "interpreters")
    os_cache_dir = os.path.join(interpreter_cache_dir, os_hash)
    # A cache dir exists but not for this OS release: the whole cache is stale; drop it.
    if os.path.isdir(interpreter_cache_dir) and not os.path.isdir(os_cache_dir):
        with TRACER.timed("GCing interpreter cache from prior OS version"):
            safe_rmtree(interpreter_cache_dir)

    interpreter_hash = CacheHelper.hash(binary)

    # Some distributions include more than one copy of the same interpreter via a hard link (e.g.:
    # python3.7 is a hardlink to python3.7m). To ensure a deterministic INTERP-INFO file we must
    # emit a separate INTERP-INFO for each link since INTERP-INFO contains the interpreter path and
    # would otherwise be unstable.
    #
    # See cls._REGEXEN for a related affordance.
    #
    # N.B.: The path for --venv mode interpreters can be quite long; so we just used a fixed
    # length hash of the interpreter binary path to ensure uniqueness and not run afoul of file
    # name length limits.
    path_id = hashlib.sha1(binary.encode("utf-8")).hexdigest()

    cache_dir = os.path.join(os_cache_dir, interpreter_hash, path_id)
    cache_file = os.path.join(cache_dir, cls.INTERP_INFO_FILE)
    if os.path.isfile(cache_file):
        try:
            with open(cache_file, "rb") as fp:
                return SpawnedJob.completed(create_interpreter(fp.read(), check_binary=True))
        except (IOError, OSError, cls.Error, PythonIdentity.Error):
            # The cached entry is corrupt, unreadable, or stale: purge it and re-identify.
            safe_rmtree(cache_dir)
            return cls._spawn_from_binary_external(binary)
    else:
        pythonpath = third_party.expose(["pex"])
        # The child process both prints its identity (consumed via stdout) and writes it into
        # the cache dir atomically for future runs.
        cmd, env = cls._create_isolated_cmd(
            binary,
            args=[
                "-c",
                dedent("""\
                import os
                import sys

                from pex.common import atomic_directory, safe_open
                from pex.interpreter import PythonIdentity


                encoded_identity = PythonIdentity.get(binary={binary!r}).encode()
                sys.stdout.write(encoded_identity)
                with atomic_directory({cache_dir!r}, exclusive=False) as cache_dir:
                    if cache_dir:
                        with safe_open(os.path.join(cache_dir, {info_file!r}), 'w') as fp:
                            fp.write(encoded_identity)
                """.format(binary=binary, cache_dir=cache_dir, info_file=cls.INTERP_INFO_FILE)),
            ],
            pythonpath=pythonpath,
        )
        process = Executor.open_process(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        job = Job(command=cmd, process=process)
        return SpawnedJob.stdout(job, result_func=create_interpreter)