def test_used_cache(tmp_dir, dvc, path):
    """Check `dvc.used_cache([path])` against a hand-built `NamedCache`.

    The expected cache holds the `.dir` entry for "dir" plus a child
    entry for "dir/subdir/file"; both `_items` and `external` must match.
    """
    from dvc.objects.db import NamedCache

    dir_hash = "70922d6bf66eb073053a82f77d58c536.dir"
    file_hash = "8c7dd922ad47494fc02c388e12c00eac"

    tmp_dir.dvc_gen({"dir": {"subdir": {"file": "file"}, "other": "other"}})

    file_entry = NamedCache.make(
        "local", file_hash, os.path.join("dir", "subdir", "file")
    )
    expected = NamedCache.make("local", dir_hash, "dir")
    expected.add_child_cache(dir_hash, file_entry)

    actual = dvc.used_cache([path])

    assert actual._items == expected._items
    assert actual.external == expected.external
def get_used_cache(self, **kwargs):
    """Build the `NamedCache` describing what this output uses.

    Returns an empty `NamedCache` when the output is not cached or has no
    hash info (a warning is logged in the latter case). For a repo-import
    stage, the single dependency's `def_path` is recorded under
    `external[dep.repo_pair]`. Otherwise the result holds this output's
    hash and, for directory checksums, a child cache produced by
    `collect_used_dir_cache(**kwargs)`.
    """
    if not self.use_cache:
        return NamedCache()

    if self.stage.is_repo_import:
        imported = NamedCache()
        (dep,) = self.stage.deps
        imported.external[dep.repo_pair].add(dep.def_path)
        return imported

    if not self.hash_info:
        parts = [
            "Output '{}'({}) is missing version info. "
            "Cache for it will not be collected. "
            "Use `dvc repro` to get your pipeline up to date.".format(
                self, self.stage
            )
        ]
        # Only suggest `dvc commit` when there is something on disk to commit.
        if self.exists:
            parts.append(
                "\n"
                "You can also use `dvc commit {stage.addressing}` "
                "to associate existing '{out}' with {stage}.".format(
                    out=self, stage=self.stage
                )
            )
        logger.warning("".join(parts))
        return NamedCache()

    used = NamedCache.make(self.scheme, self.hash_info.value, str(self))
    if self.is_dir_checksum:
        # Directory outputs also carry the entries of their contents.
        used.add_child_cache(
            self.hash_info.value, self.collect_used_dir_cache(**kwargs)
        )
    return used
def get_dir_cache(self, **kwargs):
    """Load the directory object for this output into `self.obj`.

    The object is first checked in `self.odb`; if it is missing or
    malformed, it is pulled through `self.repo.cloud.pull` (extra keyword
    arguments are forwarded there). The object is then loaded, and
    `self.obj` is set to `None` if loading still fails.

    Raises:
        DvcException: if this output does not have a directory checksum.

    Returns:
        The value stored in `self.obj`.
    """
    if not self.is_dir_checksum:
        raise DvcException("cannot get dir cache for file checksum")

    try:
        stored = self.odb.get(self.hash_info)
        objects.check(self.odb, stored)
    except (FileNotFoundError, ObjectFormatError):
        # Not usable locally: try fetching it before loading.
        wanted = NamedCache.make("local", self.hash_info.value, str(self))
        self.repo.cloud.pull(wanted, show_checksums=False, **kwargs)

    try:
        self.obj = objects.load(self.odb, self.hash_info)
    except (FileNotFoundError, ObjectFormatError):
        self.obj = None

    return self.obj
def test_cloud(tmp_dir, dvc, remote):  # pylint:disable=unused-argument
    """Exercise `dvc.cloud` status/push/pull for a file and a directory.

    The reported status is expected to go NEW -> MISSING (cache moved
    away) -> OK (after push) -> DELETED (cache removed) -> OK (after pull).
    """
    (file_stage,) = tmp_dir.dvc_gen("foo", "foo")
    out = file_stage.outs[0]
    cache = out.cache_path
    md5 = out.hash_info.value
    info = out.get_used_cache()

    tree = {
        "data_dir": {
            "data_sub_dir": {"data_sub": "data_sub"},
            "data": "data",
            "empty": "",
        }
    }
    (dir_stage,) = tmp_dir.dvc_gen(tree)
    out_dir = dir_stage.outs[0]
    cache_dir = out_dir.cache_path
    dir_name = str(out_dir)
    md5_dir = out_dir.hash_info.value
    info_dir = NamedCache.make(out_dir.scheme, md5_dir, dir_name)

    def assert_both(expected_status):
        # File entry is always queried and checked before the directory one.
        for used, checksum in ((info, md5), (info_dir, md5_dir)):
            reported = dvc.cloud.status(used, show_checksums=True)
            wanted = {checksum: {"name": checksum, "status": expected_status}}
            assert reported == wanted

    # Check status
    assert_both(STATUS_NEW)

    # Move cache and check status
    # See issue https://github.com/iterative/dvc/issues/4383 for details
    backup_dir = dvc.odb.local.cache_dir + ".backup"
    move(dvc.odb.local.cache_dir, backup_dir)
    assert_both(STATUS_MISSING)

    # Restore original cache:
    remove(dvc.odb.local.cache_dir)
    move(backup_dir, dvc.odb.local.cache_dir)

    # Push and check status
    dvc.cloud.push(info)
    assert os.path.exists(cache)
    assert os.path.isfile(cache)

    dvc.cloud.push(info_dir)
    assert os.path.isfile(cache_dir)

    assert_both(STATUS_OK)

    # Remove and check status
    remove(dvc.odb.local.cache_dir)
    assert_both(STATUS_DELETED)

    # Pull and check status
    dvc.cloud.pull(info)
    assert os.path.exists(cache)
    assert os.path.isfile(cache)
    with open(cache) as fd:
        content = fd.read()
        assert content == "foo"

    dvc.cloud.pull(info_dir)
    assert os.path.isfile(cache_dir)

    assert_both(STATUS_OK)