def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):
    """Check that saving a directory from a RepoTree fills the local cache.

    An external repo is populated with a directory that mixes a DVC-added
    subdirectory with a git-tracked file, and is pushed to a local remote.
    The directory is then saved into ``dvc.cache.local`` through a
    ``RepoTree`` and every expected file hash must exist in that cache.
    """
    layout = {"dir": {"subdir": {"foo": "foo"}, "bar": "bar"}}
    with erepo_dir.chdir():
        erepo_dir.gen(layout)
        erepo_dir.dvc_add("dir/subdir", commit="subdir")
        erepo_dir.scm_add("dir", commit="dir")
        erepo_dir.add_remote(config=local_cloud.config)
        erepo_dir.dvc.push()

    # Either fetch or stream has to be enabled so that DVC directories get
    # walked; which one is irrelevant for this test.
    #
    # Here every file object is open()ed and copied from the tree into
    # dvc.cache rather than being fetched or streamed from a remote.
    source_tree = RepoTree(erepo_dir.dvc, stream=True)

    expected_hashes = []
    for rel_path in ("dir/bar", "dir/subdir/foo"):
        file_hash = source_tree.get_file_hash(PathInfo(erepo_dir / rel_path))
        expected_hashes.append(file_hash.value)

    local_cache = dvc.cache.local
    dir_path = PathInfo(erepo_dir / "dir")
    dir_hash = local_cache.tree.get_hash(dir_path)
    local_cache.save(dir_path, source_tree, dir_hash)

    for file_hash in expected_hashes:
        cached_path = local_cache.tree.hash_to_path_info(file_hash)
        assert os.path.exists(cached_path)
def _get_checksum(self, locked=True):
    """Return the checksum of ``self.def_path`` inside the source repo.

    Args:
        locked: forwarded unchanged to ``self._make_repo``.

    Returns:
        The ``"md5"`` entry of the matching output's ``info`` when
        ``def_path`` is a DVC output of the source repo; otherwise the
        ``.value`` of the hash computed through a ``RepoTree``, so every
        path returns the hash value rather than a wrapper object.
    """
    from dvc.tree.repo import RepoTree

    with self._make_repo(locked=locked) as repo:
        try:
            return repo.find_out_by_relpath(self.def_path).info["md5"]
        except OutputNotFoundError:
            path = PathInfo(os.path.join(repo.root_dir, self.def_path))

            # we want stream but not fetch, so DVC out directories are
            # walked, but dir contents is not fetched
            tree = RepoTree(repo, stream=True)

            # We are polluting our repo cache with some dir listing here
            if tree.isdir(path):
                # NOTE(review): assumes get_hash() returns the same kind of
                # hash-info object as get_file_hash() (hash string exposed
                # as `.value`) - confirm against the tree implementation.
                hash_info = self.repo.cache.local.tree.get_hash(
                    path, tree=tree
                )
            else:
                hash_info = tree.get_file_hash(path)

            # Unwrap the hash so the fallback returns the same kind of value
            # as the `info["md5"]` path above.
            return hash_info.value