Пример #1
0
def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, setup_remote):
    """Check that saving a directory through a RepoTree fills the local cache.

    An external repo is generated with ``dir/subdir`` added via DVC and
    ``dir`` added via SCM, then pushed to the remote configured by
    ``setup_remote``. ``dir`` is saved into ``dvc.cache.local`` using a
    RepoTree over the external repo, and the test asserts that a cache path
    exists for the checksum of every file under ``dir``.
    """
    layout = {"dir": {"subdir": {"foo": "foo"}, "bar": "bar"}}
    with erepo_dir.chdir():
        erepo_dir.gen(layout)
        erepo_dir.dvc_add("dir/subdir", commit="subdir")
        erepo_dir.scm_add("dir", commit="dir")
        setup_remote(erepo_dir.dvc)
        erepo_dir.dvc.push()

    # The test only needs one of fetch/stream to be enabled so that DVC
    # directories get walked.
    #
    # Here every file object is open()ed and copied from the tree into
    # dvc.cache; nothing is fetched or streamed from a remote.
    tree = RepoTree(erepo_dir.dvc, stream=True)

    # Collect the checksums up front, before anything is saved to the cache.
    expected = []
    for rel_path in ("dir/bar", "dir/subdir/foo"):
        expected.append(tree.get_file_checksum(erepo_dir / rel_path))

    with erepo_dir.dvc.state:
        cache = dvc.cache.local
        with cache.state:
            source_dir = PathInfo(erepo_dir / "dir")
            cache.save(source_dir, None, tree=tree)

    for checksum in expected:
        cached_location = cache.checksum_to_path_info(checksum)
        assert os.path.exists(cached_location)
Пример #2
0
    def _get_checksum(self, locked=True):
        """Return the checksum of ``self.def_path`` inside the made repo.

        The repo is obtained from ``self._make_repo(locked=locked)``. If
        ``def_path`` resolves to an output, that output's ``info["md5"]`` is
        returned. When the lookup raises ``OutputNotFoundError``, the
        checksum is computed through a streaming ``RepoTree`` instead:
        directories go through ``self.repo.cache.local.get_checksum`` and
        anything else through ``tree.get_file_checksum``.
        """
        from dvc.repo.tree import RepoTree

        with self._make_repo(locked=locked) as repo:
            try:
                out = repo.find_out_by_relpath(self.def_path)
                return out.info["md5"]
            except OutputNotFoundError:
                # Not an output: hash the path via a tree. Stream without
                # fetching, so DVC out directories are walked but their
                # contents are not fetched.
                repo_tree = RepoTree(repo, stream=True)
                target = PathInfo(os.path.join(repo.root_dir, self.def_path))

                if not repo_tree.isdir(target):
                    return repo_tree.get_file_checksum(target)

                # NOTE: this pollutes our own repo cache with some dir
                # listing.
                local_cache = self.repo.cache.local
                return local_cache.get_checksum(target, repo_tree)