def test_open_dirty_no_hash(tmp_dir, dvc):
    """Open a workspace file whose .dvc file lists the output without a hash."""
    tmp_dir.gen("file", "file")
    dvcfile = tmp_dir / "file.dvc"
    dvcfile.write_text("outs:\n- path: file\n")

    repo_tree = RepoTree(dvc)
    with repo_tree.open("file", "r") as stream:
        content = stream.read()

    assert content == "file"
def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, setup_remote):
    """Save a directory from an external repo's RepoTree into the local cache."""
    with erepo_dir.chdir():
        erepo_dir.gen({"dir": {"subdir": {"foo": "foo"}, "bar": "bar"}})
        erepo_dir.dvc_add("dir/subdir", commit="subdir")
        erepo_dir.scm_add("dir", commit="dir")
        setup_remote(erepo_dir.dvc)
        erepo_dir.dvc.push()

    # Either fetch or stream has to be set so that DVC dirs get walked; which
    # one does not matter here. Every file object is open()ed and copied from
    # the tree into dvc.cache rather than fetched or streamed from a remote.
    tree = RepoTree(erepo_dir.dvc, stream=True)

    expected = []
    for path in ("dir/bar", "dir/subdir/foo"):
        expected.append(tree.get_file_checksum(erepo_dir / path))

    with erepo_dir.dvc.state:
        cache = dvc.cache.local
        with cache.state:
            source_dir = PathInfo(erepo_dir / "dir")
            cache.save(source_dir, None, tree=tree)

    for checksum in expected:
        cached_path = cache.checksum_to_path_info(checksum)
        assert os.path.exists(cached_path)
def test_open_dirty_hash(tmp_dir, dvc):
    """Open a DVC-tracked file that was modified in the workspace after add."""
    tmp_dir.dvc_gen("file", "file")
    workspace_file = tmp_dir / "file"
    workspace_file.write_text("something")

    repo_tree = RepoTree(dvc)
    with repo_tree.open("file", "r") as stream:
        content = stream.read()

    # The modified workspace content is what gets read back.
    assert content == "something"
def test_exists(tmp_dir, dvc):
    """A DVC-added file is still reported as existing after it is unlinked."""
    tmp_dir.gen("foo", "foo")
    dvc.add("foo")

    workspace_file = tmp_dir / "foo"
    workspace_file.unlink()

    repo_tree = RepoTree(dvc)
    assert repo_tree.exists("foo")
def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):
    """Save a directory from an external repo's RepoTree into the local cache."""
    with erepo_dir.chdir():
        erepo_dir.gen({"dir": {"subdir": {"foo": "foo"}, "bar": "bar"}})
        erepo_dir.dvc_add("dir/subdir", commit="subdir")
        erepo_dir.scm_add("dir", commit="dir")
        erepo_dir.add_remote(config=local_cloud.config)
        erepo_dir.dvc.push()

    # Either fetch or stream has to be set so that DVC dirs get walked; which
    # one does not matter here. Every file object is open()ed and copied from
    # the tree into dvc.cache rather than fetched or streamed from a remote.
    tree = RepoTree(erepo_dir.dvc, stream=True)

    expected = []
    for path in ("dir/bar", "dir/subdir/foo"):
        expected.append(tree.get_file_hash(PathInfo(erepo_dir / path)))

    with erepo_dir.dvc.state:
        cache = dvc.cache.local
        with cache.tree.state:
            source_dir = PathInfo(erepo_dir / "dir")
            dir_hash_info = cache.tree.save_info(source_dir)
            cache.save(source_dir, tree, dir_hash_info)

    for hash_ in expected:
        cached_path = cache.tree.hash_to_path_info(hash_)
        assert os.path.exists(cached_path)
def test_walk(tmp_dir, dvc, dvcfiles, extra_expected):
    """Walk a directory mixing DVC-added and plain files.

    ``dvcfiles`` is forwarded to ``RepoTree.walk`` and ``extra_expected``
    lists the additional entries expected for that setting.
    """
    tmp_dir.gen(
        {
            "dir": {
                "subdir1": {"foo1": "foo1", "bar1": "bar1"},
                "subdir2": {"foo2": "foo2"},
            }
        }
    )
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    # These two files are created after the add, so they stay untracked.
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    base = PathInfo("dir")
    expected = [
        base / "subdir1",
        base / "subdir2",
        base / "subdir1" / "foo1",
        base / "subdir1" / "bar1",
        base / "subdir2" / "foo2",
        base / "foo",
        base / "bar",
    ]

    actual = [
        os.path.join(root, entry)
        for root, dirs, files in tree.walk("dir", dvcfiles=dvcfiles)
        for entry in dirs + files
    ]

    expected = [str(path) for path in expected + extra_expected]
    # Same set of entries, and no duplicates on either side.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
def collect(self, targets=None, revs=None):
    """Gather props and raw data for plots across revisions.

    The result is shaped like:
        {rev: {plots.csv: {
            props: {x: ..., "header": ..., ...},
            data: "...data as a string...",
        }}}

    The data is left unparsed because parsing depends on the props.
    """
    if isinstance(targets, str):
        targets = [targets]
    elif not targets:
        targets = []

    data = {}
    for rev in self.repo.brancher(revs=revs):
        # .brancher() adds unwanted workspace
        if revs is not None and rev not in revs:
            continue
        rev = rev or "workspace"

        tree = RepoTree(self.repo)
        plots = _collect_plots(self.repo, targets, rev)
        for path_info, props in plots.items():
            datafile = relpath(path_info, self.repo.root_dir)
            entry = {"props": props}
            data.setdefault(rev, {})[datafile] = entry

            # Load data from git or dvc cache
            try:
                with tree.open(path_info) as fd:
                    entry["data"] = fd.read()
            except FileNotFoundError:
                # This might happen simply because cache is absent
                pass

    return data
def test_walk(tmp_dir, dvc):
    """Walk a directory mixing DVC-added and plain files, .dvc files included."""
    tmp_dir.gen(
        {
            "dir": {
                "subdir1": {"foo1": "foo1", "bar1": "bar1"},
                "subdir2": {"foo2": "foo2"},
            }
        }
    )
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    # These two files are created after the add, so they have no .dvc files.
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    expected_parts = (
        ("dir", "subdir1"),
        ("dir", "subdir2"),
        ("dir", "subdir1", "foo1"),
        ("dir", "subdir1", "foo1.dvc"),
        ("dir", "subdir1", "bar1"),
        ("dir", "subdir1", "bar1.dvc"),
        ("dir", "subdir2", "foo2"),
        ("dir", "subdir2", "foo2.dvc"),
        ("dir", "foo"),
        ("dir", "bar"),
    )
    expected = [os.path.join(*parts) for parts in expected_parts]

    actual = [
        os.path.join(root, entry)
        for root, dirs, files in tree.walk("dir")
        for entry in dirs + files
    ]

    # Same set of entries, and no duplicates on either side.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
def test_isdir_mixed(tmp_dir, dvc):
    """A directory with only one DVC-added file is a dir, not a file."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tracked_file = tmp_dir / "dir" / "foo"
    dvc.add(str(tracked_file))

    repo_tree = RepoTree(dvc)
    assert repo_tree.isdir("dir")
    assert not repo_tree.isfile("dir")
def test_open(tmp_dir, dvc):
    """A DVC-added file can still be opened after it is unlinked."""
    tmp_dir.gen("foo", "foo")
    dvc.add("foo")

    workspace_file = tmp_dir / "foo"
    workspace_file.unlink()

    repo_tree = RepoTree(dvc)
    with repo_tree.open("foo", "r") as stream:
        content = stream.read()

    assert content == "foo"
def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
    """Opens a specified resource as a file object.

    This is a generator that yields the open file object exactly once.
    NOTE(review): presumably wrapped by a context-manager decorator that is
    outside this view -- confirm.

    Args:
        path: path handed to ``RepoTree.open`` unchanged.
        mode: open mode forwarded to ``RepoTree.open``.
        encoding: encoding forwarded to ``RepoTree.open``.
        **kwargs: extra keyword arguments forwarded to ``RepoTree.open``.

    Raises:
        PathMissingError: when a ``FileNotFoundError`` is raised while the
            resource is being opened or while the yielded object is in use.
    """
    tree = RepoTree(self)
    try:
        with tree.open(path, mode=mode, encoding=encoding, **kwargs) as fobj:
            yield fobj
    except FileNotFoundError as exc:
        # Chain explicitly so the original error is kept as __cause__.
        raise PathMissingError(path, self.url) from exc
def _load_from_revision(repo, datafile, revision):
    """Read ``datafile`` through a RepoTree and build plot data from it.

    Args:
        repo: repo object used to construct the ``RepoTree``.
        datafile: path opened through the tree.
        revision: passed to ``plot_data`` and used in the error when the
            file is missing.

    Returns:
        Whatever ``plot_data(datafile, revision, content)`` returns.

    Raises:
        NoMetricOnRevisionError: when opening or reading the file raises
            ``FileNotFoundError`` or ``PathMissingError``.
    """
    from dvc.repo.tree import RepoTree

    tree = RepoTree(repo)

    try:
        with tree.open(datafile) as fobj:
            datafile_content = fobj.read()
    except (FileNotFoundError, PathMissingError) as exc:
        # Chain explicitly so the original error is kept as __cause__.
        raise NoMetricOnRevisionError(datafile, revision) from exc

    return plot_data(datafile, revision, datafile_content)
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Walking with fetch=True puts the cache files back after removal."""
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]
    dvc.push()
    # Remove the local cache dir so the walk below has to restore it.
    remove(dvc.cache.local.cache_dir)

    tree = RepoTree(dvc, fetch=True)
    with dvc.state:
        # Exhaust the walk; only its side effects matter here.
        for _root, _dirs, _files in tree.walk("dir"):
            pass

    assert os.path.exists(out.cache_path)
    checksum_key = out.tree.PARAM_CHECKSUM
    for entry in out.dir_cache:
        hash_ = entry[checksum_key]
        cached_path = dvc.cache.local.tree.hash_to_path_info(hash_)
        assert os.path.exists(cached_path)
def _ls(repo, path_info, recursive=None, dvc_only=False):
    """List entries under ``path_info`` as ``{relative_path: info_dict}``.

    Each info dict has the keys ``isout``, ``isdir`` and ``isexec``.
    Directories are listed only when ``recursive`` is falsy, in which case
    the walk stops after the top level. With ``dvc_only`` set, files that
    are not DVC outputs and directories unknown to ``tree.dvctree`` are
    skipped. If ``path_info`` is a file, a single entry keyed by its name is
    returned; if it does not exist, the result is empty.
    """
    from dvc.repo.tree import RepoTree

    def onerror(exc):
        raise exc

    def describe(info, is_out, is_dir):
        # Build the per-entry record; isexec is queried only for non-outputs.
        return {
            "isout": is_out,
            "isdir": is_dir,
            "isexec": False if is_out else tree.isexec(info),
        }

    # use our own RepoTree instance instead of repo.repo_tree since we do not
    # want fetch/stream enabled for ls
    tree = RepoTree(repo)

    listing = {}
    try:
        walker = tree.walk(path_info.fspath, onerror=onerror, dvcfiles=True)
        for root, dirs, files in walker:
            for fname in files:
                info = PathInfo(root) / fname
                is_out = tree.isdvc(info)
                if dvc_only and not is_out:
                    continue
                key = str(info.relative_to(path_info))
                listing[key] = describe(info, is_out, False)

            if recursive:
                continue

            for dname in dirs:
                info = PathInfo(root) / dname
                if dvc_only and not (
                    tree.dvctree and tree.dvctree.exists(info)
                ):
                    continue
                is_out = tree.isdvc(info)
                key = str(info.relative_to(path_info))
                listing[key] = describe(info, is_out, True)

            # Non-recursive listing: only the top level is reported.
            break
    except NotADirectoryError:
        # path_info points at a file rather than a directory.
        is_out = tree.isdvc(path_info)
        if dvc_only and not is_out:
            return {}
        return {path_info.name: describe(path_info, is_out, False)}
    except FileNotFoundError:
        return {}

    return listing
def _get_checksum(self, locked=True):
    """Return the checksum of ``self.def_path`` in the repo from ``_make_repo``.

    The md5 recorded on the matching DVC output is used when there is one.
    Otherwise the hash is computed through a streaming RepoTree: via the
    local cache of ``self.repo`` for a directory, or via the tree itself for
    a file.
    """
    from dvc.repo.tree import RepoTree

    with self._make_repo(locked=locked) as repo:
        try:
            out = repo.find_out_by_relpath(self.def_path)
            return out.info["md5"]
        except OutputNotFoundError:
            target = PathInfo(os.path.join(repo.root_dir, self.def_path))

            # we want stream but not fetch, so DVC out directories are
            # walked, but dir contents is not fetched
            tree = RepoTree(repo, stream=True)

            if not tree.isdir(target):
                return tree.get_file_hash(target)

            # We are polluting our repo cache with some dir listing here
            return self.repo.cache.local.get_hash(target, tree=tree)
def open_by_relpath(self, path, remote=None, mode="r", encoding=None):
    """Opens a specified resource as a file descriptor.

    This is a generator that yields the open file object exactly once.
    NOTE(review): presumably wrapped by a context-manager decorator that is
    outside this view -- confirm.

    Args:
        path: path of the resource, joined onto ``self.root_dir``.
        remote: forwarded to ``RepoTree.open``.
        mode: open mode forwarded to ``RepoTree.open``.
        encoding: encoding forwarded to ``RepoTree.open``.

    Raises:
        FileMissingError: on ``FileNotFoundError``; carries the joined path.
        DvcIsADirectoryError: on ``IsADirectoryError``.
    """
    tree = RepoTree(self, stream=True)
    # Join with the root exactly once. Joining the already-joined path again
    # is a no-op for an absolute root_dir and duplicates the prefix for a
    # relative one.
    path = os.path.join(self.root_dir, path)
    try:
        with tree.open(
            path, mode=mode, encoding=encoding, remote=remote,
        ) as fobj:
            yield fobj
    except FileNotFoundError as exc:
        raise FileMissingError(path) from exc
    except IsADirectoryError as exc:
        raise DvcIsADirectoryError from exc
def test_open_in_history(tmp_dir, scm, dvc):
    """Reading "foo" at HEAD~1 returns the content of the first commit."""
    # Commit two successive versions of the same DVC-tracked file.
    for content, message in (("foo", "foo"), ("foofoo", "foofoo")):
        tmp_dir.gen("foo", content)
        dvc.add("foo")
        dvc.scm.add(["foo.dvc", ".gitignore"])
        dvc.scm.commit(message)

    for rev in dvc.brancher(revs=["HEAD~1"]):
        if rev != "working tree":
            repo_tree = RepoTree(dvc)
            with repo_tree.open("foo", "r") as stream:
                assert stream.read() == "foo"
def test_isdvc(tmp_dir, dvc):
    """Check isdvc for added files, an added dir and a file inside that dir."""
    tmp_dir.gen({"foo": "foo", "bar": "bar", "dir": {"baz": "baz"}})
    for target in ("foo", "dir"):
        dvc.add(target)

    repo_tree = RepoTree(dvc)

    # Added file versus a file that was never added.
    assert repo_tree.isdvc("foo")
    assert not repo_tree.isdvc("bar")

    # The added directory itself counts; a file inside it counts only with
    # recursive=True and strict=False.
    assert repo_tree.isdvc("dir")
    assert not repo_tree.isdvc("dir/baz")
    assert repo_tree.isdvc("dir/baz", recursive=True, strict=False)
def _read_metrics(repo, metrics, rev):
    """Load and extract metric values from each existing metric file.

    Paths that do not exist in the tree, files that fail YAML parsing, and
    files whose extracted value is falsy are left out of the result. ``rev``
    is used only in the debug log message.

    Returns:
        dict mapping ``str(metric)`` to the extracted metrics.
    """
    tree = RepoTree(repo)
    collected = {}

    for metric in metrics:
        metric_path = fspath_py35(metric)
        if not tree.exists(metric_path):
            continue

        with tree.open(metric_path, "r") as fobj:
            try:
                # NOTE this also supports JSON
                parsed = yaml.safe_load(fobj)
            except yaml.YAMLError:
                logger.debug(
                    "failed to read '%s' on '%s'", metric, rev, exc_info=True
                )
                continue

            extracted = _extract_metrics(parsed)
            if extracted:
                collected[str(metric)] = extracted

    return collected
def test_walk_onerror(tmp_dir, dvc):
    """walk() swallows errors by default and surfaces them through onerror."""

    def onerror(exc):
        raise exc

    def drain(walker):
        # Exhaust the generator; only raised errors matter here.
        for _ in walker:
            pass

    tmp_dir.dvc_gen("foo", "foo")
    tree = RepoTree(dvc)

    # "dir" does not exist; "foo" exists but is not a directory.
    for target in ("dir", "foo"):
        drain(tree.walk(target))
        with pytest.raises(OSError):
            drain(tree.walk(target, onerror=onerror))
def test_isdvc(tmp_dir, dvc):
    """Only the file that was added with DVC is reported by isdvc."""
    tmp_dir.gen({"foo": "foo", "bar": "bar"})
    dvc.add("foo")

    repo_tree = RepoTree(dvc)
    assert repo_tree.isdvc("foo")
    assert not repo_tree.isdvc("bar")
def repo_tree(self):
    """Build a RepoTree over this object with fetch enabled."""
    tree = RepoTree(self, fetch=True)
    return tree
def test_isdir_isfile(tmp_dir, dvc):
    """isdir/isfile answers survive add-and-delete; isdvc flips to true."""
    tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    def expect(path, isdir, isfile, isdvc):
        # Queries run in the fixed order isdir, isfile, isdvc.
        assert bool(tree.isdir(path)) is isdir
        assert bool(tree.isfile(path)) is isfile
        assert bool(tree.isdvc(path)) is isdvc

    # Plain workspace entries, not yet known to DVC.
    expect("datadir", isdir=True, isfile=False, isdvc=False)
    expect("datafile", isdir=False, isfile=True, isdvc=False)

    dvc.add(["datadir", "datafile"])
    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    # Added to DVC and removed from the workspace.
    expect("datadir", isdir=True, isfile=False, isdvc=True)
    expect("datafile", isdir=False, isfile=True, isdvc=True)