def collect(self, targets=None, revs=None):
    """Gather props and raw file contents for plots, grouped by revision.

    The result is shaped like:
        {rev: {plots.csv: {
            props: {x: ..., "header": ..., ...},
            data: "...data as a string...",
        }}}

    The contents are stored exactly as read; parsing is left for later
    because it depends on the props.
    """
    # Accept a single target string as well as a list (or nothing).
    if isinstance(targets, str):
        targets = [targets]
    else:
        targets = targets or []

    collected = {}
    for rev in self.repo.brancher(revs=revs):
        # .brancher() adds unwanted workspace
        if revs is not None and rev not in revs:
            continue
        rev = rev or "workspace"

        tree = RepoTree(self.repo)
        plots = _collect_plots(self.repo, targets, rev)
        for path_info, props in plots.items():
            datafile = relpath(path_info, self.repo.root_dir)
            entry = {"props": props}
            collected.setdefault(rev, {})[datafile] = entry

            # Load data from git or dvc cache
            try:
                with tree.open(path_info) as fd:
                    entry["data"] = fd.read()
            except FileNotFoundError:
                # This might happen simply because cache is absent;
                # the entry is then left without a "data" key.
                pass

    return collected
def test_open_dirty_no_hash(tmp_dir, dvc):
    """RepoTree.open reads a workspace file whose .dvc entry has no hash.

    The "file.dvc" written here lists only the output path.
    """
    tmp_dir.gen("file", "file")
    dvcfile = tmp_dir / "file.dvc"
    dvcfile.write_text("outs:\n- path: file\n")

    with RepoTree(dvc).open("file", "r") as stream:
        content = stream.read()

    assert content == "file"
def test_open_dirty_hash(tmp_dir, dvc):
    """RepoTree.open returns the modified workspace contents of a tracked file.

    The file is first generated through ``dvc_gen`` and then overwritten in
    the workspace, so the text read back must be the new one.
    """
    tmp_dir.dvc_gen("file", "file")
    modified = tmp_dir / "file"
    modified.write_text("something")

    with RepoTree(dvc).open("file", "r") as stream:
        content = stream.read()

    assert content == "something"
def test_open(tmp_dir, dvc):
    """RepoTree.open still reads an added file after its workspace copy is removed."""
    tmp_dir.gen("foo", "foo")
    dvc.add("foo")

    # Remove the workspace copy; the read below must still succeed.
    workspace_copy = tmp_dir / "foo"
    workspace_copy.unlink()

    with RepoTree(dvc).open("foo", "r") as stream:
        content = stream.read()

    assert content == "foo"
def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
    """Open the resource at ``path`` through a RepoTree and yield it.

    This is a generator that yields the opened file object exactly once;
    the file is closed when the generator is resumed or closed.

    Args:
        path: location of the resource, passed straight to ``RepoTree.open``.
        mode: file mode forwarded to ``RepoTree.open``.
        encoding: text encoding forwarded to ``RepoTree.open``.
        **kwargs: any extra options forwarded to ``RepoTree.open``.

    Raises:
        PathMissingError: when a ``FileNotFoundError`` surfaces inside the
            ``try`` block. The original error is attached as the cause.
    """
    tree = RepoTree(self)
    try:
        with tree.open(path, mode=mode, encoding=encoding, **kwargs) as fobj:
            # NOTE(review): if this generator is wrapped by
            # contextlib.contextmanager (decorator not visible here), a
            # FileNotFoundError raised in the caller's ``with`` body is
            # re-raised at this yield and is translated as well -- confirm
            # that is intended.
            yield fobj
    except FileNotFoundError as exc:
        # Chain explicitly so the underlying error is kept as __cause__.
        raise PathMissingError(path, self.url) from exc
def collect(
    self,
    targets: List[str] = None,
    revs: List[str] = None,
    recursive: bool = False,
) -> Dict[str, Dict]:
    """Gather props and raw file contents for plots, grouped by revision.

    The result is shaped like:
        {rev: {plots.csv: {
            props: {x: ..., "header": ..., ...},
            data: "...data as a string...",
        }}}

    A plot path that is a directory contributes one entry per file found
    under it, all sharing that plot's props. The contents are stored
    exactly as read; parsing is left for later because it depends on the
    props.
    """
    from dvc.tree.repo import RepoTree

    # Accept a single target string as well as a list (or nothing).
    if isinstance(targets, str):
        targets = [targets]
    else:
        targets = targets or []

    collected = {}
    for rev in self.repo.brancher(revs=revs):
        # .brancher() adds unwanted workspace
        if revs is not None and rev not in revs:
            continue
        rev = rev or "workspace"

        tree = RepoTree(self.repo)
        plots = _collect_plots(self.repo, targets, rev, recursive)
        for path_info, props in plots.items():
            rev_entries = collected.setdefault(rev, {})

            # Pair every file to read with its path relative to the repo
            # root; the whole listing is built before any file is opened.
            if tree.isdir(path_info):
                plot_files = [
                    (pi, relpath(pi, self.repo.root_dir))
                    for pi in tree.walk_files(path_info)
                ]
            else:
                plot_files = [
                    (path_info, relpath(path_info, self.repo.root_dir))
                ]

            for path, repo_path in plot_files:
                entry = {"props": props}
                rev_entries[repo_path] = entry

                # Load data from git or dvc cache
                try:
                    with tree.open(path) as fd:
                        entry["data"] = fd.read()
                except FileNotFoundError:
                    # This might happen simply because cache is absent;
                    # the entry is then left without a "data" key.
                    pass

    return collected
def open_by_relpath(self, path, remote=None, mode="r", encoding=None):
    """Yield an open file object for ``path``, resolved against the repo root.

    This is a generator that yields exactly once. The file is opened via a
    RepoTree built with ``stream=True`` and ``subrepos=True``, while
    ``self.state`` is held as a context manager.

    Raises:
        FileMissingError: when a ``FileNotFoundError`` surfaces.
        DvcIsADirectoryError: when an ``IsADirectoryError`` surfaces.
    """
    tree = RepoTree(self, stream=True, subrepos=True)
    path = PathInfo(self.root_dir) / path
    open_options = {"mode": mode, "encoding": encoding, "remote": remote}

    try:
        # self.state is entered first, then the file is opened inside it.
        with self.state, tree.open(path, **open_options) as fobj:
            yield fobj
    except IsADirectoryError as exc:
        raise DvcIsADirectoryError(f"'{path}' is a directory") from exc
    except FileNotFoundError as exc:
        raise FileMissingError(path) from exc
def test_open_in_history(tmp_dir, scm, dvc):
    """RepoTree.open returns the older contents when branched to HEAD~1.

    Two versions of "foo" are added and committed in turn; iterating the
    brancher over ``HEAD~1`` must then read back the first version.
    """
    # Each commit message is the same text as the file contents it records.
    for content in ("foo", "foofoo"):
        tmp_dir.gen("foo", content)
        dvc.add("foo")
        dvc.scm.add(["foo.dvc", ".gitignore"])
        dvc.scm.commit(content)

    for rev in dvc.brancher(revs=["HEAD~1"]):
        if rev != "workspace":
            with RepoTree(dvc).open("foo", "r") as stream:
                assert stream.read() == "foo"