Exemplo n.º 1
0
def _collect_paths(
    repo: "Repo",
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Resolve ``targets`` to the path infos that exist in the repo tree.

    Each target is made absolute with ``os.path.abspath`` and wrapped in a
    ``PathInfo``. When ``recursive`` is set and a target is a directory, the
    files yielded by ``RepoTree.walk_files`` are collected as well. The
    target itself is collected whenever the tree reports that it exists.
    A target that does not exist is skipped; a warning is logged for it
    unless ``recursive`` is set.

    Args:
        repo: repository the ``RepoTree`` is built from.
        targets: paths to look up.
        recursive: also collect the files below directory targets.
        rev: revision named in the "not found" warning. ``"workspace"`` and
            ``""`` select the "current workspace" wording instead.

    Returns:
        A list of path infos in the order they were collected.
    """
    from dvc.tree.repo import RepoTree

    candidates = [PathInfo(os.path.abspath(target)) for target in targets]
    repo_tree = RepoTree(repo)
    in_workspace = rev in ("workspace", "")

    collected = []
    for candidate in candidates:
        if recursive and repo_tree.isdir(candidate):
            collected.extend(repo_tree.walk_files(candidate))

        if repo_tree.exists(candidate):
            collected.append(candidate)
        elif not recursive:
            # Missing targets are only reported for non-recursive lookups.
            if in_workspace:
                logger.warning(
                    "'%s' was not found in current workspace.", candidate,
                )
            else:
                logger.warning(
                    "'%s' was not found at: '%s'.", candidate, rev,
                )
    return collected
Exemplo n.º 2
0
def _collect_paths(
    repo: Repo,
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Resolve ``targets`` to the set of path infos present in the repo tree.

    Each target is made absolute with ``os.path.abspath`` and wrapped in a
    ``PathInfo``; duplicates collapse because the candidates are held in a
    set. When ``recursive`` is set and a candidate is a directory, the files
    yielded by ``RepoTree.walk_files`` are added too. The candidate itself
    is added whenever the tree reports that it exists. A candidate that
    does not exist is skipped; a warning is logged for it unless
    ``recursive`` is set.

    Args:
        repo: repository the ``RepoTree`` is built from.
        targets: paths to look up.
        recursive: also collect the files below directory targets.
        rev: revision named in the "not found" warning.

    Returns:
        A set of path infos.
    """
    candidates = {PathInfo(os.path.abspath(target)) for target in targets}
    repo_tree = RepoTree(repo)

    found = set()
    for candidate in candidates:
        if recursive and repo_tree.isdir(candidate):
            found |= set(repo_tree.walk_files(candidate))

        if repo_tree.exists(candidate):
            found.add(candidate)
        elif not recursive:
            # Missing targets are only reported for non-recursive lookups.
            logger.warning(
                "'%s' was not found at: '%s'.",
                candidate,
                rev,
            )
    return found
Exemplo n.º 3
0
Arquivo: show.py Projeto: zang3tsu/dvc
def _collect_metrics(repo, targets, recursive):
    """Return the path infos of the metrics to show.

    Without ``targets``, the result is the ``path_info`` of every stage
    output in ``repo.stages`` whose ``metric`` attribute is truthy,
    de-duplicated through a set (so the order is not meaningful).

    With ``targets``, each one is made absolute and wrapped in a
    ``PathInfo``. The result lists the targets the ``RepoTree`` reports as
    files, followed, when ``recursive`` is set, by the files walked under
    the targets that are directories.
    """
    if not targets:
        return list(
            {
                out.path_info
                for stage in repo.stages
                for out in stage.outs
                if out.metric
            }
        )

    infos = [PathInfo(os.path.abspath(target)) for target in targets]
    repo_tree = RepoTree(repo)

    # Directories are expanded first; their files go after the plain files.
    walked = []
    if recursive:
        for info in infos:
            if repo_tree.isdir(info):
                walked.extend(repo_tree.walk_files(info))

    return [info for info in infos if repo_tree.isfile(info)] + walked
Exemplo n.º 4
0
def test_isdir_mixed(tmp_dir, dvc):
    """A directory in which only one file was added to DVC is a directory."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})

    # Only "foo" is added; "bar" stays a plain workspace file.
    added_file = tmp_dir / "dir" / "foo"
    dvc.add(str(added_file))

    repo_tree = RepoTree(dvc)
    assert repo_tree.isdir("dir")
    assert not repo_tree.isfile("dir")
Exemplo n.º 5
0
def test_repo_tree_no_subrepos(tmp_dir, dvc, scm):
    """Check what a ``RepoTree`` built with ``subrepos=False`` reports.

    The walk from ``tmp_dir`` must yield only the parent repo's entries,
    and lookups of paths inside ``dir/repo`` must come back false.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )
    tmp_dir.dvc_gen({"lorem": "lorem"}, commit="add foo")

    nested = tmp_dir / "dir" / "repo"
    make_subrepo(nested, scm)
    nested.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    nested.scm_gen({"ipsum": "ipsum"}, commit="BAR")

    # using tree that does not have dvcignore
    dvc.tree._reset()
    repo_tree = RepoTree(dvc, subrepos=False, fetch=True)

    walked = [
        os.path.normpath(os.path.join(root, entry))
        for root, dirs, files in repo_tree.walk(tmp_dir, dvcfiles=True)
        for entry in dirs + files
    ]
    expected = [
        str(path)
        for path in (
            tmp_dir / ".dvcignore",
            tmp_dir / ".gitignore",
            tmp_dir / "lorem",
            tmp_dir / "lorem.dvc",
            tmp_dir / "dir",
            tmp_dir / "dir" / "repo.txt",
        )
    ]

    # Same entries, and no entry reported twice.
    assert set(walked) == set(expected)
    assert len(walked) == len(expected)

    assert repo_tree.isfile(tmp_dir / "lorem") is True
    assert repo_tree.isfile(nested / "foo") is False
    assert repo_tree.isdir(nested) is False
    assert repo_tree.isdir(tmp_dir / "dir") is True

    assert repo_tree.isdvc(tmp_dir / "lorem") is True
    assert repo_tree.isdvc(nested / "dir1") is False

    assert repo_tree.exists(tmp_dir / "dir" / "repo.txt") is True
    assert repo_tree.exists(tmp_dir / "repo" / "ipsum") is False
Exemplo n.º 6
0
def test_exists_isdir_isfile_dirty(tmp_dir, dvc):
    """Check ``exists``/``isfile``/``isdir`` against a modified workspace.

    First the DVC-generated file and directory are deleted from the
    workspace and the tree must still report them. Then they are recreated
    with swapped kinds and the tree must report the new layout.
    """
    tmp_dir.dvc_gen(
        {"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}}
    )

    repo_tree = RepoTree(dvc)
    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    root = PathInfo(tmp_dir)
    datafile = root / "datafile"
    datadir = root / "datadir"
    datadir_foo = datadir / "foo"
    datafile_foo = datafile / "foo"

    assert repo_tree.exists(datafile)
    assert repo_tree.exists(datadir)
    assert repo_tree.exists(datadir_foo)
    assert repo_tree.isfile(datafile)
    assert not repo_tree.isfile(datadir)
    assert repo_tree.isfile(datadir_foo)
    assert not repo_tree.isdir(datafile)
    assert repo_tree.isdir(datadir)
    assert not repo_tree.isdir(datadir_foo)

    # NOTE: creating file instead of dir and dir instead of file
    tmp_dir.gen({"datadir": "data", "datafile": {"foo": "foo", "bar": "bar"}})
    assert repo_tree.exists(datafile)
    assert repo_tree.exists(datadir)
    assert not repo_tree.exists(datadir_foo)
    assert repo_tree.exists(datafile_foo)
    assert not repo_tree.isfile(datafile)
    assert repo_tree.isfile(datadir)
    assert not repo_tree.isfile(datadir_foo)
    assert repo_tree.isfile(datafile_foo)
    assert repo_tree.isdir(datafile)
    assert not repo_tree.isdir(datadir)
    assert not repo_tree.isdir(datadir_foo)
    assert not repo_tree.isdir(datafile_foo)
Exemplo n.º 7
0
    def collect(
        self,
        targets: List[str] = None,
        revs: List[str] = None,
        recursive: bool = False,
    ) -> Dict[str, Dict]:
        """Gather props and raw data of plots for each requested revision.

        The result is shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "...data as a string...",
            }}}
        The data is returned unparsed, because parsing depends on props.
        The "data" key is left out when the plot file cannot be opened.
        """
        from dvc.tree.repo import RepoTree

        # A single string target is accepted; a falsy value means none.
        if isinstance(targets, str):
            targets = [targets]
        elif not targets:
            targets = []

        collected = {}
        for rev in self.repo.brancher(revs=revs):
            # .brancher() adds unwanted workspace
            if revs is not None and rev not in revs:
                continue
            rev = rev or "workspace"

            tree = RepoTree(self.repo)
            plots = _collect_plots(self.repo, targets, rev, recursive)
            for path_info, props in plots.items():
                per_rev = collected.setdefault(rev, {})

                # A directory plot contributes every file found below it.
                if tree.isdir(path_info):
                    plot_files = [
                        (pi, relpath(pi, self.repo.root_dir))
                        for pi in tree.walk_files(path_info)
                    ]
                else:
                    plot_files = [
                        (path_info, relpath(path_info, self.repo.root_dir))
                    ]

                for path, repo_path in plot_files:
                    entry = {"props": props}
                    per_rev[repo_path] = entry

                    # Load data from git or dvc cache
                    try:
                        with tree.open(path) as fd:
                            entry["data"] = fd.read()
                    except FileNotFoundError:
                        # This might happen simply because cache is absent
                        pass

        return collected
Exemplo n.º 8
0
def test_isdir_isfile(tmp_dir, dvc):
    """Check ``isdir``/``isfile``/``isdvc`` before and after ``dvc.add``.

    Before the add, neither path is reported as DVC-tracked. After the add
    and the removal of the workspace copies, the file/dir answers must stay
    the same and both paths must be reported as DVC-tracked.
    """
    tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}})

    repo_tree = RepoTree(dvc)

    # Plain workspace entries: nothing has been added to DVC yet.
    assert repo_tree.isdir("datadir")
    assert not repo_tree.isfile("datadir")
    assert not repo_tree.isdvc("datadir")
    assert not repo_tree.isdir("datafile")
    assert repo_tree.isfile("datafile")
    assert not repo_tree.isdvc("datafile")

    dvc.add(["datadir", "datafile"])
    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    # Workspace copies are gone; the same tree instance is queried again.
    assert repo_tree.isdir("datadir")
    assert not repo_tree.isfile("datadir")
    assert repo_tree.isdvc("datadir")
    assert not repo_tree.isdir("datafile")
    assert repo_tree.isfile("datafile")
    assert repo_tree.isdvc("datafile")
Exemplo n.º 9
0
    def _get_checksum(self, locked=True):
        """Return the hash of ``self.def_path`` in the repo from ``_make_repo``.

        The ``md5`` recorded on the matching output is used when
        ``find_out_by_relpath`` finds one. When it raises
        ``OutputNotFoundError``, the hash is computed through a streaming
        ``RepoTree`` instead: via the local cache tree's ``get_hash`` for a
        directory, via ``get_file_hash`` for anything else.
        """
        from dvc.tree.repo import RepoTree

        with self._make_repo(locked=locked) as repo:
            try:
                return repo.find_out_by_relpath(self.def_path).info["md5"]
            except OutputNotFoundError:
                target = PathInfo(os.path.join(repo.root_dir, self.def_path))

                # we want stream but not fetch, so DVC out directories are
                # walked, but dir contents is not fetched
                streaming_tree = RepoTree(repo, stream=True)

                if not streaming_tree.isdir(target):
                    return streaming_tree.get_file_hash(target)

                # We are polluting our repo cache with some dir listing here
                hashed = self.repo.cache.local.tree.get_hash(
                    target, tree=streaming_tree
                )
                return hashed[1]
Exemplo n.º 10
0
def test_subrepos(tmp_dir, scm, dvc):
    """Check lookups inside two subrepos with a ``subrepos=True`` tree.

    While each group of assertions runs, ``_get_repo`` is patched with a
    wrapper asserting that the repo it returns has the same ``root_dir`` as
    the subrepo being queried.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )

    subrepo1 = tmp_dir / "dir" / "repo"
    subrepo2 = tmp_dir / "dir" / "repo2"
    make_subrepo(subrepo1, scm)
    make_subrepo(subrepo2, scm)

    subrepo1.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    subrepo2.dvc_gen(
        {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR"
    )

    dvc.tree._reset()
    tree = RepoTree(dvc, subrepos=True, fetch=True)

    def assert_tree_belongs_to_repo(ret_val):
        # Grab the real method now, before the patch replaces it.
        real_get_repo = tree._get_repo

        def checked(*args, **kwargs):
            resolved = real_get_repo(*args, **kwargs)
            assert resolved.root_dir == ret_val.root_dir
            return resolved

        return checked

    with mock.patch.object(
        tree,
        "_get_repo",
        side_effect=assert_tree_belongs_to_repo(subrepo1.dvc),
    ):
        assert tree.exists(subrepo1 / "foo") is True
        assert tree.exists(subrepo1 / "bar") is False

        assert tree.isfile(subrepo1 / "foo") is True
        assert tree.isfile(subrepo1 / "dir1" / "bar") is True
        assert tree.isfile(subrepo1 / "dir1") is False

        assert tree.isdir(subrepo1 / "dir1") is True
        assert tree.isdir(subrepo1 / "dir1" / "bar") is False
        assert tree.isdvc(subrepo1 / "foo") is True

    with mock.patch.object(
        tree,
        "_get_repo",
        side_effect=assert_tree_belongs_to_repo(subrepo2.dvc),
    ):
        assert tree.exists(subrepo2 / "lorem") is True
        assert tree.exists(subrepo2 / "ipsum") is False

        assert tree.isfile(subrepo2 / "lorem") is True
        assert tree.isfile(subrepo2 / "dir2" / "ipsum") is True
        assert tree.isfile(subrepo2 / "dir2") is False

        assert tree.isdir(subrepo2 / "dir2") is True
        assert tree.isdir(subrepo2 / "dir2" / "ipsum") is False
        assert tree.isdvc(subrepo2 / "lorem") is True