示例#1
0
def test_walk_dirty(tmp_dir, dvc):
    """Walk a DVC-generated directory after the workspace has been modified.

    The workspace gains an extra file and sub-directory and loses ``foo``;
    the expected walk result lists the added entries and omits ``foo``.
    """
    tmp_dir.dvc_gen(
        {
            "dir": {
                "foo": "foo",
                "subdir1": {"foo1": "foo1", "bar1": "bar1"},
                "subdir2": {"foo2": "foo2"},
            }
        }
    )
    # Modify the workspace: add new entries, then delete one generated file.
    tmp_dir.gen({"dir": {"bar": "bar", "subdir3": {"foo3": "foo3"}}})
    (tmp_dir / "dir" / "foo").unlink()

    tree = RepoTree(dvc)
    base = PathInfo("dir")
    expected = [
        str(path)
        for path in (
            base / "subdir1",
            base / "subdir2",
            base / "subdir3",
            base / "subdir1" / "foo1",
            base / "subdir1" / "bar1",
            base / "subdir2" / "foo2",
            base / "subdir3" / "foo3",
            base / "bar",
        )
    ]

    # Flatten every (root, dirs, files) triple into joined paths.
    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk("dir")
        for name in dirs + files
    ]

    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
示例#2
0
def test_walk_mixed_dir(tmp_dir, scm, dvc):
    """Walk a directory holding one DVC-added file and git-added files."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))
    git_paths = [
        os.path.join("dir", name) for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(git_paths)
    tmp_dir.scm.commit("add dir")

    tree = RepoTree(dvc)

    # Note that "foo.dvc" is not part of the expected listing.
    expected = [
        str(PathInfo("dir") / name) for name in ("foo", "bar", ".gitignore")
    ]
    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk("dir")
        for name in dirs + files
    ]

    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
示例#3
0
def _collect_plots(repo, targets=None, rev=None):
    """Map plot path infos to their plot properties.

    Without ``targets`` every stage output flagged as a plot is returned.
    With ``targets``, only matching plot outputs are returned; any remaining
    target that the repo tree reports as a file is included with empty
    properties, and every other remaining target triggers a warning that
    mentions ``rev``.
    """
    all_plots = {out for stage in repo.stages for out in stage.outs if out.plot}

    def as_props(outs):
        return {out.path_info: _plot_props(out) for out in outs}

    if not targets:
        return as_props(all_plots)

    pending = {PathInfo(os.path.abspath(target)) for target in targets}

    matched = set()
    for plot in all_plots:
        if plot.path_info not in pending:
            continue
        matched.add(plot)
        # Each target is consumed by the first plot output that matches it.
        pending.remove(plot.path_info)

    tree = RepoTree(repo)
    result = as_props(matched)

    # Whatever is still pending did not correspond to a declared plot output.
    for info in pending:
        if not tree.isfile(info):
            logger.warning(
                "'%s' was not found at: '%s'. It will not be plotted.",
                info,
                rev,
            )
            continue
        result[info] = {}

    return result
示例#4
0
def test_open_dirty_no_hash(tmp_dir, dvc):
    """Open a file whose hand-written .dvc file lists the path without a hash."""
    tmp_dir.gen("file", "file")
    dvcfile = tmp_dir / "file.dvc"
    dvcfile.write_text("outs:\n- path: file\n")

    repo_tree = RepoTree(dvc)
    with repo_tree.open("file", "r") as handle:
        content = handle.read()
        assert content == "file"
示例#5
0
def test_exists(tmp_dir, dvc):
    """A dvc-added file is still reported as existing after it is deleted."""
    tmp_dir.gen("foo", "foo")
    dvc.add("foo")
    workspace_copy = tmp_dir / "foo"
    workspace_copy.unlink()

    repo_tree = RepoTree(dvc)
    assert repo_tree.exists("foo")
示例#6
0
文件: collect.py 项目: hitman56/dvc
def _collect_paths(
    repo: Repo,
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Resolve ``targets`` to the set of path infos present in the repo tree.

    Each target is made absolute. With ``recursive`` set, files found under
    a directory target are added as well. A target that does not exist is
    dropped; a warning mentioning ``rev`` is logged only when ``recursive``
    is false.
    """
    path_infos = {PathInfo(os.path.abspath(target)) for target in targets}
    tree = RepoTree(repo)

    found = set()
    for info in path_infos:
        if recursive and tree.isdir(info):
            found.update(tree.walk_files(info))

        if tree.exists(info):
            found.add(info)
        elif not recursive:
            logger.warning(
                "'%s' was not found at: '%s'.",
                info,
                rev,
            )
    return found
示例#7
0
    def collect(self, targets=None, revs=None):
        """Gather plot props and raw data for each requested revision.

        The result is shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "...data as a string...",
            }}}
        The data is left unparsed because parsing depends on the props.
        """
        if isinstance(targets, str):
            targets = [targets]
        else:
            targets = targets or []

        data = {}
        for rev in self.repo.brancher(revs=revs):
            # .brancher() adds unwanted workspace
            if revs is not None and rev not in revs:
                continue
            rev = rev or "workspace"

            tree = RepoTree(self.repo)
            plots = _collect_plots(self.repo, targets, rev)
            for path_info, props in plots.items():
                datafile = relpath(path_info, self.repo.root_dir)
                entry = {"props": props}
                data.setdefault(rev, {})[datafile] = entry

                # Load data from git or dvc cache
                try:
                    with tree.open(path_info) as fd:
                        entry["data"] = fd.read()
                except FileNotFoundError:
                    # This might happen simply because cache is absent
                    pass

        return data
示例#8
0
文件: collect.py 项目: jubaer145/dvc
def _collect_paths(
    repo: "Repo",
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Resolve ``targets`` to a list of path infos present in the repo tree.

    Each target is made absolute. With ``recursive`` set, files found under
    a directory target are appended before the directory itself. A missing
    target is dropped; when ``recursive`` is false a warning is logged whose
    wording depends on whether ``rev`` denotes the workspace.
    """
    from dvc.tree.repo import RepoTree

    path_infos = [PathInfo(os.path.abspath(target)) for target in targets]
    tree = RepoTree(repo)

    found = []
    for info in path_infos:
        if recursive and tree.isdir(info):
            found.extend(tree.walk_files(info))

        if tree.exists(info):
            found.append(info)
            continue

        # Missing targets are skipped silently in recursive mode.
        if recursive:
            continue

        if rev in ("workspace", ""):
            logger.warning(
                "'%s' was not found in current workspace.", info,
            )
        else:
            logger.warning(
                "'%s' was not found at: '%s'.", info, rev,
            )
    return found
示例#9
0
def test_walk(tmp_dir, dvc, dvcfiles, extra_expected):
    """Walk a dir holding recursively dvc-added files and later plain files.

    ``dvcfiles`` is forwarded to ``walk`` and ``extra_expected`` lists the
    additional entries the walk should report for that setting.
    """
    tmp_dir.gen(
        {
            "dir": {
                "subdir1": {"foo1": "foo1", "bar1": "bar1"},
                "subdir2": {"foo2": "foo2"},
            }
        }
    )
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    # These two files are created after the add call above.
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    base = PathInfo("dir")
    expected = [
        base / "subdir1",
        base / "subdir2",
        base / "subdir1" / "foo1",
        base / "subdir1" / "bar1",
        base / "subdir2" / "foo2",
        base / "foo",
        base / "bar",
    ]

    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk("dir", dvcfiles=dvcfiles)
        for name in dirs + files
    ]

    expected = [str(path) for path in expected + extra_expected]
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
示例#10
0
文件: show.py 项目: zang3tsu/dvc
def _collect_metrics(repo, targets, recursive):
    """Return the path infos of metrics files.

    Without ``targets``, the path info of every stage output flagged as a
    metric is returned (de-duplicated). With ``targets``, the targets that
    the repo tree reports as files are returned, followed - when
    ``recursive`` is set - by the files found under directory targets.
    """
    if not targets:
        declared = {
            out.path_info
            for stage in repo.stages
            for out in stage.outs
            if out.metric
        }
        return list(declared)

    infos = [PathInfo(os.path.abspath(target)) for target in targets]
    tree = RepoTree(repo)

    nested = []
    if recursive:
        for info in infos:
            if tree.isdir(info):
                nested.extend(tree.walk_files(info))

    direct = [info for info in infos if tree.isfile(info)]
    return direct + nested
示例#11
0
def test_get_hash_cached_dir(tmp_dir, dvc, mocker):
    """Check which hashing routine runs before and after the dir is removed.

    While the directory is in the workspace, ``get_file_hash`` on the repo
    tree is called and the DVC tree's ``get_dir_hash`` is not; once the
    directory is deleted the roles are reversed. The hash is the same both
    times.
    """
    tmp_dir.dvc_gen(
        {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}}
    )
    tree = RepoTree(dvc)
    file_hash_spy = mocker.spy(tree, "get_file_hash")
    dir_hash_spy = mocker.spy(tree._dvctrees[dvc.root_dir], "get_dir_hash")

    dir_info = PathInfo(tmp_dir) / "dir"
    expected = HashInfo("md5", "8761c4e9acad696bee718615e23e22db.dir")

    with dvc.state:
        assert tree.get_hash(dir_info) == expected
    assert file_hash_spy.called
    assert not dir_hash_spy.called
    file_hash_spy.reset_mock()

    # Remove the workspace copy and hash again.
    shutil.rmtree(tmp_dir / "dir")
    with dvc.state:
        assert tree.get_hash(dir_info) == expected
    assert not file_hash_spy.called
    assert dir_hash_spy.called
示例#12
0
def test_open_dirty_hash(tmp_dir, dvc):
    """Opening an overwritten DVC-generated file returns the new contents."""
    tmp_dir.dvc_gen("file", "file")
    modified = tmp_dir / "file"
    modified.write_text("something")

    repo_tree = RepoTree(dvc)
    with repo_tree.open("file", "r") as handle:
        content = handle.read()
        assert content == "something"
示例#13
0
def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):
    """Save a directory from another repo's RepoTree into the local cache."""
    with erepo_dir.chdir():
        erepo_dir.gen({"dir": {"subdir": {"foo": "foo"}, "bar": "bar"}})
        erepo_dir.dvc_add("dir/subdir", commit="subdir")
        erepo_dir.scm_add("dir", commit="dir")
        erepo_dir.add_remote(config=local_cloud.config)
        erepo_dir.dvc.push()

    # test only cares that either fetch or stream are set so that DVC dirs are
    # walked.
    #
    # for this test, all file objects are being opened() and copied from tree
    # into dvc.cache, not fetched or streamed from a remote
    source_tree = RepoTree(erepo_dir.dvc, stream=True)
    expected_hashes = [
        source_tree.get_file_hash(PathInfo(erepo_dir / rel)).value
        for rel in ("dir/bar", "dir/subdir/foo")
    ]

    local_cache = dvc.cache.local
    dir_info = PathInfo(erepo_dir / "dir")
    local_cache.save(
        dir_info, source_tree, local_cache.tree.get_hash(dir_info)
    )

    # Every file hash computed above must now have a cache entry on disk.
    for value in expected_hashes:
        assert os.path.exists(local_cache.tree.hash_to_path_info(value))
示例#14
0
文件: diff.py 项目: zang3tsu/dvc
def _filter_missing(repo, paths):
    """Yield the paths whose first DVC output is reported as "not in cache".

    Args:
        repo: repository handed to ``RepoTree`` (opened with ``stream=True``).
        paths: iterable of paths to inspect.

    Yields:
        Every path from ``paths`` that is DVC-tracked and whose first
        output's status entry equals ``"not in cache"``. Paths for which the
        tree raises ``FileNotFoundError`` are skipped.
    """
    repo_tree = RepoTree(repo, stream=True)
    for path in paths:
        try:
            metadata = repo_tree.metadata(path)
        except FileNotFoundError:
            # The tree has no record of this path, so there is nothing to
            # report for it; keep going instead of aborting the generator.
            continue

        if not metadata.is_dvc:
            continue

        out = metadata.outs[0]
        # status() is not guaranteed to hold a key for this output
        # (presumably it is empty when nothing changed - TODO confirm), so
        # use .get() rather than indexing to avoid a KeyError.
        if out.status().get(str(out)) == "not in cache":
            yield path
示例#15
0
文件: test_repo.py 项目: vijay120/dvc
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Hash a DVC-generated file after its contents were overwritten."""
    tmp_dir.dvc_gen("file", "file")
    (tmp_dir / "file").write_text("something")

    repo_tree = RepoTree(dvc)
    target = PathInfo(tmp_dir) / "file"
    assert repo_tree.get_hash(target) == HashInfo(
        "md5", "8c7dd922ad47494fc02c388e12c00eac"
    )
示例#16
0
def test_isdir_mixed(tmp_dir, dvc):
    """A directory with only one dvc-added file is a directory, not a file."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})

    added_file = tmp_dir / "dir" / "foo"
    dvc.add(str(added_file))

    repo_tree = RepoTree(dvc)
    assert repo_tree.isdir("dir")
    assert not repo_tree.isfile("dir")
示例#17
0
文件: test_repo.py 项目: vijay120/dvc
def test_open(tmp_dir, dvc):
    """A dvc-added file can still be read after its workspace copy is gone."""
    tmp_dir.gen("foo", "foo")
    dvc.add("foo")
    workspace_copy = tmp_dir / "foo"
    workspace_copy.unlink()

    repo_tree = RepoTree(dvc)
    with repo_tree.open("foo", "r") as handle:
        content = handle.read()
        assert content == "foo"
示例#18
0
def test_get_hash_cached_file(tmp_dir, dvc, mocker):
    """Hashing a DVC-generated file calls the DVC tree's ``get_file_hash``."""
    tmp_dir.dvc_gen({"foo": "foo"})
    repo_tree = RepoTree(dvc)
    root_dvc_tree = repo_tree._dvctrees[dvc.root_dir]
    file_hash_spy = mocker.spy(root_dvc_tree, "get_file_hash")

    expected = HashInfo("md5", "acbd18db4cc2f85cedef654fccc4a4d8")
    assert repo_tree.get_hash(PathInfo(tmp_dir) / "foo") == expected
    assert file_hash_spy.called
示例#19
0
文件: test_repo.py 项目: vijay120/dvc
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """Hash a DVC-generated directory after an extra file was written into it."""
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    extra_file = tmp_dir / "dir" / "baz"
    extra_file.write_text("baz")

    repo_tree = RepoTree(dvc)
    dir_hash = repo_tree.get_hash(PathInfo(tmp_dir) / "dir")

    assert dir_hash == HashInfo("md5", "ba75a2162ca9c29acecb7957105a0bc2.dir")
    # Two generated files plus the one written afterwards.
    assert dir_hash.dir_info.nfiles == 3
示例#20
0
 def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
     """Opens a specified resource as a file object.

     Generator that yields the file object obtained from a ``RepoTree``
     built on this repo; ``mode``, ``encoding`` and any extra keyword
     arguments are forwarded to ``tree.open``.

     Raises:
         PathMissingError: when a ``FileNotFoundError`` is raised while the
             file is being opened or while the yielded object is in use.
     """
     tree = RepoTree(self)
     try:
         with tree.open(path, mode=mode, encoding=encoding,
                        **kwargs) as fobj:
             yield fobj
     except FileNotFoundError as exc:
         # Chain explicitly so the original error is kept as __cause__.
         raise PathMissingError(path, self.url) from exc
示例#21
0
    def collect(
        self,
        targets: List[str] = None,
        revs: List[str] = None,
        recursive: bool = False,
    ) -> Dict[str, Dict]:
        """Gather plot props and raw data for each requested revision.

        The result is shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "...data as a string...",
            }}}
        The data is left unparsed because parsing depends on the props.
        A plot path that is a directory contributes one entry per file
        found under it, all sharing that plot's props.
        """
        from dvc.tree.repo import RepoTree

        if isinstance(targets, str):
            targets = [targets]
        else:
            targets = targets or []

        data = {}
        for rev in self.repo.brancher(revs=revs):
            # .brancher() adds unwanted workspace
            if revs is not None and rev not in revs:
                continue
            rev = rev or "workspace"

            tree = RepoTree(self.repo)
            plots = _collect_plots(self.repo, targets, rev, recursive)
            for path_info, props in plots.items():
                rev_data = data.setdefault(rev, {})

                # Expand a directory into its files; a file stands alone.
                if tree.isdir(path_info):
                    plot_files = [
                        (pi, relpath(pi, self.repo.root_dir))
                        for pi in tree.walk_files(path_info)
                    ]
                else:
                    plot_files = [
                        (path_info, relpath(path_info, self.repo.root_dir))
                    ]

                for path, repo_path in plot_files:
                    entry = {"props": props}
                    rev_data[repo_path] = entry

                    # Load data from git or dvc cache
                    try:
                        with tree.open(path) as fd:
                            entry["data"] = fd.read()
                    except FileNotFoundError:
                        # This might happen simply because cache is absent
                        pass

        return data
示例#22
0
文件: diff.py 项目: hitman56/dvc
def _filter_missing(repo, paths):
    """Yield the paths whose first DVC output is reported as "not in cache".

    Paths that are not DVC-tracked are skipped, as is any path for which a
    ``FileNotFoundError`` is raised while it is being inspected.
    """
    tree = RepoTree(repo, stream=True)
    for path in paths:
        try:
            meta = tree.metadata(path)
            if not meta.is_dvc:
                continue
            first_out = meta.outs[0]
            state = first_out.status().get(str(first_out))
            if state == "not in cache":
                yield path
        except FileNotFoundError:
            continue
示例#23
0
文件: test_repo.py 项目: vijay120/dvc
def test_subrepo_walk(tmp_dir, scm, dvc, dvcfiles, extra_expected):
    """Walk a directory that contains two sub-repositories.

    ``dvcfiles`` is forwarded to ``walk`` and ``extra_expected`` lists the
    additional entries the walk should report for that setting.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )

    subrepo1 = tmp_dir / "dir" / "repo"
    subrepo2 = tmp_dir / "dir" / "repo2"

    for subrepo in (subrepo1, subrepo2):
        make_subrepo(subrepo, scm)

    subrepo1.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    subrepo2.dvc_gen(
        {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR"
    )

    # using tree that does not have dvcignore
    dvc.tree._reset()
    tree = RepoTree(dvc, subrepos=True, fetch=True)

    base = PathInfo("dir")
    repo = base / "repo"
    repo2 = base / "repo2"
    expected = [
        repo,
        base / "repo.txt",
        repo2,
        repo / ".gitignore",
        repo / "foo",
        repo / "dir1",
        repo / "dir1" / "bar",
        repo2 / ".gitignore",
        repo2 / "lorem",
        repo2 / "dir2",
        repo2 / "dir2" / "ipsum",
    ]

    walk_root = os.path.join(tree.root_dir, "dir")
    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk(walk_root, dvcfiles=dvcfiles)
        for name in dirs + files
    ]

    # The walk starts from an absolute path, so compare absolute paths.
    expected = [
        os.path.join(tree.root_dir, path) for path in expected + extra_expected
    ]
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
示例#24
0
文件: diff.py 项目: hitman56/dvc
def _targets_to_path_infos(repo, targets):
    """Split ``targets`` into resolved path infos and missing targets.

    Returns a ``(path_infos, missing)`` pair: the path info taken from the
    tree metadata for each target that exists, and the targets that do not
    exist, both in input order.
    """
    tree = RepoTree(repo, stream=True)

    resolved, missing = [], []
    for target in targets:
        if not tree.exists(target):
            missing.append(target)
            continue
        resolved.append(tree.metadata(target).path_info)

    return resolved, missing
示例#25
0
def test_walk_nested_subrepos(tmp_dir, dvc, scm, traverse_subrepos):
    """Walk a repo containing nested sub-repositories.

    With ``traverse_subrepos`` the walk is expected to descend into each
    sub-repository; without it only the root repo's own entries are
    expected.
    """

    # generate a dvc and fs structure, with suffix based on repo's basename
    def fs_structure(suffix):
        return {
            f"foo-{suffix}": f"foo-{suffix}",
            f"dir-{suffix}": {f"bar-{suffix}": f"bar-{suffix}"},
        }

    def dvc_structure(suffix):
        return {
            f"lorem-{suffix}": f"lorem-{suffix}",
            f"dvc-{suffix}": {f"ipsum-{suffix}": f"ipsum-{suffix}"},
        }

    subrepos = [
        tmp_dir / rel for rel in ("subrepo1", "subrepo2", "subrepo1/subrepo3")
    ]
    for subrepo in subrepos:
        make_subrepo(subrepo, scm)

    always_present = {".dvcignore", ".gitignore"}  # these files are always there
    expected = {}
    for repo_dir in [*subrepos, tmp_dir]:
        name = os.path.basename(repo_dir)
        git_layout = fs_structure(name)
        dvc_layout = dvc_structure(name)
        with repo_dir.chdir():
            repo_dir.scm_gen(git_layout, commit=f"git add in {repo_dir}")
            repo_dir.dvc_gen(dvc_layout, commit=f"dvc add in {repo_dir}")

        if not (traverse_subrepos or repo_dir == tmp_dir):
            continue

        expected[str(repo_dir)] = {*git_layout, *dvc_layout, *always_present}
        # files inside a dvc directory
        expected[str(repo_dir / f"dvc-{name}")] = {f"ipsum-{name}"}
        # files inside a git directory
        expected[str(repo_dir / f"dir-{name}")] = {f"bar-{name}"}

    if traverse_subrepos:
        # update subrepos
        expected[str(tmp_dir)] |= {"subrepo1", "subrepo2"}
        expected[str(tmp_dir / "subrepo1")].add("subrepo3")

    tree = RepoTree(dvc, subrepos=traverse_subrepos)
    actual = {
        root: set(dirs + files)
        for root, dirs, files in tree.walk(str(tmp_dir))
    }
    assert expected == actual
示例#26
0
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Walking with ``fetch=True`` brings the pushed dir back into the cache."""
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]
    dvc.push()
    # Remove the local cache after pushing.
    remove(dvc.cache.local.cache_dir)

    tree = RepoTree(dvc, fetch=True)
    with dvc.state:
        # Exhaust the walk; only its side effects are checked below.
        for _root, _dirs, _files in tree.walk("dir"):
            pass

    assert os.path.exists(out.cache_path)
    for entry in out.dir_cache:
        checksum = entry[out.tree.PARAM_CHECKSUM]
        cached_path = dvc.cache.local.tree.hash_to_path_info(checksum)
        assert os.path.exists(cached_path)
示例#27
0
def test_get_hash_cached_granular(tmp_dir, dvc, mocker):
    """Hash a sub-directory and a file inside a DVC-generated directory."""
    tmp_dir.dvc_gen(
        {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}}
    )
    repo_tree = RepoTree(dvc)
    root_dvc_tree = repo_tree._dvctrees[dvc.root_dir]
    file_hash_spy = mocker.spy(root_dvc_tree, "get_file_hash")

    subdir = PathInfo(tmp_dir) / "dir" / "subdir"
    expected_dir = HashInfo("md5", "af314506f1622d107e0ed3f14ec1a3b5.dir")
    expected_file = HashInfo("md5", "8d777f385d3dfec8815d20f7496026dc")

    assert repo_tree.get_hash(subdir) == expected_dir
    assert repo_tree.get_hash(subdir / "data") == expected_file
    assert file_hash_spy.called
示例#28
0
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Walking with ``fetch=True`` brings the pushed dir back into the cache."""
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]
    dvc.push()
    # Remove both the local cache and the workspace copy after pushing.
    remove(dvc.cache.local.cache_dir)
    remove(tmp_dir / "dir")

    tree = RepoTree(dvc, fetch=True)
    # Exhaust the walk; only its side effects are checked below.
    for _root, _dirs, _files in tree.walk("dir"):
        pass

    assert os.path.exists(out.cache_path)
    for _key, hash_info in out.dir_cache.items():
        assert hash_info.name == out.tree.PARAM_CHECKSUM
        cached_path = dvc.cache.local.tree.hash_to_path_info(hash_info.value)
        assert os.path.exists(cached_path)
示例#29
0
文件: test_repo.py 项目: vijay120/dvc
def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
    """Hash a directory holding one dvc-added file and git-added files."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))
    git_paths = [
        os.path.join("dir", name) for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(git_paths)
    tmp_dir.scm.commit("add dir")

    repo_tree = RepoTree(dvc)
    dir_info = PathInfo(tmp_dir) / "dir"
    assert repo_tree.get_hash(dir_info) == HashInfo(
        "md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir"
    )
示例#30
0
    def open_by_relpath(self, path, remote=None, mode="r", encoding=None):
        """Yield a file object for ``path``, taken relative to the repo root.

        The file is opened through a streaming, subrepo-aware ``RepoTree``
        while ``self.state`` is entered.

        Raises:
            FileMissingError: on ``FileNotFoundError``.
            DvcIsADirectoryError: on ``IsADirectoryError``.
        """
        repo_tree = RepoTree(self, stream=True, subrepos=True)
        # Rebind to the full path; the error messages below report it.
        path = PathInfo(self.root_dir) / path
        open_kwargs = {"mode": mode, "encoding": encoding, "remote": remote}
        try:
            with self.state, repo_tree.open(path, **open_kwargs) as fobj:
                yield fobj
        except FileNotFoundError as exc:
            raise FileMissingError(path) from exc
        except IsADirectoryError as exc:
            raise DvcIsADirectoryError(f"'{path}' is a directory") from exc