Example #1
def test_get_hash_cached_granular(tmp_dir, dvc, mocker):
    tmp_dir.dvc_gen(
        {"dir": {
            "foo": "foo",
            "bar": "bar",
            "subdir": {
                "data": "data"
            }
        }})
    fs = RepoFileSystem(dvc)
    subdir = PathInfo(tmp_dir) / "dir" / "subdir"
    assert fs.info(subdir).get("md5") is None
    assert stage(dvc.odb.local, subdir, fs, "md5").hash_info == HashInfo(
        "md5",
        "af314506f1622d107e0ed3f14ec1a3b5.dir",
    )
    assert fs.info(subdir / "data").get("md5") is None
    assert stage(dvc.odb.local, subdir / "data", fs,
                 "md5").hash_info == HashInfo(
                     "md5",
                     "8d777f385d3dfec8815d20f7496026dc",
                 )
    (tmp_dir / "dir" / "subdir" / "data").unlink()
    assert (fs.info(subdir /
                    "data")["md5"] == "8d777f385d3dfec8815d20f7496026dc")
Example #2
def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc):
    from dvc.objects.stage import stage
    from dvc.objects.tree import Tree

    tmp_dir.gen({"data": {"1": "1 content", "2": "2 content"}})

    path_info = PathInfo("data")

    tree = Tree.from_list(
        [{"relpath": "1", "md5": "1"}, {"relpath": "2", "md5": "2"}]
    )
    tree.digest()
    with patch("dvc.objects.stage._get_tree_obj", return_value=tree):
        hash1 = stage(
            dvc.odb.local, path_info, dvc.odb.local.fs, "md5"
        ).hash_info

    tree = Tree.from_list(
        [{"md5": "1", "relpath": "1"}, {"md5": "2", "relpath": "2"}]
    )
    tree.digest()
    with patch("dvc.objects.stage._get_tree_obj", return_value=tree):
        hash2 = stage(
            dvc.odb.local, path_info, dvc.odb.local.fs, "md5"
        ).hash_info

    assert hash1 == hash2
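Both variants pass because `Tree.digest()` presumably hashes a canonical serialization of the entries, so input ordering cannot affect the result. A standalone sketch of that idea (hypothetical `dir_digest` helper, not DVC's actual implementation):

import hashlib
import json


def dir_digest(entries):
    # Canonicalize before hashing: sort entries by relpath and sort the
    # keys inside each entry, so only content affects the digest.
    canonical = json.dumps(
        sorted(entries, key=lambda e: e["relpath"]), sort_keys=True
    )
    return hashlib.md5(canonical.encode()).hexdigest()


assert dir_digest(
    [{"relpath": "1", "md5": "1"}, {"relpath": "2", "md5": "2"}]
) == dir_digest(
    [{"md5": "2", "relpath": "2"}, {"md5": "1", "relpath": "1"}]
)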
Example #3
def test_get_hash_cached_dir(tmp_dir, dvc, mocker):
    tmp_dir.dvc_gen(
        {"dir": {
            "foo": "foo",
            "bar": "bar",
            "subdir": {
                "data": "data"
            }
        }})
    fs = RepoFileSystem(dvc)
    expected = "8761c4e9acad696bee718615e23e22db.dir"
    assert fs.info(PathInfo(tmp_dir) / "dir").get("md5") is None
    assert stage(
        dvc.odb.local,
        PathInfo(tmp_dir) / "dir",
        fs,
        "md5",
    ).hash_info == HashInfo(
        "md5",
        "8761c4e9acad696bee718615e23e22db.dir",
    )

    shutil.rmtree(tmp_dir / "dir")
    assert fs.info(PathInfo(tmp_dir) / "dir")["md5"] == expected
    assert stage(
        dvc.odb.local,
        PathInfo(tmp_dir) / "dir",
        fs,
        "md5",
    ).hash_info == HashInfo(
        "md5",
        "8761c4e9acad696bee718615e23e22db.dir",
    )
Example #4
def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc):
    from dvc.objects.stage import stage
    from dvc.objects.tree import Tree

    tmp_dir.gen({"data": {"1": "1 content", "2": "2 content"}})

    path = (tmp_dir / "data").fs_path

    tree = Tree.from_list([{
        "relpath": "1",
        "md5": "1"
    }, {
        "relpath": "2",
        "md5": "2"
    }])
    tree.digest()
    with patch("dvc.objects.stage._stage_tree", return_value=(None, tree)):
        _, _, obj = stage(dvc.odb.local, path, dvc.odb.local.fs, "md5")
        hash1 = obj.hash_info

    tree = Tree.from_list([{
        "md5": "1",
        "relpath": "1"
    }, {
        "md5": "2",
        "relpath": "2"
    }])
    tree.digest()
    with patch("dvc.objects.stage._stage_tree", return_value=(None, tree)):
        _, _, obj = stage(dvc.odb.local, path, dvc.odb.local.fs, "md5")
        hash2 = obj.hash_info

    assert hash1 == hash2
Example #5
def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc):
    from dvc.objects.stage import stage

    tmp_dir.gen({"data": {"1": "1 content", "2": "2 content"}})

    path_info = PathInfo("data")

    dir_info = DirInfo.from_list([{
        "relpath": "1",
        "md5": "1"
    }, {
        "relpath": "2",
        "md5": "2"
    }])
    with patch(
            "dvc.objects.stage._collect_dir",
            return_value=dir_info,
    ):
        hash1 = stage(dvc.odb.local, path_info, dvc.odb.local.fs).hash_info

    dir_info = DirInfo.from_list([{
        "md5": "1",
        "relpath": "1"
    }, {
        "md5": "2",
        "relpath": "2"
    }])
    with patch(
            "dvc.objects.stage._collect_dir",
            return_value=dir_info,
    ):
        hash2 = stage(dvc.odb.local, path_info, dvc.odb.local.fs).hash_info

    assert hash1 == hash2
Example #6
def test_fetch_external_repo_jobs(tmp_dir, scm, mocker, dvc, local_remote):
    tmp_dir.dvc_gen(
        {
            "dir1": {
                "file1": "file1",
                "file2": "file2",
                "file3": "file3",
                "file4": "file4",
            },
        },
        commit="init",
    )

    dvc.push()

    with external_repo(str(tmp_dir)) as repo:
        spy = mocker.spy(repo.cloud, "pull")

        obj = stage(
            dvc.odb.local,
            PathInfo(repo.root_dir) / "dir1",
            repo.repo_fs,
            follow_subrepos=False,
            jobs=3,
        )
        save(
            dvc.odb.local,
            obj,
            jobs=3,
        )

        run_jobs = tuple(spy.call_args_list[0])[1].get("jobs")
        assert run_jobs == 3
Example #7
def test_get_hash_dirty_file(tmp_dir, dvc):
    tmp_dir.dvc_gen("file", "file")
    (tmp_dir / "file").write_text("something")

    fs = RepoFileSystem(dvc)
    assert fs.info(PathInfo(tmp_dir) / "file").get("md5") is None
    actual = stage(dvc.odb.local, PathInfo(tmp_dir) / "file", fs).hash_info
    expected = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f")
    assert actual == expected

    (tmp_dir / "file").unlink()
    assert (fs.info(PathInfo(tmp_dir) /
                    "file")["md5"] == "8c7dd922ad47494fc02c388e12c00eac")
    actual = stage(dvc.odb.local, PathInfo(tmp_dir) / "file", fs).hash_info
    expected = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac")
    assert actual == expected
Example #8
def _changed(path_info, fs, obj, cache):
    logger.trace("checking if '%s'('%s') has changed.", path_info, obj)

    try:
        check(cache, obj)
    except (FileNotFoundError, ObjectFormatError):
        logger.debug("cache for '%s'('%s') has changed.", path_info,
                     obj.hash_info)
        return True

    try:
        actual = stage(cache, path_info, fs, obj.hash_info.name).hash_info
    except FileNotFoundError:
        logger.debug("'%s' doesn't exist.", path_info)
        return True

    if obj.hash_info != actual:
        logger.debug(
            "hash value '%s' for '%s' has changed (actual '%s').",
            obj.hash_info,
            path_info,
            actual,
        )
        return True

    logger.trace("'%s' hasn't changed.", path_info)
    return False
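`_changed` above checks the cached object, restages the path, and compares hashes. The same idea reduced to a standalone sketch for a single file (hypothetical `file_changed` helper, no DVC dependencies):

import hashlib
from pathlib import Path


def file_changed(path: Path, recorded_md5: str) -> bool:
    # A missing file counts as changed, mirroring the
    # FileNotFoundError branch above.
    try:
        actual = hashlib.md5(path.read_bytes()).hexdigest()
    except FileNotFoundError:
        return True
    return actual != recorded_md5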
Example #9
    def download(self, to, jobs=None):
        from dvc.checkout import checkout
        from dvc.config import NoRemoteError
        from dvc.exceptions import NoOutputOrStageError
        from dvc.objects import save
        from dvc.objects.stage import stage

        odb = self.repo.odb.local

        with self._make_repo(cache_dir=odb.cache_dir) as repo:
            if self.def_repo.get(self.PARAM_REV_LOCK) is None:
                self.def_repo[self.PARAM_REV_LOCK] = repo.get_rev()
            path_info = PathInfo(repo.root_dir) / self.def_path
            try:
                repo.fetch([path_info.fspath], jobs=jobs, recursive=True)
            except (NoOutputOrStageError, NoRemoteError):
                pass
            obj = stage(
                odb,
                path_info,
                repo.repo_fs,
                jobs=jobs,
                follow_subrepos=False,
            )
            save(odb, obj, jobs=jobs)

        checkout(to.path_info, to.fs, obj, odb)
Example #10
def _diff(
    path_info,
    fs,
    obj,
    cache,
    relink=False,
    dvcignore: Optional[DvcIgnoreFilter] = None,
):
    old = None
    try:
        _, old = stage(
            cache,
            path_info,
            fs,
            obj.hash_info.name if obj else cache.fs.PARAM_CHECKSUM,
            dry_run=True,
            dvcignore=dvcignore,
        )
    except FileNotFoundError:
        pass

    diff = odiff(old, obj, cache)

    if relink:
        diff.modified.extend(diff.unchanged)

    return diff
Example #11
def test_staging_file(tmp_dir, dvc):
    from dvc.objects import check
    from dvc.objects.stage import stage
    from dvc.objects.transfer import transfer

    tmp_dir.gen("foo", "foo")
    fs = LocalFileSystem()

    local_odb = dvc.odb.local
    staging_odb, obj = stage(local_odb, tmp_dir / "foo", fs, "md5")

    assert not local_odb.exists(obj.hash_info)
    assert staging_odb.exists(obj.hash_info)

    with pytest.raises(FileNotFoundError):
        check(local_odb, obj)
    check(staging_odb, obj)

    transfer(staging_odb, local_odb, {obj.hash_info}, move=True)
    check(local_odb, obj)
    with pytest.raises(FileNotFoundError):
        check(staging_odb, obj)

    path_info = local_odb.hash_to_path_info(obj.hash_info.value)
    assert fs.exists(path_info)
Example #12
    def transfer(
        self,
        from_fs,
        from_info,
        jobs=None,
        update=False,
        no_progress_bar=False,
    ):
        # When running import-url --to-remote / add --to-remote/-o ... we
        # assume that it is unlikely that the odb will contain majority of the
        # hashes, so we transfer everything as is (even if that file might
        # already be in the cache) and don't waste an upload to scan the layout
        # of the source location. But when doing update --to-remote, there is
        # a high probability that the odb might contain some of the hashes, so
        # we first calculate all the hashes (but don't transfer anything) and
        # then only update the missing cache files.

        upload = not (update and from_fs.isdir(from_info))
        jobs = jobs or min((from_fs.jobs, self.odb.fs.jobs))
        obj = stage(
            self.odb,
            from_info,
            from_fs,
            "md5",
            upload=upload,
            jobs=jobs,
            no_progress_bar=no_progress_bar,
        )

        save(self.odb, obj, jobs=jobs, move=upload)
        return obj.hash_info
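The comment in this method boils down to one predicate. A minimal sketch, assuming only that `from_fs` exposes `isdir()` as used above (hypothetical `should_upload_as_is` helper, not part of DVC):

def should_upload_as_is(update, from_fs, from_info):
    # Fresh transfers (import-url / add --to-remote) upload everything
    # as is; `update` on an existing directory hashes first and fills in
    # only the missing cache files.
    return not (update and from_fs.isdir(from_info))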
Example #13
def test_get_hash_dirty_dir(tmp_dir, dvc):
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    (tmp_dir / "dir" / "baz").write_text("baz")

    fs = RepoFileSystem(dvc)
    actual = stage(dvc.odb.local, PathInfo(tmp_dir) / "dir", fs).hash_info
    expected = HashInfo("md5", "ba75a2162ca9c29acecb7957105a0bc2.dir")
    assert actual == expected
    assert actual.dir_info.nfiles == 3
Example #14
def test_get_hash_cached_file(tmp_dir, dvc, mocker):
    tmp_dir.dvc_gen({"foo": "foo"})
    fs = RepoFileSystem(repo=dvc)
    expected = "acbd18db4cc2f85cedef654fccc4a4d8"
    assert fs.info(PathInfo(tmp_dir) / "foo").get("md5") is None
    _, _, obj = stage(dvc.odb.local, PathInfo(tmp_dir) / "foo", fs, "md5")
    assert obj.hash_info == HashInfo("md5", expected)
    (tmp_dir / "foo").unlink()
    assert fs.info(PathInfo(tmp_dir) / "foo")["md5"] == expected
Example #15
def test_get_hash_dirty_dir(tmp_dir, dvc):
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    (tmp_dir / "dir" / "baz").write_text("baz")

    fs = DvcFileSystem(dvc)
    expected = "5ea40360f5b4ec688df672a4db9c17d1.dir"
    assert fs.info(PathInfo(tmp_dir) / "dir").get("md5") == expected
    assert stage(dvc.odb.local,
                 PathInfo(tmp_dir) / "dir", fs,
                 "md5").hash_info == HashInfo("md5", expected)
Example #16
def test_get_hash_granular(tmp_dir, dvc):
    tmp_dir.dvc_gen(
        {"dir": {
            "foo": "foo",
            "bar": "bar",
            "subdir": {
                "data": "data"
            }
        }})
    fs = DvcFileSystem(repo=dvc)
    subdir = PathInfo(tmp_dir) / "dir" / "subdir"
    assert fs.info(subdir).get("md5") is None
    _, _, obj = stage(dvc.odb.local, subdir, fs, "md5", dry_run=True)
    assert obj.hash_info == HashInfo("md5",
                                     "af314506f1622d107e0ed3f14ec1a3b5.dir")
    assert (fs.info(subdir /
                    "data")["md5"] == "8d777f385d3dfec8815d20f7496026dc")
    _, _, obj = stage(dvc.odb.local, subdir / "data", fs, "md5", dry_run=True)
    assert obj.hash_info == HashInfo("md5", "8d777f385d3dfec8815d20f7496026dc")
Example #17
def test_get_hash_dirty_file(tmp_dir, dvc):
    tmp_dir.dvc_gen("file", "file")
    (tmp_dir / "file").write_text("something")

    fs = DvcFileSystem(dvc)
    expected = "8c7dd922ad47494fc02c388e12c00eac"
    assert fs.info(PathInfo(tmp_dir) / "file").get("md5") == expected
    assert stage(dvc.odb.local,
                 PathInfo(tmp_dir) / "file", fs,
                 "md5").hash_info == HashInfo("md5", expected)
Example #18
def test_get_hash_dirty_dir(tmp_dir, dvc):
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    (tmp_dir / "dir" / "baz").write_text("baz")
    clean_staging()

    fs = RepoFileSystem(repo=dvc)
    _, meta, obj = stage(dvc.odb.local, PathInfo(tmp_dir) / "dir", fs, "md5")
    assert obj.hash_info == HashInfo("md5",
                                     "ba75a2162ca9c29acecb7957105a0bc2.dir")
    assert meta.nfiles == 3
Example #19
    def _get_used_and_obj(
        self,
        obj_only=False,
        **kwargs
    ) -> Tuple[Dict[Optional["ObjectDB"], Set["HashInfo"]], "HashFile"]:
        from dvc.config import NoRemoteError
        from dvc.exceptions import NoOutputOrStageError, PathMissingError
        from dvc.objects.stage import stage
        from dvc.objects.tree import Tree

        local_odb = self.repo.odb.local
        locked = kwargs.pop("locked", True)
        with self._make_repo(locked=locked,
                             cache_dir=local_odb.cache_dir) as repo:
            used_obj_ids = defaultdict(set)
            rev = repo.get_rev()
            if locked and self.def_repo.get(self.PARAM_REV_LOCK) is None:
                self.def_repo[self.PARAM_REV_LOCK] = rev

            path_info = PathInfo(repo.root_dir) / str(self.def_path)
            if not obj_only:
                try:
                    for odb, obj_ids in repo.used_objs(
                        [os.fspath(path_info)],
                        force=True,
                        jobs=kwargs.get("jobs"),
                        recursive=True,
                    ).items():
                        if odb is None:
                            odb = repo.cloud.get_remote_odb()
                            odb.read_only = True
                        self._check_circular_import(odb, obj_ids)
                        used_obj_ids[odb].update(obj_ids)
                except (NoRemoteError, NoOutputOrStageError):
                    pass

            try:
                staging, staged_obj = stage(
                    local_odb,
                    path_info,
                    repo.repo_fs,
                    local_odb.fs.PARAM_CHECKSUM,
                )
            except FileNotFoundError as exc:
                raise PathMissingError(self.def_path,
                                       self.def_repo[self.PARAM_URL]) from exc
            staging = copy(staging)
            staging.read_only = True

            self._staged_objs[rev] = staged_obj
            used_obj_ids[staging].add(staged_obj.hash_info)
            if isinstance(staged_obj, Tree):
                used_obj_ids[staging].update(entry.hash_info
                                             for _, entry in staged_obj)
            return used_obj_ids, staged_obj
Example #20
def test_get_hash_dirty_dir(tmp_dir, dvc):
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    (tmp_dir / "dir" / "baz").write_text("baz")

    fs = DvcFileSystem(repo=dvc)
    expected = "5ea40360f5b4ec688df672a4db9c17d1.dir"
    assert fs.info((tmp_dir / "dir").fs_path).get("md5") == expected
    _, _, obj = stage(
        dvc.odb.local, (tmp_dir / "dir").fs_path, fs, "md5", dry_run=True
    )
    assert obj.hash_info == HashInfo("md5", expected)
Example #21
    def _get_hash(self, locked=True):
        from dvc.objects.stage import stage

        with self._make_repo(locked=locked) as repo:
            path_info = PathInfo(repo.root_dir) / self.def_path
            return stage(
                self.repo.odb.local,
                path_info,
                repo.repo_fs,
                self.repo.odb.local.fs.PARAM_CHECKSUM,
            ).hash_info
Example #22
    def _get_hash(self, locked=True):
        from dvc.objects.stage import stage

        with self._make_repo(locked=locked) as repo:
            path_info = PathInfo(repo.root_dir) / self.def_path
            return stage(
                self.repo.odb.local,
                path_info,
                repo.repo_fs,
                follow_subrepos=False,
            ).hash_info
Example #23
def test_get_hash_dirty_file(tmp_dir, dvc):
    tmp_dir.dvc_gen("file", "file")
    (tmp_dir / "file").write_text("something")

    fs = DvcFileSystem(repo=dvc)
    expected = "8c7dd922ad47494fc02c388e12c00eac"
    assert fs.info((tmp_dir / "file").fs_path).get("md5") == expected
    _, _, obj = stage(
        dvc.odb.local, (tmp_dir / "file").fs_path, fs, "md5", dry_run=True
    )
    assert obj.hash_info == HashInfo("md5", expected)
Example #24
def test_get_hash_dirty_file(tmp_dir, dvc):
    from dvc.objects import check
    from dvc.objects.errors import ObjectFormatError
    from dvc.objects.stage import get_file_hash

    tmp_dir.dvc_gen("file", "file")
    file_hash_info = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac")

    (tmp_dir / "file").write_text("something")
    something_hash_info = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f")

    clean_staging()

    # file is modified in workspace
    # get_file_hash(file) should return workspace hash, not DVC cached hash
    fs = RepoFileSystem(repo=dvc)
    assert fs.info(PathInfo(tmp_dir) / "file").get("md5") is None
    staging, _, obj = stage(dvc.odb.local,
                            PathInfo(tmp_dir) / "file", fs, "md5")
    assert obj.hash_info == something_hash_info
    check(staging, obj)

    # file is removed in workspace
    # any staged object referring to modified workspace obj is now invalid
    (tmp_dir / "file").unlink()
    with pytest.raises(ObjectFormatError):
        check(staging, obj)

    # get_file_hash(file) should return DVC cached hash
    assert fs.info(PathInfo(tmp_dir) / "file")["md5"] == file_hash_info.value
    _, hash_info = get_file_hash(PathInfo(tmp_dir) / "file",
                                 fs,
                                 "md5",
                                 state=dvc.state)
    assert hash_info == file_hash_info

    # tmp_dir/file can be staged even though it is missing in workspace since
    # repofs will use the DVC cached hash (and refer to the local cache object)
    _, _, obj = stage(dvc.odb.local, PathInfo(tmp_dir) / "file", fs, "md5")
    assert obj.hash_info == file_hash_info
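The lookup order these comments describe, as a standalone sketch (hypothetical `get_file_md5`, not DVC's `get_file_hash`): hash the workspace content when the file exists, otherwise fall back to the previously recorded hash.

import hashlib
from pathlib import Path


def get_file_md5(path: Path, cached_md5=None):
    try:
        # Prefer the hash of what is actually in the workspace.
        return hashlib.md5(path.read_bytes()).hexdigest()
    except FileNotFoundError:
        # The file is gone: fall back to the recorded (cached) hash.
        return cached_md5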
Example #25
def _fetch_external(self, repo_url, repo_rev, files, jobs):
    from dvc.external_repo import external_repo
    from dvc.objects import save
    from dvc.objects.stage import stage
    from dvc.path_info import PathInfo
    from dvc.scm.base import CloneError

    failed = 0

    results = []

    def cb(result):
        results.append(result)

    odb = self.odb.local
    try:
        with external_repo(repo_url, repo_rev,
                           cache_dir=odb.cache_dir) as repo:
            root = PathInfo(repo.root_dir)
            for path in files:
                path_info = root / path
                try:
                    used = repo.used_cache(
                        [os.fspath(path_info)],
                        force=True,
                        jobs=jobs,
                        recursive=True,
                    )
                    cb(repo.cloud.pull(used, jobs))
                except (NoOutputOrStageError, NoRemoteError):
                    pass
                obj = stage(
                    odb,
                    path_info,
                    repo.repo_fs,
                    "md5",
                    jobs=jobs,
                    follow_subrepos=False,
                )
                save(
                    odb,
                    obj,
                    jobs=jobs,
                    download_callback=cb,
                )
    except CloneError:
        failed += 1
        logger.exception("failed to fetch data for '{}'".format(
            ", ".join(files)))

    return sum(results), failed
Example #26
def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))
    tmp_dir.scm.add([
        os.path.join("dir", "bar"),
        os.path.join("dir", ".gitignore"),
        os.path.join("dir", "foo.dvc"),
    ])
    tmp_dir.scm.commit("add dir")

    fs = RepoFileSystem(dvc)
    actual = stage(dvc.odb.local, PathInfo(tmp_dir) / "dir", fs).hash_info
    expected = HashInfo("md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir")
    assert actual == expected
Example #27
def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))
    tmp_dir.scm.add([
        os.path.join("dir", "bar"),
        os.path.join("dir", ".gitignore"),
        os.path.join("dir", "foo.dvc"),
    ])
    tmp_dir.scm.commit("add dir")
    clean_staging()

    fs = RepoFileSystem(repo=dvc)
    _, _, obj = stage(dvc.odb.local, PathInfo(tmp_dir) / "dir", fs, "md5")
    assert obj.hash_info == HashInfo("md5",
                                     "e1d9e8eae5374860ae025ec84cfd85c7.dir")
Example #28
    def download(self, to, jobs=None):
        from dvc.checkout import checkout
        from dvc.objects import save
        from dvc.objects.stage import stage

        odb = self.repo.odb.local

        with self._make_repo(cache_dir=odb.cache_dir) as repo:
            if self.def_repo.get(self.PARAM_REV_LOCK) is None:
                self.def_repo[self.PARAM_REV_LOCK] = repo.get_rev()
            path_info = PathInfo(repo.root_dir) / self.def_path
            obj = stage(
                odb, path_info, repo.repo_fs, jobs=jobs, follow_subrepos=False,
            )
            save(odb, obj, jobs=jobs)

        checkout(to.path_info, to.fs, obj, odb)
Example #29
File: repo.py Project: ush98/dvc
    def get_used_objs(self,
                      **kwargs) -> Dict[Optional["ObjectDB"], Set["HashFile"]]:
        from dvc.config import NoRemoteError
        from dvc.exceptions import NoOutputOrStageError, PathMissingError
        from dvc.objects.db.git import GitObjectDB
        from dvc.objects.stage import stage

        local_odb = self.repo.odb.local
        locked = kwargs.pop("locked", True)
        with self._make_repo(locked=locked,
                             cache_dir=local_odb.cache_dir) as repo:
            used_objs = defaultdict(set)
            rev = repo.get_rev()
            if locked and self.def_repo.get(self.PARAM_REV_LOCK) is None:
                self.def_repo[self.PARAM_REV_LOCK] = rev

            path_info = PathInfo(repo.root_dir) / str(self.def_path)
            try:
                for odb, objs in repo.used_objs(
                    [os.fspath(path_info)],
                    force=True,
                    jobs=kwargs.get("jobs"),
                    recursive=True,
                ).items():
                    if odb is None:
                        odb = repo.cloud.get_remote().odb
                    self._check_circular_import(odb)
                    used_objs[odb].update(objs)
            except (NoRemoteError, NoOutputOrStageError):
                pass

            try:
                staged_obj = stage(
                    local_odb,
                    path_info,
                    repo.repo_fs,
                    local_odb.fs.PARAM_CHECKSUM,
                )
            except FileNotFoundError as exc:
                raise PathMissingError(self.def_path,
                                       self.def_repo[self.PARAM_URL]) from exc

            self._staged_objs[rev] = staged_obj
            git_odb = GitObjectDB(repo.repo_fs, repo.root_dir)
            used_objs[git_odb].add(staged_obj)
            return used_objs
Example #30
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.repo_fs.download(
            PathInfo(repo.root_dir) / "dir",
            PathInfo(tmp_dir / "out"),
            follow_subrepos=False,
        )
        expected_files = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert (tmp_dir / "out").read_text() == expected_files

        # clear cache to test saving to cache
        cache_dir = tmp_dir / repo.odb.local.cache_dir
        remove(cache_dir)
        clean_staging()
        makedirs(cache_dir)

        staging, _, obj = stage(
            repo.odb.local,
            PathInfo(repo.root_dir) / "dir",
            repo.repo_fs,
            "md5",
            dvcignore=repo.dvcignore,
        )
        transfer(
            staging,
            repo.odb.local,
            {obj.hash_info},
            shallow=False,
            move=True,
        )
        assert set(cache_dir.glob("??/*")) == {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
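The final assertion relies on the content-addressed cache layout: the first two hex digits of a hash name the subdirectory, the rest name the file (compare `hash_to_path_info` in Example #11). A minimal sketch of that mapping (hypothetical helper):

from pathlib import Path


def hash_to_path(cache_dir: Path, md5: str) -> Path:
    # "e1d9e8eae5374860ae025ec84cfd85c7"
    # -> cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7"
    return cache_dir / md5[:2] / md5[2:]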