def test_read_with_subrepos(tmp_dir, scm, local_cloud, local_repo):
    """Check ``api.read`` for a root-level file and for files under a subrepo.

    One file is generated at the top level with ``scm_gen``; the subrepo at
    ``dir/subrepo`` gets one ``scm_gen`` file, one ``dvc_gen`` directory and
    one ``dvc_gen`` file, and is then pushed. When ``local_repo`` is falsy the
    repo is addressed through a ``file:///`` URL instead of ``repo=None``.
    """
    tmp_dir.scm_gen("foo.txt", "foo.txt", commit="add foo.txt")

    nested = tmp_dir / "dir" / "subrepo"
    make_subrepo(nested, scm, config=local_cloud.config)
    with nested.chdir():
        nested.scm_gen({"lorem": "lorem"}, commit="add lorem")
        nested.dvc_gen({"dir": {"file.txt": "file.txt"}}, commit="add dir")
        # NOTE(review): the commit message says "add dir" although this call
        # generates "dvc-file" -- looks like a copy-paste; left unchanged.
        nested.dvc_gen("dvc-file", "dvc-file", commit="add dir")
        nested.dvc.push()

    if local_repo:
        repo_path = None
    else:
        repo_path = f"file:///{tmp_dir}"
    subrepo_path = os.path.join("dir", "subrepo")

    # (path passed to api.read, expected content), checked in this order.
    cases = [
        ("foo.txt", "foo.txt"),
        (os.path.join(subrepo_path, "lorem"), "lorem"),
        (os.path.join(subrepo_path, "dvc-file"), "dvc-file"),
        (os.path.join(subrepo_path, "dir", "file.txt"), "file.txt"),
    ]
    for target, content in cases:
        assert api.read(target, repo=repo_path) == content
def test_update_from_subrepos(tmp_dir, dvc, erepo_dir, is_dvc):
    """Check that an import from a subrepo reports and applies an update.

    ``subrepo/foo`` is imported as ``out``; after the file is regenerated with
    new content in the subrepo, ``dvc.status`` must report "update available"
    and ``dvc.update`` must bring in the new content and rev lock.
    """
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)
    # Pick the generator (DVC-based or SCM-based) according to ``is_dvc``.
    gen = nested.dvc_gen if is_dvc else nested.scm_gen

    with nested.chdir():
        gen("foo", "foo", commit="subrepo initial")

    path = os.path.join("subrepo", "foo")
    repo_path = os.fspath(erepo_dir)
    dvc.imp(repo_path, path, out="out")
    assert dvc.status() == {}

    with nested.chdir():
        gen("foo", "foobar", commit="subrepo second commit")

    changed_deps = dvc.status()["out.dvc"][0]["changed deps"]
    assert changed_deps == {f"{path} ({repo_path})": "update available"}

    # Exactly one stage is expected back from the update.
    (updated,) = dvc.update(["out.dvc"])

    assert (tmp_dir / "out").read_text() == "foobar"
    assert updated.deps[0].def_path == path
    expected_def_repo = {
        "url": repo_path,
        "rev_lock": erepo_dir.scm.get_rev(),
    }
    assert updated.deps[0].def_repo == expected_def_repo
def test_hook_is_called(tmp_dir, erepo_dir, mocker):
    """Check ``repo_factory`` is called once per subrepo while walking.

    Six subrepos are created (including one nested inside another), files are
    generated in each of them and in the root, and a spy on ``repo_factory``
    records the calls made while the repo filesystem is walked with
    ``ignore_subrepos=False``.
    """
    subrepo_paths = [
        "subrepo1",
        "subrepo2",
        os.path.join("dir", "subrepo3"),
        os.path.join("dir", "subrepo4"),
        "subrepo5",
        os.path.join("subrepo5", "subrepo6"),
    ]
    subrepos = []
    for rel in subrepo_paths:
        subrepos.append(erepo_dir / rel)
    for sub in subrepos:
        make_subrepo(sub, erepo_dir.scm)

    for repo in [*subrepos, erepo_dir]:
        with repo.chdir():
            repo.scm_gen("foo", "foo", commit=f"git add {repo}/foo")
            repo.dvc_gen("bar", "bar", commit=f"dvc add {repo}/bar")

    with external_repo(str(erepo_dir)) as repo:
        spy = mocker.spy(repo.repo_fs.fs, "repo_factory")

        # Exhaust the walk so that every subrepo gets visited.
        for _ in repo.repo_fs.walk("", ignore_subrepos=False):
            pass
        assert spy.call_count == len(subrepos)

        expected_calls = [
            call(
                os.path.join(repo.root_dir, rel),
                fs=repo.fs,
                repo_factory=repo.repo_fs.fs.repo_factory,
            )
            for rel in subrepo_paths
        ]
        spy.assert_has_calls(expected_calls, any_order=True)
def test_granular_get_from_subrepos(tmp_dir, erepo_dir):
    """Check ``Repo.get`` can fetch one file from a directory in a subrepo."""
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)
    with nested.chdir():
        nested.dvc_gen({"dir": {"bar": "bar"}}, commit="files in subrepo")

    target = os.path.join("subrepo", "dir", "bar")
    Repo.get(os.fspath(erepo_dir), target, out="out")

    fetched = (tmp_dir / "out").read_text()
    assert fetched == "bar"
def test_get_from_subrepos(tmp_dir, erepo_dir, is_dvc, files):
    """Check ``Repo.get`` fetches the first entry of ``files`` from a subrepo.

    ``files`` is generated inside the subrepo with either ``dvc_gen`` or
    ``scm_gen`` depending on ``is_dvc``.
    """
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    if is_dvc:
        gen = nested.dvc_gen
    else:
        gen = nested.scm_gen
    with nested.chdir():
        gen(files, commit="add files in subrepo")

    # Only the first key of ``files`` is fetched and verified.
    key = next(iter(files))
    Repo.get(os.fspath(erepo_dir), f"subrepo/{key}", out="out")

    fetched = (tmp_dir / "out").read_text()
    assert fetched == files[key]
def test_granular_import_from_subrepos(tmp_dir, dvc, erepo_dir):
    """Check ``dvc.imp`` can import one file from a directory in a subrepo.

    Besides the imported content, the resulting stage's first dependency must
    record the requested path, the source URL and the source's current rev.
    """
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)
    with nested.chdir():
        nested.dvc_gen({"dir": {"bar": "bar"}}, commit="files in subrepo")

    path = os.path.join("subrepo", "dir", "bar")
    imported = dvc.imp(os.fspath(erepo_dir), path, out="out")

    assert (tmp_dir / "out").read_text() == "bar"

    dep = imported.deps[0]
    assert dep.def_path == path
    expected_def_repo = {
        "url": os.fspath(erepo_dir),
        "rev_lock": erepo_dir.scm.get_rev(),
    }
    assert dep.def_repo == expected_def_repo
def test_get_url_subrepos(tmp_dir, scm, local_cloud):
    """Check ``api.get_url`` for files tracked inside a subrepo.

    The expected URL is the relative path of the configured cloud URL joined
    with a two-character directory and the remainder of a fixed hash.
    """
    nested = tmp_dir / "subrepo"
    make_subrepo(nested, scm, config=local_cloud.config)
    with nested.chdir():
        nested.dvc_gen(
            {"dir": {"foo": "foo"}, "bar": "bar"}, commit="add files"
        )
        nested.dvc.push()

    # Computed after leaving the subrepo, since relpath depends on the cwd.
    remote_root = os.path.relpath(local_cloud.config["url"])

    foo_url = os.path.join(remote_root, "ac", "bd18db4cc2f85cedef654fccc4a4d8")
    assert api.get_url(os.path.join("subrepo", "dir", "foo")) == foo_url

    bar_url = os.path.join(remote_root, "37", "b51d194a7513e45b56f6524f2d51f2")
    assert api.get_url("subrepo/bar") == bar_url
def test_subrepos_are_ignored_for_git_tracked_dirs(tmp_dir, erepo_dir):
    """Check that downloading ``dir`` yields only the ``scm_gen`` files.

    A subrepo lives at ``dir/subrepo``; after downloading ``dir`` from the
    external repo, the output must equal exactly what was generated with
    ``scm_gen``, i.e. without the subrepo's files.
    """
    nested = erepo_dir / "dir" / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    scm_files = {"foo": "foo", "bar": "bar", "subdir": {"lorem": "lorem"}}
    with erepo_dir.chdir():
        erepo_dir.scm_gen({"dir": scm_files}, commit="add scm dir")

    with nested.chdir():
        nested.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        source = os.path.join(repo.root_dir, "dir")
        destination = os.fspath(tmp_dir / "out")
        repo.repo_fs.download(source, destination)

        # The subrepo's files must be absent from the downloaded tree.
        assert (tmp_dir / "out").read_text() == scm_files
def test_metadata_on_subrepos(make_tmp_dir, tmp_dir, dvc, scm, repo_fs):
    """Check ``repo_fs.metadata`` reports the repo that owns each path.

    Paths at or below the subrepo must report the subrepo as their repo root;
    an external output added to the top-level repo must report ``tmp_dir``.
    """
    subrepo = tmp_dir / "subrepo"
    make_subrepo(subrepo, scm)
    subrepo.scm_gen("foo", "foo", commit="add foo on subrepo")
    subrepo.dvc_gen("foobar", "foobar", commit="add foobar on subrepo")

    subrepo_root = str(subrepo)
    for path in ("subrepo", "subrepo/foo", "subrepo/foobar"):
        owner = repo_fs.metadata(tmp_dir / path).repo
        assert (
            owner.root_dir == subrepo_root
        ), f"repo root didn't match for {path}"

    # External outputs of the top-level DVC repo are supported as well.
    external_dir = make_tmp_dir("external-output")
    external_dir.gen("bar", "bar")
    external_file = external_dir / "bar"
    dvc.add(str(external_file), external=True)

    external_meta = repo_fs.metadata(external_dir / "bar")
    assert external_meta.repo.root_dir == str(tmp_dir)
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    """Check that subrepo content is skipped on download and when staging.

    ``dir`` holds one ``dvc_gen`` file, one ``scm_gen`` file and a subrepo.
    Downloading it with ``follow_subrepos=False`` must give only the two files
    plus ``.gitignore``; staging and transferring ``dir`` into a freshly
    emptied local cache must leave exactly the four expected cache entries.
    """
    nested = erepo_dir / "dir" / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with nested.chdir():
        nested.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        out_dir = tmp_dir / "out"
        repo.repo_fs.download(
            PathInfo(repo.root_dir) / "dir",
            PathInfo(out_dir),
            follow_subrepos=False,
        )
        assert (tmp_dir / "out").read_text() == {
            "foo": "foo",
            "bar": "bar",
            ".gitignore": "/foo\n",
        }

        # Start from an empty cache so the transfer below is what fills it.
        cache_dir = tmp_dir / repo.odb.local.cache_dir
        remove(cache_dir)
        clean_staging()
        makedirs(cache_dir)

        staging, _, obj = stage(
            repo.odb.local,
            PathInfo(repo.root_dir) / "dir",
            repo.repo_fs,
            "md5",
            dvcignore=repo.dvcignore,
        )
        transfer(
            staging,
            repo.odb.local,
            {obj.hash_info},
            shallow=False,
            move=True,
        )

        # (two-character directory, file name) of every expected cache entry.
        expected_entries = [
            ("e1", "d9e8eae5374860ae025ec84cfd85c7.dir"),
            ("37", "b51d194a7513e45b56f6524f2d51f2"),
            ("94", "7d2b84e5aa88170e80dff467a5bfb6"),
            ("ac", "bd18db4cc2f85cedef654fccc4a4d8"),
        ]
        expected_cache = {
            cache_dir / prefix / name for prefix, name in expected_entries
        }
        assert set(cache_dir.glob("??/*")) == expected_cache
def test_import_subrepos(tmp_dir, erepo_dir, dvc, scm, is_dvc, files):
    """Check ``dvc.imp`` imports the first entry of ``files`` from a subrepo.

    The import path is the entry's location relative to ``erepo_dir``; the
    resulting stage's first dependency must record that path, the source URL
    and the source's current rev.
    """
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    if is_dvc:
        gen = nested.dvc_gen
    else:
        gen = nested.scm_gen
    with nested.chdir():
        gen(files, commit="add files in subrepo")

    key = next(iter(files))
    relative = (nested / key).relative_to(erepo_dir)
    path = str(relative)

    imported = dvc.imp(os.fspath(erepo_dir), path, out="out")

    assert (tmp_dir / "out").read_text() == files[key]

    dep = imported.deps[0]
    assert dep.def_path == path
    expected_def_repo = {
        "url": os.fspath(erepo_dir),
        "rev_lock": erepo_dir.scm.get_rev(),
    }
    assert dep.def_repo == expected_def_repo
def test_subrepo(tmp_dir, scm, workspace):
    """Check that an experiment can be run from a subrepo at ``dir/repo``.

    A ``copy-file`` stage is defined without executing it, its files are
    committed, and an experiment is run with ``foo=2``. The experiment
    revision must contain ``metrics.yaml`` (reading ``foo: 2``) and
    ``dvc.lock``, and must resolve to and from its ref name.
    """
    from tests.unit.fs.test_repo import make_subrepo

    subrepo = tmp_dir / "dir" / "repo"
    make_subrepo(subrepo, scm)

    subrepo.gen("copy.py", COPY_SCRIPT)
    subrepo.gen("params.yaml", "foo: 1")

    with subrepo.chdir():
        subrepo.dvc.run(
            cmd="python copy.py params.yaml metrics.yaml",
            metrics_no_cache=["metrics.yaml"],
            params=["foo"],
            name="copy-file",
            no_exec=True,
        )
        tracked = [
            subrepo / name for name in ("dvc.yaml", "copy.py", "params.yaml")
        ]
        scm.add(tracked)
        scm.commit("init")

        results = subrepo.dvc.experiments.run(
            PIPELINE_FILE, params=["foo=2"], tmp_dir=not workspace
        )
        assert results

    exp = first(results)
    ref_info = first(exp_refs_by_rev(scm, exp))

    # Inspect the files as they exist at the experiment revision.
    exp_fs = scm.get_fs(exp)
    for fname in ("metrics.yaml", "dvc.lock"):
        assert exp_fs.exists(subrepo / fname)
    with exp_fs.open(subrepo / "metrics.yaml") as fobj:
        metrics_text = fobj.read().strip()
        assert metrics_text == "foo: 2"

    assert subrepo.dvc.experiments.get_exact_name(exp) == ref_info.name
    assert scm.resolve_rev(ref_info.name) == exp
def test_import_complete_repo(tmp_dir, dvc, erepo_dir):
    """Check importing a whole subrepo and the whole root repo.

    Importing ``subrepo`` must yield the subrepo's directory and its
    ``.gitignore``; importing ``os.curdir`` must yield only the root repo's
    file and ``.gitignore``.
    """
    with erepo_dir.chdir():
        erepo_dir.dvc_gen({"foo": "foo"}, commit="add foo")

    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)
    with nested.chdir():
        nested.dvc_gen({"dir": {"bar": "bar"}}, commit="files in subrepo")

    source = os.fspath(erepo_dir)

    dvc.imp(source, "subrepo", out="out_sub")
    expected_sub = {
        ".gitignore": "/dir\n",
        "dir": {"bar": "bar"},
    }
    assert (tmp_dir / "out_sub").read_text() == expected_sub

    dvc.imp(source, os.curdir, out="out")
    expected_root = {
        ".gitignore": "/foo\n",
        "foo": "foo",
    }
    assert (tmp_dir / "out").read_text() == expected_root
def test_subrepo_is_constructed_properly(
    tmp_dir, scm, mocker, make_tmp_dir, root_is_dvc
):
    """Check a subrepo built during a walk takes the outer repo's cache setup.

    The external repo is opened with a custom cache dir and the ``symlink``
    cache type; the repo object returned by the spied ``repo_factory`` must
    use both, and its auto-generated upstream remote must point at the
    subrepo's original local cache.
    """
    if root_is_dvc:
        make_subrepo(tmp_dir, scm)

    subrepo_dir = tmp_dir / "subrepo"
    make_subrepo(subrepo_dir, scm)
    local_cache = subrepo_dir.dvc.odb.local.cache_dir

    tmp_dir.scm_gen("bar", "bar", commit="add bar")
    subrepo_dir.dvc_gen("foo", "foo", commit="add foo")

    cache_dir = make_tmp_dir("temp-cache")
    with external_repo(
        str(tmp_dir), cache_dir=str(cache_dir), cache_types=["symlink"]
    ) as repo:
        spy = mocker.spy(repo.repo_fs, "repo_factory")

        # Exhaust the walk so that the subrepo actually gets constructed.
        for _ in repo.repo_fs.walk(repo.root_dir, ignore_subrepos=False):
            pass
        assert spy.call_count == 1
        built_subrepo = spy.spy_return

        assert repo.url == str(tmp_dir)

        expected_cache_dir = str(cache_dir)
        assert repo.config["cache"]["dir"] == expected_cache_dir
        assert repo.odb.local.cache_dir == expected_cache_dir
        assert built_subrepo.odb.local.cache_dir == expected_cache_dir

        assert repo.config["cache"]["type"] == ["symlink"]
        assert repo.odb.local.cache_types == ["symlink"]
        assert built_subrepo.odb.local.cache_types == ["symlink"]

        subrepo_upstream = built_subrepo.config["remote"][
            "auto-generated-upstream"
        ]
        assert subrepo_upstream["url"] == local_cache

        if root_is_dvc:
            main_cache = tmp_dir.dvc.odb.local.cache_dir
            root_upstream = repo.config["remote"]["auto-generated-upstream"]
            assert root_upstream["url"] == str(main_cache)
def test_pull_imported_stage_from_subrepos(tmp_dir, dvc, erepo_dir, is_dvc, files):
    """Check ``dvc.pull`` restores an output imported from a subrepo.

    After the import, the local cache and the output are removed; pulling
    ``out.dvc`` must report the output as added and restore its content.
    """
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    if is_dvc:
        gen = nested.dvc_gen
    else:
        gen = nested.scm_gen
    with nested.chdir():
        gen(files, commit="files in subrepo")

    key = first(files)
    path = os.path.join("subrepo", key)
    dvc.imp(os.fspath(erepo_dir), path, out="out")

    # Wipe both the cache and the output, then recreate an empty cache dir.
    remove(dvc.odb.local.cache_dir)
    remove("out")
    makedirs(dvc.odb.local.cache_dir)

    stats = dvc.pull(["out.dvc"])

    # A dict entry in ``files`` is expected with a trailing separator.
    if isinstance(files[key], dict):
        expected = [f"out{os.sep}"]
    else:
        expected = ["out"]
    assert stats["added"] == expected
    assert (tmp_dir / "out").read_text() == files[key]
def test_info_on_subrepos(make_tmp_dir, tmp_dir, dvc, scm, repo_fs):
    """Check ``repo_fs.info`` reports the repo that owns each path.

    Paths at or below the subrepo must report the subrepo as their repo root;
    an external output added to the top-level repo must report ``tmp_dir``.
    """
    subrepo = tmp_dir / "subrepo"
    make_subrepo(subrepo, scm)
    with subrepo.chdir():
        subrepo.scm_gen("foo", "foo", commit="add foo on subrepo")
        subrepo.dvc_gen("foobar", "foobar", commit="add foobar on subrepo")

    subrepo_root = str(subrepo)
    candidates = (
        "subrepo",
        os.path.join("subrepo", "foo"),
        os.path.join("subrepo", "foobar"),
    )
    for path in candidates:
        owner = repo_fs.info(tmp_dir / path)["repo"]
        assert (
            owner.root_dir == subrepo_root
        ), f"repo root didn't match for {path}"

    # External outputs of the top-level DVC repo are supported as well.
    external_dir = make_tmp_dir("external-output")
    external_dir.gen("bar", "bar")
    dvc.add(str(external_dir / "bar"), external=True)

    external_info = repo_fs.info((external_dir / "bar").fs_path)
    assert external_info["repo"].root_dir == str(tmp_dir)