def tearDown(self):
    self._popd()
    try:
        remove(self._root_dir)
    except OSError as exc:
        # We ignore this under Windows with a warning because it happened
        # to be really hard to trace all not properly closed files.
        #
        # Best guess so far is that gitpython is the culprit:
        # it opens files and uses __del__ to close them, which can happen
        # late in current pythons. TestGitFixture and TestDvcFixture try
        # to close that and it works on most of the tests, but not all.
        # Repos and thus git repos are created all over the dvc ;)
        if os.name == "nt" and exc.winerror == 32:
            warnings.warn("Failed to remove test dir: " + str(exc))
        else:
            raise
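# A hedged standalone sketch of the same guarded-removal pattern as a reusable
# helper (the helper name is illustrative, not part of this codebase). On
# Windows, winerror 32 is ERROR_SHARING_VIOLATION: the file is still held open
# by some process, so we warn instead of failing the teardown.
import os
import warnings
from shutil import rmtree


def remove_or_warn(path):
    try:
        rmtree(path)
    except OSError as exc:
        # getattr() keeps this safe on non-Windows, where OSError instances
        # may not carry a winerror attribute.
        if os.name == "nt" and getattr(exc, "winerror", None) == 32:
            warnings.warn("Failed to remove dir: " + str(exc))
        else:
            raise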
def get(url, path, out=None, rev=None):
    out = resolve_output(path, out)

    if Stage.is_valid_filename(out):
        raise GetDVCFileError()

    # Creating a directory right beside the output to make sure that they
    # are on the same filesystem, so we could take the advantage of
    # reflink and/or hardlink. Not using tempfile.TemporaryDirectory
    # because it will create a symlink to tmpfs, which defeats the purpose
    # and won't work with reflink/hardlink.
    dpath = os.path.dirname(os.path.abspath(out))
    tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
    try:
        try:
            with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
                # Try any links possible to avoid data duplication.
                #
                # Not using symlink, because we need to remove cache after we
                # are done, and to make that work we would have to copy data
                # over anyway before removing the cache, so we might just copy
                # it right away.
                #
                # Also, we can't use theoretical "move" link type here,
                # because the same cache file might be used a few times in a
                # directory.
                repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

                output = repo.find_out_by_relpath(path)
                if output.use_cache:
                    _get_cached(repo, output, out)
                    return
                # Non-cached output, fall through and try to copy from git.
        except (NotDvcRepoError, NoOutputInExternalRepoError):
            # Not a DVC repository or, possibly, path is not tracked by DVC.
            # Fall through and try to copy from git.
            pass

        if os.path.isabs(path):
            raise FileNotFoundError

        repo_dir = cached_clone(url, rev=rev)
        fs_copy(os.path.join(repo_dir, path), out)
    except (OutputNotFoundError, FileNotFoundError):
        raise PathMissingError(path, url)
    finally:
        remove(tmp_dir)
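# Hedged usage sketch for get() above; the URL, paths, and rev below are
# hypothetical. It fetches a DVC-tracked (or, failing that, git-tracked) file
# from an external repository into the local filesystem:
#
#     get(
#         "https://github.com/user/project",  # hypothetical repo URL
#         "data/features.csv",                # path inside that repo
#         out="features.csv",                 # local destination
#         rev="v1.0",                         # optional revision
#     )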
def test_pull_no_rev_lock(erepo_dir, tmp_dir, dvc):
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "contents", commit="create foo")

    stage = dvc.imp(os.fspath(erepo_dir), "foo", "foo_imported")
    assert "rev" not in stage.deps[0].def_repo
    stage.deps[0].def_repo.pop("rev_lock")

    Dvcfile(dvc, stage.path).dump(stage)

    remove(stage.outs[0].cache_path)
    (tmp_dir / "foo_imported").unlink()

    dvc.pull([stage.path])

    assert (tmp_dir / "foo_imported").is_file()
    assert (tmp_dir / "foo_imported").read_text() == "contents"
def get(url, path, out=None, rev=None):
    out = resolve_output(path, out)

    if Stage.is_valid_filename(out):
        raise GetDVCFileError()

    # Creating a directory right beside the output to make sure that they
    # are on the same filesystem, so we could take the advantage of
    # reflink and/or hardlink. Not using tempfile.TemporaryDirectory
    # because it will create a symlink to tmpfs, which defeats the purpose
    # and won't work with reflink/hardlink.
    dpath = os.path.dirname(os.path.abspath(out))
    tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
    try:
        with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
            # Try any links possible to avoid data duplication.
            #
            # Not using symlink, because we need to remove cache after we are
            # done, and to make that work we would have to copy data over
            # anyway before removing the cache, so we might just copy it
            # right away.
            #
            # Also, we can't use theoretical "move" link type here, because
            # the same cache file might be used a few times in a directory.
            repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

            try:
                output = repo.find_out_by_relpath(path)
            except OutputNotFoundError:
                output = None

            if output and output.use_cache:
                _get_cached(repo, output, out)
            else:
                # Either an uncached out with absolute path or a user error
                if os.path.isabs(path):
                    raise FileNotFoundError

                _copy(os.path.join(repo.root_dir, path), out)
    except (OutputNotFoundError, FileNotFoundError):
        raise PathMissingError(path, url)
    except NotDvcRepoError:
        raise UrlNotDvcRepoError(url)
    finally:
        remove(tmp_dir)
def test_push_incomplete_dir(tmp_dir, dvc, mocker, local_remote):
    (stage,) = tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    remote = dvc.cloud.get_remote("upstream")

    cache = dvc.cache.local
    dir_hash = stage.outs[0].checksum
    used = stage.get_used_cache(remote=remote)

    # remove one of the cache files for directory
    file_hashes = list(used.child_keys(cache.tree.scheme, dir_hash))
    remove(cache.tree.hash_to_path_info(file_hashes[0]))

    dvc.push()
    assert not remote.tree.exists(remote.tree.hash_to_path_info(dir_hash))
    assert not remote.tree.exists(
        remote.tree.hash_to_path_info(file_hashes[0])
    )
    assert remote.tree.exists(remote.tree.hash_to_path_info(file_hashes[1]))
def test_pull_non_workspace(tmp_dir, scm, dvc, erepo_dir):
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "master content", commit="create foo")

        with erepo_dir.branch("branch", new=True):
            erepo_dir.dvc_gen("foo", "branch content", commit="modify foo")

    stage = dvc.imp(
        os.fspath(erepo_dir), "foo", "foo_imported", rev="branch"
    )
    tmp_dir.scm_add([stage.relpath], commit="imported branch")
    scm.tag("ref-to-branch")

    # Overwrite via import
    dvc.imp(os.fspath(erepo_dir), "foo", "foo_imported", rev="master")

    remove(stage.outs[0].cache_path)
    dvc.fetch(all_tags=True)
    assert os.path.exists(stage.outs[0].cache_path)
def test_git_ssh(tmp_dir, scm, server):
    from dulwich.repo import Repo as DulwichRepo
    from sshfs import SSHFileSystem

    from dvc.utils.fs import remove
    from tests.remotes.ssh import TEST_SSH_KEY_PATH, TEST_SSH_USER

    fs = SSHFileSystem(
        host=server.host,
        port=server.port,
        username=TEST_SSH_USER,
        client_keys=[TEST_SSH_KEY_PATH],
    )
    server._ssh.execute("git init --bare test-repo.git")
    url = f"ssh://{TEST_SSH_USER}@{server.host}:{server.port}/~/test-repo.git"

    tmp_dir.scm_gen("foo", "foo", commit="init")
    rev = scm.get_rev()

    scm.push_refspec(
        url,
        "refs/heads/master",
        "refs/heads/master",
        force=True,
        key_filename=TEST_SSH_KEY_PATH,
    )
    assert (
        rev.encode("ascii")
        == fs.open("test-repo.git/refs/heads/master").read().strip()
    )

    remove(tmp_dir / ".git")
    remove(tmp_dir / "foo")
    DulwichRepo.init(str(tmp_dir))

    scm.fetch_refspecs(
        url,
        ["refs/heads/master"],
        force=True,
        key_filename=TEST_SSH_KEY_PATH,
    )
    assert rev == scm.get_ref("refs/heads/master")

    scm.checkout("master")
    assert "foo" == (tmp_dir / "foo").read_text()
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.dvcfs.get(
            "dir",
            os.fspath(tmp_dir / "out"),
        )
        expected_files = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert (tmp_dir / "out").read_text() == expected_files

        # clear cache to test saving to cache
        cache_dir = tmp_dir / repo.odb.local.cache_dir
        remove(cache_dir)
        clean_staging()
        makedirs(cache_dir)

        staging, _, obj = stage(
            repo.odb.local,
            "dir",
            repo.dvcfs,
            "md5",
            ignore=repo.dvcignore,
        )
        transfer(
            staging,
            repo.odb.local,
            {obj.hash_info},
            shallow=False,
            hardlink=True,
        )
        assert set(cache_dir.glob("??/*")) == {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7",
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
def checkout_exp(self, rev, **kwargs):
    """Checkout an experiment to the user's workspace."""
    from git.exc import GitCommandError

    from dvc.repo.checkout import checkout as dvc_checkout

    baseline_rev = self._check_baseline(rev)
    self._scm_checkout(rev)

    branch = self._get_branch_containing(rev)
    m = self.BRANCH_RE.match(branch) if branch else None
    if m and m.group("checkpoint"):
        kwargs.update({"allow_missing": True, "quiet": True})

    tmp = tempfile.NamedTemporaryFile(delete=False).name
    self.scm.repo.head.commit.diff(
        baseline_rev, patch=True, full_index=True, binary=True, output=tmp
    )

    dirty = self.repo.scm.is_dirty(untracked_files=True)
    if dirty:
        logger.debug("Stashing workspace changes.")
        self.repo.scm.repo.git.stash("push", "--include-untracked")

    try:
        if os.path.getsize(tmp):
            logger.debug("Patching local workspace")
            self.repo.scm.repo.git.apply(tmp, reverse=True)
            need_checkout = True
        else:
            need_checkout = False
    except GitCommandError:
        raise DvcException("failed to apply experiment changes.")
    finally:
        remove(tmp)
        if dirty:
            self._unstash_workspace()
        args_file = os.path.join(self.repo.tmp_dir, self.PACKED_ARGS_FILE)
        if os.path.exists(args_file):
            remove(args_file)

    if need_checkout:
        dvc_checkout(self.repo, **kwargs)
def _pull_cached(self, out, path_info, dest):
    with self.state:
        tmp = PathInfo(tmp_fname(dest))
        src = tmp / path_info.relative_to(out.path_info)

        out.path_info = tmp

        # Pull only if some of the needed cache is missing
        if out.changed_cache(filter_info=src):
            self.cloud.pull(out.get_used_cache(filter_info=src))

        failed = out.checkout(filter_info=src)

        move(src, dest)
        remove(tmp)

        if failed:
            raise FileNotFoundError
def _stash_exp(self, *args, **kwargs):
    """Stash changes from the current (parent) workspace as an experiment."""
    rev = self.scm.get_rev()

    tmp = tempfile.NamedTemporaryFile(delete=False).name
    try:
        self.repo.scm.repo.git.diff(patch=True, output=tmp)
        if os.path.getsize(tmp):
            logger.debug("Patching experiment workspace")
            self.scm.repo.git.apply(tmp)
        else:
            raise UnchangedExperimentError(rev)
    finally:
        remove(tmp)

    self._pack_args(*args, **kwargs)

    msg = f"{self.STASH_MSG_PREFIX}{rev}"
    self.scm.repo.git.stash("push", "-m", msg)
    return self.scm.resolve_rev("stash@{0}")
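# Hedged sketch: _stash_exp() above encodes the parent workspace rev into the
# stash message, so later code can recover which baseline an experiment stash
# belongs to. The prefix value below is an assumption for illustration only;
# this codebase defines the real one as STASH_MSG_PREFIX.
STASH_MSG_PREFIX = "dvc-exp:"  # assumed value, not taken from this codebase


def baseline_from_stash_msg(msg):
    # Return the baseline rev encoded in an experiment stash message,
    # or None if the message was not created by _stash_exp().
    if msg.startswith(STASH_MSG_PREFIX):
        return msg[len(STASH_MSG_PREFIX):]
    return None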
def test_missing_cache(tmp_dir, dvc, run_copy_metrics):
    tmp_dir.gen("metrics_t.yaml", "1.1")
    run_copy_metrics(
        "metrics_t.yaml",
        "metrics.yaml",
        metrics=["metrics.yaml"],
    )

    # This one should be skipped
    stage = run_copy_metrics(
        "metrics_t.yaml",
        "metrics2.yaml",
        metrics=["metrics2.yaml"],
    )
    remove(stage.outs[0].fspath)
    remove(stage.outs[0].cache_path)

    assert dvc.metrics.show() == {"": {"metrics.yaml": 1.1}}
def test_diff_no_cache(tmp_dir, scm, dvc):
    tmp_dir.dvc_gen({"dir": {"file": "file content"}}, commit="first")
    scm.tag("v1")

    tmp_dir.dvc_gen(
        {"dir": {"file": "modified file content"}}, commit="second"
    )
    scm.tag("v2")

    remove(dvc.cache.local.cache_dir)

    # invalidate_dir_info to force cache loading
    dvc.cache.local._dir_info = {}

    diff = dvc.diff("v1", "v2")
    assert diff["added"] == []
    assert diff["deleted"] == []
    assert first(diff["modified"])["path"] == os.path.join("dir", "")
def test_pipeline_file_target_ops(tmp_dir, dvc, run_copy, local_remote):
    path = local_remote.url
    tmp_dir.dvc_gen("foo", "foo")
    run_copy("foo", "bar", single_stage=True)

    tmp_dir.dvc_gen("lorem", "lorem")
    run_copy("lorem", "lorem2", name="copy-lorem-lorem2")

    tmp_dir.dvc_gen("ipsum", "ipsum")
    run_copy("ipsum", "baz", name="copy-ipsum-baz")

    outs = ["foo", "bar", "lorem", "ipsum", "baz", "lorem2"]

    remove(dvc.stage_cache.cache_dir)

    dvc.push()

    # each one's a copy of the other, hence 3
    assert len(recurse_list_dir(path)) == 3

    clean(outs, dvc)
    assert set(dvc.pull(["dvc.yaml"])["added"]) == {"lorem2", "baz"}

    clean(outs, dvc)
    assert set(dvc.pull()["added"]) == set(outs)

    # clean everything in remote and push
    from tests.dir_helpers import TmpDir

    clean(TmpDir(path).iterdir())
    dvc.push(["dvc.yaml:copy-ipsum-baz"])
    assert len(recurse_list_dir(path)) == 1

    clean(TmpDir(path).iterdir())
    dvc.push(["dvc.yaml"])
    assert len(recurse_list_dir(path)) == 2

    with pytest.raises(StageNotFound):
        dvc.push(["dvc.yaml:StageThatDoesNotExist"])
    with pytest.raises(StageNotFound):
        dvc.pull(["dvc.yaml:StageThatDoesNotExist"])
def get(url, path, out=None, rev=None):
    out = resolve_output(path, out)

    if Stage.is_valid_filename(out):
        raise GetDVCFileError()

    # Creating a directory right beside the output to make sure that they
    # are on the same filesystem, so we could take the advantage of
    # reflink and/or hardlink. Not using tempfile.TemporaryDirectory
    # because it will create a symlink to tmpfs, which defeats the purpose
    # and won't work with reflink/hardlink.
    dpath = os.path.dirname(os.path.abspath(out))
    tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
    try:
        with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
            # Note: we need to replace state, because in case of getting DVC
            # dependency on CIFS or NFS filesystems, sqlite-based state
            # will be unable to obtain lock
            repo.state = StateNoop()

            # Try any links possible to avoid data duplication.
            #
            # Not using symlink, because we need to remove cache after we are
            # done, and to make that work we would have to copy data over
            # anyway before removing the cache, so we might just copy it
            # right away.
            #
            # Also, we can't use theoretical "move" link type here, because
            # the same cache file might be used a few times in a directory.
            repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

            o = repo.find_out_by_relpath(path)
            with repo.state:
                repo.cloud.pull(o.get_used_cache())

            o.path_info = PathInfo(os.path.abspath(out))
            with o.repo.state:
                o.checkout()
    except NotDvcRepoError:
        raise UrlNotDvcRepoError(url)
    except OutputNotFoundError:
        raise OutputNotFoundError(path)
    finally:
        remove(tmp_dir)
def test_rename_multiple_files_same_hashes(tmp_dir, scm, dvc):
    """Test diff by renaming >=2 instances of file with same hashes.

    DVC should be able to detect that they are renames, and should not
    include them in either the `added` or the `deleted` section.
    """
    tmp_dir.dvc_gen(
        {"dir": {"foo": "foo", "subdir": {"foo": "foo"}}}, commit="commit #1"
    )
    remove(tmp_dir / "dir")

    # changing foo and subdir/foo to bar and subdir/bar respectively
    tmp_dir.dvc_gen(
        {"dir": {"bar": "foo", "subdir": {"bar": "foo"}}}, commit="commit #2"
    )
    assert dvc.diff("HEAD~") == {
        "added": [],
        "deleted": [],
        "modified": [
            {
                "hash": {
                    "new": "31b36b3ea5f4485e27f10578c47183b0.dir",
                    "old": "c7684c8b3b0d28cf80d5305e2d856bfc.dir",
                },
                "path": os.path.join("dir", ""),
            }
        ],
        "not in cache": [],
        "renamed": [
            {
                "hash": "acbd18db4cc2f85cedef654fccc4a4d8",
                "path": {
                    "new": os.path.join("dir", "bar"),
                    "old": os.path.join("dir", "foo"),
                },
            },
            {
                "hash": "acbd18db4cc2f85cedef654fccc4a4d8",
                "path": {
                    "new": os.path.join("dir", "subdir", "bar"),
                    "old": os.path.join("dir", "subdir", "foo"),
                },
            },
        ],
    }
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.repo_tree.download(
            PathInfo(repo.root_dir) / "dir",
            PathInfo(tmp_dir / "out"),
            follow_subrepos=False,
        )
        expected_files = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert (tmp_dir / "out").read_text() == expected_files

        # clear cache to test saving to cache
        cache_dir = tmp_dir / repo.cache.local.cache_dir
        remove(cache_dir)
        makedirs(cache_dir)

        expected_hash = HashInfo(
            "md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir"
        )
        assert (
            repo.repo_tree.get_hash(
                os.path.join(repo.root_dir, "dir"), follow_subrepos=False
            )
            == expected_hash
        )

        repo.cache.local.save(
            PathInfo(repo.root_dir) / "dir",
            repo.repo_tree,
            expected_hash,
            link=False,
        )
        assert set(cache_dir.glob("*/*")) == {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
def test_checkout_targets_deps(tmp_dir, scm, dvc, exp_stage):
    from dvc.utils.fs import remove

    tmp_dir.dvc_gen({"foo": "foo", "bar": "bar"}, commit="add files")
    stage = dvc.stage.add(
        cmd="python copy.py params.yaml metrics.yaml",
        metrics_no_cache=["metrics.yaml"],
        params=["foo"],
        name="copy-file",
        deps=["copy.py", "foo"],
        force=True,
    )
    remove("foo")
    remove("bar")

    dvc.experiments.run(stage.addressing, params=["foo=2"])

    assert (tmp_dir / "foo").exists()
    assert (tmp_dir / "foo").read_text() == "foo"
    assert not (tmp_dir / "bar").exists()
def _unprotect_file(path):
    if System.is_symlink(path) or System.is_hardlink(path):
        logger.debug("Unprotecting '{}'".format(path))
        tmp = os.path.join(os.path.dirname(path), "." + uuid())

        # The order of operations is important here - if some application
        # accessed the file during the copy, it would get only part of the
        # file. So the file should first be copied under the temporary name,
        # and only then should the original file be replaced by the new one.
        copyfile(path, tmp, name="Unprotecting '{}'".format(relpath(path)))
        remove(path)
        os.rename(tmp, path)
    else:
        logger.debug(
            "Skipping copying for '{}', since it is not "
            "a symlink or a hardlink.".format(path)
        )

    os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
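# A minimal standalone sketch of the same write-to-a-temporary-sibling pattern
# with stdlib only (the helper name and the use of os.replace are illustrative,
# not part of this codebase). Creating the temporary file next to the
# destination keeps both paths on one filesystem, so the final swap is a
# rename rather than a copy:
import os
import uuid as _uuid


def replace_file_atomically(path, data):
    tmp = os.path.join(os.path.dirname(path) or ".", "." + _uuid.uuid4().hex)
    with open(tmp, "wb") as fobj:
        fobj.write(data)
    # os.replace() overwrites the destination atomically on both POSIX and
    # Windows, so readers see either the old content or the new one, never a
    # partially written file.
    os.replace(tmp, path)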
def _reproduce(self, executors: dict, jobs: Optional[int] = 1) -> dict:
    """Run dvc repro for the specified ExperimentExecutors in parallel.

    Returns dict containing successfully executed experiments.
    """
    result = {}

    with ProcessPoolExecutor(max_workers=jobs) as workers:
        futures = {}
        for rev, executor in executors.items():
            future = workers.submit(
                executor.reproduce,
                executor.dvc_dir,
                cwd=executor.dvc.root_dir,
                **executor.repro_kwargs,
            )
            futures[future] = (rev, executor)

        for future in as_completed(futures):
            rev, executor = futures[future]
            exc = future.exception()

            if exc is None:
                exp_hash = future.result()
                logger.debug(f"ran exp based on {executor.baseline_rev}")
                self._scm_checkout(executor.baseline_rev)
                self._collect_output(executor.baseline_rev, executor)
                remove(self.args_file)
                try:
                    exp_rev = self._commit(exp_hash)
                except UnchangedExperimentError:
                    logger.debug(
                        "Experiment '%s' identical to baseline '%s'",
                        rev,
                        executor.baseline_rev,
                    )
                    exp_rev = executor.baseline_rev
                logger.info("Reproduced experiment '%s'.", exp_rev[:7])
                result[rev] = {exp_rev: exp_hash}
            else:
                logger.exception(
                    "Failed to reproduce experiment '%s'", rev
                )

            executor.cleanup()

    return result
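# A minimal standalone sketch of the submit/as_completed fan-out used above
# (function and variable names here are illustrative, stdlib only). Mapping
# each future back to its key lets results be matched up as they finish,
# regardless of completion order:
from concurrent.futures import ProcessPoolExecutor, as_completed


def _square(n):
    # Module-level worker so it can be pickled for the process pool.
    return n * n


def run_parallel(items, jobs=2):
    results, errors = {}, {}
    with ProcessPoolExecutor(max_workers=jobs) as workers:
        futures = {workers.submit(_square, n): key for key, n in items.items()}
        for future in as_completed(futures):
            key = futures[future]
            exc = future.exception()
            if exc is None:
                results[key] = future.result()
            else:
                errors[key] = exc
    return results, errors


# e.g. run_parallel({"a": 2, "b": 3}) -> ({"a": 4, "b": 9}, {});
# call it from under `if __name__ == "__main__":` on platforms where the
# process start method is "spawn".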
def test_api_missing_local_cache_exists_on_remote(
    tmp_dir, scm, dvc, as_external, remote, files, to_read,
):
    tmp_dir.dvc_gen(files, commit="DVC track files")
    dvc.push()

    # Remove cache to make foo missing
    remove(dvc.cache.local.cache_dir)
    remove(first(files))

    repo_url = f"file://{tmp_dir}" if as_external else None
    file_content = get_in(files, to_read.split(os.sep))
    assert api.read(to_read, repo=repo_url) == file_content
def apply(repo: "Repo", rev: str, force: bool = True, **kwargs):
    from scmrepo.exceptions import SCMError as _SCMError

    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.scm import GitMergeError, RevError, resolve_rev

    exps = repo.experiments

    try:
        exp_rev = resolve_rev(repo.scm, rev)
        exps.check_baseline(exp_rev)
    except (RevError, BaselineMismatchError) as exc:
        raise InvalidExpRevError(rev) from exc

    stash_rev = exp_rev in exps.stash_revs
    if not stash_rev and not exps.get_branch_by_rev(
        exp_rev, allow_multiple=True
    ):
        raise InvalidExpRevError(exp_rev)

    # NOTE: we don't use scmrepo's stash_workspace() here since we need
    # finer control over the merge behavior when we unstash everything
    with _apply_workspace(repo, rev, force):
        try:
            repo.scm.merge(exp_rev, commit=False, squash=True)
        except _SCMError as exc:
            raise GitMergeError(str(exc), scm=repo.scm)

    repo.scm.reset()

    if stash_rev:
        args_path = os.path.join(
            repo.tmp_dir, BaseExecutor.PACKED_ARGS_FILE
        )
        if os.path.exists(args_path):
            remove(args_path)

    dvc_checkout(repo, **kwargs)

    repo.scm.set_ref(EXEC_APPLY, exp_rev)
    logger.info(
        "Changes for experiment '%s' have been applied to your current "
        "workspace.",
        rev,
    )
def get(url, path, out=None, rev=None, jobs=None):
    import shortuuid

    from dvc.dvcfile import is_valid_filename
    from dvc.external_repo import external_repo

    out = resolve_output(path, out)

    if is_valid_filename(out):
        raise GetDVCFileError()

    # Creating a directory right beside the output to make sure that they
    # are on the same filesystem, so we could take the advantage of
    # reflink and/or hardlink. Not using tempfile.TemporaryDirectory
    # because it will create a symlink to tmpfs, which defeats the purpose
    # and won't work with reflink/hardlink.
    dpath = os.path.dirname(os.path.abspath(out))
    tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))

    # Try any links possible to avoid data duplication.
    #
    # Not using symlink, because we need to remove cache after we
    # are done, and to make that work we would have to copy data
    # over anyway before removing the cache, so we might just copy
    # it right away.
    #
    # Also, we can't use theoretical "move" link type here, because
    # the same cache file might be used a few times in a directory.
    cache_types = ["reflink", "hardlink", "copy"]
    try:
        with external_repo(
            url=url, rev=rev, cache_dir=tmp_dir, cache_types=cache_types
        ) as repo:
            from_info = PathInfo(repo.root_dir) / path
            to_info = PathInfo(out)
            repo.repo_tree.download(
                from_info, to_info, jobs=jobs, follow_subrepos=False
            )
    finally:
        remove(tmp_dir)
def test_show_dir_plots(tmp_dir, dvc, run_copy_metrics):
    subdir = tmp_dir / "subdir"
    subdir.mkdir()
    metric = [
        {"first_val": 100, "val": 2},
        {"first_val": 200, "val": 3},
    ]
    fname = "file.json"
    _write_json(tmp_dir, metric, fname)

    p1 = os.path.join("subdir", "p1.json")
    p2 = os.path.join("subdir", "p2.json")
    tmp_dir.dvc.run(
        cmd=(
            f"mkdir subdir && python copy.py {fname} {p1} && "
            f"python copy.py {fname} {p2}"
        ),
        deps=[fname],
        single_stage=False,
        plots=["subdir"],
        name="copy_double",
    )

    result = dvc.plots.show(targets=["subdir"])
    p1_content = json.loads(result[p1])
    p2_content = json.loads(result[p2])

    assert p1_content == p2_content

    result = dvc.plots.show(targets=[p1])
    assert set(result.keys()) == {p1}

    remove(dvc.odb.local.cache_dir)
    remove(subdir)

    with pytest.raises(NoMetricsParsedError):
        dvc.plots.show()
def test_open_external(tmp_dir, erepo_dir, cloud):
    erepo_dir.add_remote(config=cloud.config)

    with erepo_dir.chdir():
        erepo_dir.dvc_gen("version", "master", commit="add version")

        with erepo_dir.branch("branch", new=True):
            # NOTE: the file needs to be a different size for Mac
            erepo_dir.dvc_gen("version", "branchver", commit="add version")

    erepo_dir.dvc.push(all_branches=True)

    # Remove cache to force download
    remove(erepo_dir.dvc.cache.local.cache_dir)

    # Using file url to force clone to tmp repo
    repo_url = f"file://{erepo_dir}"
    with api.open("version", repo=repo_url) as fd:
        assert fd.read() == "master"

    assert api.read("version", repo=repo_url, rev="branch") == "branchver"
def test_open_external(remote_url, erepo_dir):
    _set_remote_url_and_commit(erepo_dir.dvc, remote_url)

    with erepo_dir.chdir():
        erepo_dir.dvc_gen("version", "master", commit="add version")

        with erepo_dir.branch("branch", new=True):
            # NOTE: the file needs to be a different size for Mac
            erepo_dir.dvc_gen("version", "branchver", commit="add version")

    erepo_dir.dvc.push(all_branches=True)

    # Remove cache to force download
    remove(erepo_dir.dvc.cache.local.cache_dir)

    # Using file url to force clone to tmp repo
    repo_url = "file://{}".format(erepo_dir)
    with api.open("version", repo=repo_url) as fd:
        assert fd.read() == "master"

    assert api.read("version", repo=repo_url, rev="branch") == "branchver"
def test_no_cache_entry(tmp_dir, scm, dvc):
    tmp_dir.dvc_gen("file", "first", commit="add a file")

    tmp_dir.dvc_gen({"dir": {"1": "1", "2": "2"}})
    tmp_dir.dvc_gen("file", "second")

    remove(tmp_dir / ".dvc" / "cache")
    (tmp_dir / ".dvc" / "tmp" / "state").unlink()

    dir_checksum = "5fb6b29836c388e093ca0715c872fe2a.dir"

    assert dvc.diff() == {
        "added": [{"path": os.path.join("dir", ""), "hash": dir_checksum}],
        "deleted": [],
        "modified": [
            {
                "path": "file",
                "hash": {"old": digest("first"), "new": digest("second")},
            }
        ],
    }
def test_collect_non_existing_dir(tmp_dir, dvc, run_copy_metrics):
    subdir = tmp_dir / "subdir"
    subdir.mkdir()

    metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}]
    subdir_metric = [{"y": 101, "x": 3}, {"y": 202, "x": 4}]

    pname = "source.json"
    (tmp_dir / pname).dump_json(metric, sort_keys=True)

    sname = "subdir_source.json"
    (tmp_dir / sname).dump_json(subdir_metric, sort_keys=True)

    p1 = os.path.join("subdir", "p1.json")
    p2 = os.path.join("subdir", "p2.json")
    subdir_stage = tmp_dir.dvc.run(
        cmd=(
            f"mkdir subdir && python copy.py {sname} {p1} && "
            f"python copy.py {sname} {p2}"
        ),
        deps=[sname],
        single_stage=False,
        plots=["subdir"],
        name="copy_double",
    )
    run_copy_metrics(
        pname,
        "plot.json",
        plots=["plot.json"],
        commit="there is metric",
    )

    remove(subdir_stage.outs[0].cache_path)
    remove(subdir_stage.outs[0].fs_path)

    result = dvc.plots.show()
    assert "error" in result["workspace"]["data"]["subdir"]
    # make sure the others get loaded
    assert result["workspace"]["data"]["plot.json"]["data"] == metric
def test_pull_imported_stage_from_subrepos(
    tmp_dir, dvc, erepo_dir, is_dvc, files
):
    subrepo = erepo_dir / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    gen = subrepo.dvc_gen if is_dvc else subrepo.scm_gen
    with subrepo.chdir():
        gen(files, commit="files in subrepo")

    key = first(files)
    path = os.path.join("subrepo", key)
    dvc.imp(os.fspath(erepo_dir), path, out="out")

    # clean everything
    remove(dvc.odb.local.cache_dir)
    remove("out")
    makedirs(dvc.odb.local.cache_dir)

    stats = dvc.pull(["out.dvc"])

    expected = [f"out{os.sep}"] if isinstance(files[key], dict) else ["out"]
    assert stats["added"] == expected
    assert (tmp_dir / "out").read_text() == files[key]
def test_branch_config(tmp_dir, scm):
    tmp_dir.scm_gen("foo", "foo", commit="init")

    scm.checkout("branch", create_new=True)
    dvc = Repo.init()
    with dvc.config.edit() as conf:
        conf["remote"]["branch"] = {"url": "/some/path"}
    scm.add([".dvc"])
    scm.commit("init dvc")
    scm.checkout("master")

    remove(".dvc")

    # sanity check
    with pytest.raises(NotDvcRepoError):
        Repo()

    with pytest.raises(NotDvcRepoError):
        Repo(scm=scm, rev="master")

    dvc = Repo(scm=scm, rev="branch")
    assert dvc.config["remote"]["branch"]["url"] == "/some/path"