def git(tmp_dir, scm, request):
    """Yield a ``Git`` instance for ``tmp_dir`` limited to a single backend.

    The backend comes from ``request.param`` and is also stored on the
    instance as ``test_backend``. The instance is closed when the generator
    is resumed after the yield.
    """
    from dvc.scm.git import Git

    root = os.fspath(tmp_dir)
    backend = request.param
    repo = Git(root, backends=[backend])
    repo.test_backend = backend
    yield repo
    repo.close()
def _cached_clone(url, rev, for_write=False):
    """Clone an external git repo to a temporary directory.

    Returns the path to a local temporary directory with the specified
    revision checked out. If for_write is set prevents reusing this dir via
    cache.
    """
    # Fast path: a checkout already cached under this exact (url, sha) key.
    if not for_write and Git.is_sha(rev) and (url, rev) in CLONES:
        return CLONES[url, rev]

    clone_path = _clone_default_branch(url, rev)

    # Resolve the requested rev (or HEAD) to a sha so that different names
    # for the same commit share one cache entry. Close the handle once done,
    # like the other helpers in this file do.
    git = Git(clone_path)
    try:
        rev_sha = git.resolve_rev(rev or "HEAD")
    finally:
        git.close()

    if not for_write and (url, rev_sha) in CLONES:
        return CLONES[url, rev_sha]

    # Copy to a new dir to keep the clone clean
    repo_path = tempfile.mkdtemp("dvc-erepo")
    # logging interpolates with %-style placeholders, not str.format ones.
    logger.debug("erepo: making a copy of %s clone", url)
    copy_tree(clone_path, repo_path)

    # Check out the specified revision
    if rev is not None:
        _git_checkout(repo_path, rev)

    # Writable copies are never shared, so they are not recorded in the cache.
    if not for_write:
        CLONES[url, rev_sha] = repo_path
    return repo_path
def _clone_default_branch(url, rev):
    """Return the path of a clean clone of ``url``, creating it if needed.

    An already cached clone is refreshed with ``git pull`` unless ``rev`` is
    a sha that the clone already has.
    """
    cached_path = CLONES.get(url)
    repo = None
    try:
        if not cached_path:
            logger.debug("erepo: git clone %s to a temporary dir", url)
            cached_path = tempfile.mkdtemp("dvc-clone")
            repo = Git.clone(url, cached_path)
            CLONES[url] = cached_path
        else:
            repo = Git(cached_path)
            # Branches and tags might move, so only a known sha skips the pull
            known_sha = Git.is_sha(rev) and repo.has_rev(rev)
            if not known_sha:
                logger.debug("erepo: git pull %s", url)
                repo.pull()
    finally:
        if repo:
            repo.close()
    return cached_path
def test_no_commits(tmp_dir):
    """``no_commits`` is truthy after ``git_init`` and falsy after a commit."""
    from dvc.scm.git import Git

    from tests.dir_helpers import git_init

    git_init(".")
    before_commit = Git().no_commits
    assert before_commit

    tmp_dir.gen("foo", "foo")
    Git().add(["foo"])
    Git().commit("foo")

    # A fresh instance is used for every query, as in the steps above.
    after_commit = Git().no_commits
    assert not after_commit
def SCM(root_dir):  # pylint: disable=invalid-name
    """Pick the SCM implementation for the repo at the specified path.

    Args:
        root_dir (str): path to a root directory of the repo.

    Returns:
        ``Git`` when ``root_dir`` is a git repo or a git submodule,
        ``NoSCM`` otherwise.
    """
    is_git = Git.is_repo(root_dir) or Git.is_submodule(root_dir)
    return Git(root_dir) if is_git else NoSCM(root_dir)
def _clone_default_branch(url, rev, for_write=False):
    """Get or create a clean clone of the url.

    An existing clone is refreshed (unshallowed or pulled) unless rev is a
    sha that the clone already has.

    Returns:
        tuple: ``(clone_path, shallow)`` - the path of the clone and whether
        it is recorded as a shallow clone. The same pair is stored in
        ``CLONES[url]``.
    """
    from dvc.scm.git import Git

    # A url with no cache entry yields (None, False) and takes the clone path.
    clone_path, shallow = CLONES.get(url, (None, False))
    git = None
    try:
        if clone_path:
            git = Git(clone_path)
            # Do not pull for known shas, branches and tags might move
            if not Git.is_sha(rev) or not git.has_rev(rev):
                if shallow:
                    # If we are missing a rev in a shallow clone, fallback to
                    # a full (unshallowed) clone. Since fetching specific rev
                    # SHAs is only available in certain git versions, if we
                    # have need to reference multiple specific revs for a
                    # given repo URL it is easier/safer for us to work with
                    # full clones in this case.
                    logger.debug("erepo: unshallowing clone for '%s'", url)
                    _unshallow(git)
                    shallow = False
                    CLONES[url] = (clone_path, shallow)
                else:
                    logger.debug("erepo: git pull '%s'", url)
                    git.pull()
        else:
            from dvc.scm import clone

            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            clone_path = tempfile.mkdtemp("dvc-clone")
            if not for_write and rev and not Git.is_sha(rev):
                # If rev is a tag or branch name try shallow clone first
                try:
                    git = clone(url, clone_path, shallow_branch=rev)
                    shallow = True
                    logger.debug(
                        "erepo: using shallow clone for branch '%s'", rev
                    )
                except CloneError:
                    # Deliberate best effort: `git` stays None, so the full
                    # clone below is attempted instead.
                    pass
            if not git:
                git = clone(url, clone_path)
                shallow = False
            CLONES[url] = (clone_path, shallow)
    finally:
        # Always release the handle; callers only get the path back.
        if git:
            git.close()
    return clone_path, shallow
def SCM(root_dir, project=None):  # pylint: disable=invalid-name
    """Pick the SCM implementation for the project at the specified path.

    Args:
        root_dir (str): path to a root directory of the project.
        project (dvc.project.Project): dvc project instance that root_dir
            belongs to.

    Returns:
        ``Git`` when ``root_dir`` is a git repo or a git submodule,
        ``Base`` otherwise.
    """
    is_git = Git.is_repo(root_dir) or Git.is_submodule(root_dir)
    scm_cls = Git if is_git else Base
    return scm_cls(root_dir, project=project)
def ls(repo, *args, rev=None, git_remote=None, all_=False, **kwargs):
    """Collect experiment ref names grouped by revision.

    With ``rev`` (or, when neither ``rev`` nor ``all_`` is given, the current
    rev of ``repo.scm``) names are grouped under that revision. With ``all_``
    and no ``rev`` they are grouped under each ref's ``baseline_sha``. When
    ``git_remote`` is set the remote variants of the ref generators are used.

    Returns:
        defaultdict(list): mapping of revision to experiment names.
    """
    from dvc.scm import RevError, resolve_rev
    from dvc.scm.git import Git

    if not rev:
        if not all_:
            rev = repo.scm.get_rev()
    else:
        try:
            rev = resolve_rev(repo.scm, rev)
        except RevError:
            # This could be a remote rev that has not been fetched yet
            maybe_remote_sha = git_remote and Git.is_sha(rev)
            if not maybe_remote_sha:
                raise

    results = defaultdict(list)
    if rev:
        refs = (
            remote_exp_refs_by_baseline(repo.scm, git_remote, rev)
            if git_remote
            else exp_refs_by_baseline(repo.scm, rev)
        )
        for ref_info in refs:
            results[rev].append(ref_info.name)
    elif all_:
        refs = (
            remote_exp_refs(repo.scm, git_remote)
            if git_remote
            else exp_refs(repo.scm)
        )
        for ref_info in refs:
            results[ref_info.baseline_sha].append(ref_info.name)
    return results
def _show_experiments(all_experiments, console, precision=None):
    """Build a styled rich table of all experiments and print it to console.

    The table has an "Experiment" column followed by one right-justified
    column per metric name and one left-justified column per param name.
    """
    from rich.table import Table

    from dvc.scm.git import Git

    metric_names, param_names = _collect_names(all_experiments)

    table = Table(row_styles=["white", "bright_white"])
    table.add_column("Experiment", header_style="black on grey93")
    column_groups = (
        (metric_names, "right", "black on cornsilk1"),
        (param_names, "left", "black on light_cyan1"),
    )
    for names, justify, header_style in column_groups:
        for name in names:
            table.add_column(name, justify=justify, header_style=header_style)

    for base_rev, experiments in all_experiments.items():
        # Shas are shown abbreviated to their first 7 characters.
        shown_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        rows = _collect_rows(
            shown_rev,
            experiments,
            metric_names,
            param_names,
            precision=precision,
        )
        for row, style in rows:
            table.add_row(*row, style=style)

    console.print(table)
def external_repo(
    url, rev=None, for_write=False, cache_dir=None, cache_types=None, **kwargs
):
    """Yield a ``Repo`` built on a cached clone of ``url``.

    Errors raised while the repo is in use are translated: ``NoRemoteError``
    to ``NoRemoteInExternalRepoError``, ``OutputNotFoundError`` belonging to
    this repo to ``NoOutputInExternalRepoError`` and ``FileMissingError`` to
    ``PathMissingError``. The repo is always closed afterwards, and the clone
    directory is removed when ``for_write`` is set.
    """
    from dvc.config import NoRemoteError
    from dvc.scm.git import Git

    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), use origin/HEAD here to get
    # the tip of the default branch
    rev = rev or "refs/remotes/origin/HEAD"

    cache_section = {
        "dir": cache_dir or _get_cache_dir(url),
        "type": cache_types,
    }
    cache_config = {"cache": cache_section}

    # Only a local directory url contributes its remote config.
    config = _get_remote_config(url) if os.path.isdir(url) else {}
    config.update(cache_config)

    if for_write:
        root_dir, scm, repo_rev = path, None, None
    else:
        root_dir = os.path.realpath(path)
        scm, repo_rev = Git(root_dir), rev

    repo_kwargs = dict(
        root_dir=root_dir,
        url=url,
        scm=scm,
        rev=repo_rev,
        config=config,
        repo_factory=erepo_factory(url, cache_config),
        **kwargs,
    )
    # Both flags default to True unless the caller passed them explicitly.
    repo_kwargs.setdefault("subrepos", True)
    repo_kwargs.setdefault("uninitialized", True)

    repo = Repo(**repo_kwargs)
    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        if exc.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(
            exc.output, repo.root_dir, url
        ) from exc
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)
def __init__(
    self,
    path: str = None,
    rev: str = None,
    scm: "Git" = None,
    trie: "GitTrie" = None,
    rev_resolver: Callable[["Git", str], str] = None,
    **kwargs,
):
    """Set up ``trie``, ``root_dir`` and ``rev`` for this instance.

    When ``trie`` is given it is used as is and ``path`` must be truthy.
    Otherwise a ``GitTrie`` is built from ``scm`` (or ``Git(path)``) at
    ``rev`` (default ``"HEAD"``), resolved with ``rev_resolver`` or
    ``Git.resolve_rev``, and ``root_dir`` is taken from the scm.
    """
    from dvc.scm.git import Git
    from dvc.scm.git.objects import GitTrie

    super().__init__(**kwargs)

    if trie:
        assert path
    else:
        scm = scm or Git(path)
        resolve = rev_resolver or Git.resolve_rev
        sha = resolve(scm, rev or "HEAD")
        trie = GitTrie(scm.pygit2.get_tree_obj(rev=sha), sha)
        path = scm.root_dir

    self.trie = trie
    self.root_dir = path
    self.rev = self.trie.rev
def external_repo(url, rev=None, for_write=False, **kwargs):
    """Yield an ``ExternalRepo`` built on a cached clone of ``url``.

    Errors raised while the repo is in use are translated: ``NoRemoteError``
    to ``NoRemoteInExternalRepoError``, ``OutputNotFoundError`` belonging to
    this repo to ``NoOutputInExternalRepoError`` and ``FileMissingError`` to
    ``PathMissingError``. The repo is always closed afterwards, and the clone
    directory is removed when ``for_write`` is set.
    """
    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), use origin/HEAD here to get
    # the tip of the default branch
    rev = rev or "refs/remotes/origin/HEAD"

    if for_write:
        root_dir, scm, repo_rev = path, None, None
    else:
        root_dir = os.path.realpath(path)
        scm, repo_rev = Git(root_dir), rev

    conf = dict(
        root_dir=root_dir,
        url=url,
        scm=scm,
        rev=repo_rev,
        for_write=for_write,
        uninitialized=True,
        **kwargs,
    )
    repo = ExternalRepo(**conf)
    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        if exc.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(
            exc.output, repo.root_dir, url
        ) from exc
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)
def _show_experiments(all_experiments, console, precision=None):
    """Build a rich table of all experiments and print it to console.

    The table has an "Experiment" column followed by one right-justified
    column per metric name and one left-justified column per param name.
    The second item of each pair yielded by ``_collect_rows`` is ignored.
    """
    from rich.table import Table

    from dvc.scm.git import Git

    metric_names, param_names = _collect_names(all_experiments)

    table = Table()
    table.add_column("Experiment")
    for names, justify in ((metric_names, "right"), (param_names, "left")):
        for name in names:
            table.add_column(name, justify=justify)

    for base_rev, experiments in all_experiments.items():
        # Shas are shown abbreviated to their first 7 characters.
        shown_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        rows = _collect_rows(
            shown_rev,
            experiments,
            metric_names,
            param_names,
            precision=precision,
        )
        for row, _ in rows:
            table.add_row(*row)

    console.print(table)
def make(name, *, scm=False, dvc=False, subdir=False):
    """Create a ``TmpDir`` populated from a cached template directory.

    A template is initialised once per ``(scm, dvc, subdir)`` combination and
    remembered in ``CACHE``; its entries are then copied into the target
    directory. ``name`` is either a string passed to
    ``tmp_path_factory.mktemp`` or an already existing path.
    """
    from shutil import ignore_patterns

    from dvc.repo import Repo
    from dvc.scm.git import Git
    from dvc.utils.fs import fs_copy

    cache_key = (scm, dvc, subdir)
    cache = CACHE.get(cache_key)
    if not cache:
        cache = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
        TmpDir(cache).init(scm=scm, dvc=dvc, subdir=subdir)
        CACHE[cache_key] = os.fspath(cache)

    if isinstance(name, str):
        path = tmp_path_factory.mktemp(name)
    else:
        path = name

    # ignore sqlite files from .dvc/tmp. We might not be closing the cache
    # connection resulting in PermissionErrors in Windows.
    skip_cache_db = ignore_patterns("cache.db*")
    for entry in os.listdir(cache):
        # shutil.copytree's dirs_exist_ok is only available in >=3.8
        src = os.path.join(cache, entry)
        dst = os.path.join(path, entry)
        fs_copy(src, dst, ignore=skip_cache_db)

    new_dir = TmpDir(path)
    str_path = os.fspath(new_dir)
    if dvc:
        new_dir.dvc = Repo(str_path)
    if scm:
        # Reuse the scm of the dvc repo when there is one.
        if hasattr(new_dir, "dvc"):
            new_dir.scm = new_dir.dvc.scm
        else:
            new_dir.scm = Git(str_path)
    request.addfinalizer(new_dir.close)
    return new_dir
def clone(url, to_path, rev=None):
    """Clone ``url`` into ``to_path`` at ``rev`` and return a ``Repo`` for it."""
    from dvc.scm.git import Git

    # The Git handle is only needed for the clone itself; close it right away.
    Git.clone(url, to_path, rev=rev).close()
    return Repo(to_path)
def _scm_checkout(self, rev):
    """Hard-reset the repo and check out ``rev``.

    A detached HEAD is first moved back via ``_checkout_default_branch``.
    The scm is pulled beforehand unless ``rev`` is a sha it already has.
    """
    git_repo = self.scm.repo
    git_repo.git.reset(hard=True)
    if git_repo.head.is_detached:
        self._checkout_default_branch()

    rev_available = Git.is_sha(rev) and self.scm.has_rev(rev)
    if not rev_available:
        self.scm.pull()

    logger.debug("Checking out experiment commit '%s'", rev)
    self.scm.checkout(rev)
def clone(url: str, to_path: str, **kwargs):
    """Clone ``url`` into ``to_path`` while reporting progress.

    Progress is reported through a ``TqdmGit`` bar labelled "Cloning". Extra
    keyword arguments are forwarded to ``Git.clone``.

    Returns:
        Whatever ``Git.clone`` returns.

    Raises:
        CloneError: when ``Git.clone`` raises the ``CloneError`` from
            ``dvc.scm.exceptions``; that error is attached as the cause.
    """
    from dvc.scm.exceptions import CloneError as InternalCloneError

    with TqdmGit(desc="Cloning") as pbar:
        try:
            return Git.clone(url, to_path, progress=pbar.update_git, **kwargs)
        except InternalCloneError as exc:
            # Chain explicitly so the traceback shows the original failure
            # as the direct cause of the translated error.
            raise CloneError(str(exc)) from exc
def _scm_checkout(self, rev):
    """Hard-reset the repo and check out ``rev``.

    A detached HEAD is first moved back onto the first entry of the repo's
    ``heads``. The scm is pulled beforehand unless ``rev`` is a sha it
    already has.
    """
    git_repo = self.scm.repo
    git_repo.git.reset(hard=True)
    if git_repo.head.is_detached:
        # switch back to default branch
        default_head = git_repo.heads[0]
        default_head.checkout()

    rev_available = Git.is_sha(rev) and self.scm.has_rev(rev)
    if not rev_available:
        self.scm.pull()

    logger.debug("Checking out base experiment commit '%s'", rev)
    self.scm.checkout(rev)
def __init__(self, root_dir, url, rev, for_write=False):
    """Initialise the repo at ``root_dir`` for the external ``url``.

    Unless ``for_write`` is set, ``root_dir`` is resolved with
    ``os.path.realpath`` and the parent is initialised with a ``Git`` scm
    for it and with ``rev``. Afterwards the url is stored and the cache dir
    and upstream are set up.
    """
    if not for_write:
        root_dir = os.path.realpath(root_dir)
        super().__init__(root_dir, scm=Git(root_dir), rev=rev)
    else:
        super().__init__(root_dir)

    self.url = url
    self._set_cache_dir()
    self._fix_upstream()
def test_no_commits(tmp_dir):
    """In a git repo without commits, the metrics diff is an empty dict."""
    from dvc.repo import Repo
    from dvc.scm.git import Git

    from tests.dir_helpers import git_init

    git_init(".")
    has_no_commits = Git().no_commits
    assert has_no_commits

    metrics_diff = Repo.init().metrics.diff()
    assert metrics_diff == {}
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repo at ``repo_path``.

    The ``Git`` handle opened for the checkout is closed even if the
    checkout fails.
    """
    logger.debug("erepo: git checkout %s@%s", repo_path, rev)
    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        repo.close()
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repo at ``repo_path``.

    The ``Git`` handle opened for the checkout is closed even if the
    checkout fails.
    """
    from dvc.scm import Git

    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        repo.close()
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repo at ``repo_path``.

    The ``Git`` handle opened for the checkout is closed even if the
    checkout fails.
    """
    from dvc.scm.git import Git

    logger.debug("erepo: git checkout %s@%s", repo_path, rev)
    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        repo.close()
def scm(tmp_dir, request):
    """Yield the SCM instance for ``tmp_dir`` and attach it as ``tmp_dir.scm``.

    When the ``dvc`` fixture is part of the request, its ``scm`` is reused
    and left open. Otherwise a git repo is initialised via ``_git_init`` and
    a new ``Git`` instance is yielded and closed afterwards.
    """
    # Use dvc.scm if available
    if "dvc" in request.fixturenames:
        dvc = request.getfixturevalue("dvc")
        tmp_dir.scm = dvc.scm
        yield dvc.scm
    else:
        from dvc.scm.git import Git

        _git_init()
        # Construct the instance before entering the try block: if the
        # constructor raised inside it, the finally clause would reference an
        # unbound local and hide the real error behind an UnboundLocalError.
        git = Git(fspath(tmp_dir))
        try:
            tmp_dir.scm = git
            yield git
        finally:
            git.close()
def init(self, *, scm=False, dvc=False):
    """Initialise git and/or dvc in this directory.

    With ``scm`` a git repo is initialised and ``self.scm`` is set; with
    ``dvc`` a dvc repo is initialised and ``self.dvc`` is set. Requesting
    something that is already set on the instance trips an assertion. When
    ``dvc`` is requested and an scm is present, an "init dvc" commit is made.
    """
    from dvc.repo import Repo
    from dvc.scm.git import Git

    assert not (scm and hasattr(self, "scm"))
    assert not (dvc and hasattr(self, "dvc"))

    root = fspath(self)

    if scm:
        _git_init(root)
    if dvc:
        self.dvc = Repo.init(root, no_scm=True)
    if scm:
        # Reuse the scm of the dvc repo when there is one.
        if hasattr(self, "dvc"):
            self.scm = self.dvc.scm
        else:
            self.scm = Git(root)
    if dvc and hasattr(self, "scm"):
        self.scm.commit("init dvc")
def SCM(
    root_dir, search_parent_directories=True, no_scm=False
):  # pylint: disable=invalid-name
    """Pick the SCM implementation for the repo at the specified path.

    Args:
        root_dir (str): path to a root directory of the repo.
        search_parent_directories (bool): whether to look for repo root in
            parent directories.
        no_scm (bool): return NoSCM if True.

    Returns:
        ``NoSCM`` when ``no_scm`` is set, ``Git`` otherwise.
    """
    if not no_scm:
        return Git(
            root_dir, search_parent_directories=search_parent_directories
        )
    return NoSCM(root_dir)
def _show_experiments(all_experiments, console, **kwargs):
    """Build a rich table of all experiments and print it to console.

    The ``include_metrics``, ``exclude_metrics``, ``include_params`` and
    ``exclude_params`` options are removed from ``kwargs``, parsed with
    ``_parse_list`` and passed to ``_collect_names``. A "Created" column is
    added unless ``no_timestamp`` is set. The remaining ``kwargs`` are
    forwarded to ``_collect_rows``.
    """
    from rich.table import Table

    from dvc.scm.git import Git

    filter_keys = (
        "include_metrics",
        "exclude_metrics",
        "include_params",
        "exclude_params",
    )
    filters = {key: _parse_list(kwargs.pop(key, [])) for key in filter_keys}
    metric_names, param_names = _collect_names(all_experiments, **filters)

    table = Table()
    table.add_column("Experiment", no_wrap=True)
    show_timestamp = not kwargs.get("no_timestamp", False)
    if show_timestamp:
        table.add_column("Created")
    for metric in metric_names:
        table.add_column(metric, justify="right", no_wrap=True)
    for param in param_names:
        table.add_column(param, justify="left")

    for base_rev, experiments in all_experiments.items():
        # Shas are shown abbreviated to their first 7 characters.
        shown_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        rows = _collect_rows(
            shown_rev, experiments, metric_names, param_names, **kwargs
        )
        for row, _ in rows:
            table.add_row(*row)

    console.print(table)
def make(name, *, scm=False, dvc=False, subdir=False):
    """Create a ``TmpDir`` populated from a cached template directory.

    A template is initialised once per ``(scm, dvc, subdir)`` combination and
    remembered in ``CACHE``; its entries are then copied into the target
    directory. ``name`` is either a string passed to
    ``tmp_path_factory.mktemp`` or an already existing path.
    """
    from dvc.repo import Repo
    from dvc.scm.git import Git
    from dvc.utils.fs import fs_copy

    cache_key = (scm, dvc, subdir)
    cache = CACHE.get(cache_key)
    if not cache:
        cache = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
        TmpDir(cache).init(scm=scm, dvc=dvc, subdir=subdir)
        CACHE[cache_key] = os.fspath(cache)

    if isinstance(name, str):
        path = tmp_path_factory.mktemp(name)
    else:
        path = name

    for entry in os.listdir(cache):
        # shutil.copytree's dirs_exist_ok is only available in >=3.8
        src = os.path.join(cache, entry)
        dst = os.path.join(path, entry)
        fs_copy(src, dst)

    new_dir = TmpDir(path)
    str_path = os.fspath(new_dir)
    if dvc:
        new_dir.dvc = Repo(str_path)
    if scm:
        # Reuse the scm of the dvc repo when there is one.
        if hasattr(new_dir, "dvc"):
            new_dir.scm = new_dir.dvc.scm
        else:
            new_dir.scm = Git(str_path)
    request.addfinalizer(new_dir.close)
    return new_dir
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
):
    """Yield one row (a list) per entry of ``experiments``.

    Each row starts with the experiment name, the abbreviated rev, the queued
    marker, the row type, the formatted timestamp and the abbreviated parent
    rev, and is then extended via ``_extend_row`` with the metrics and params.

    When ``sort_by`` is given, ``experiments`` is first reordered with
    ``_sort_exp``; ``sort_order == "desc"`` reverses that order.
    """
    from dvc.scm.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        reverse = sort_order == "desc"
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, reverse
        )

    # Carried across iterations: decides whether a row whose tip equals its
    # parent's tip is labelled "checkpoint_tip" or "checkpoint_commit".
    new_checkpoint = True
    for i, (rev, exp) in enumerate(experiments.items()):
        # A missing or falsy "queued" value is rendered as an empty string.
        queued = str(exp.get("queued") or "")
        is_baseline = rev == "baseline"

        # The baseline row shows the base rev (abbreviated only if it is a
        # sha); every other row shows its own key cut to 7 characters.
        if is_baseline:
            name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        else:
            name_rev = rev[:7]

        exp_name = exp.get("name", "")
        tip = exp.get("checkpoint_tip")

        # Look up the parent entry (if any) to compare checkpoint tips.
        parent_rev = exp.get("checkpoint_parent", "")
        parent_exp = experiments.get(parent_rev, {})
        parent_tip = parent_exp.get("checkpoint_tip")

        parent = ""
        if is_baseline:
            typ = "baseline"
        elif tip:
            if tip == parent_tip:
                typ = (
                    "checkpoint_tip" if new_checkpoint else "checkpoint_commit"
                )
            elif parent_rev == base_rev:
                typ = "checkpoint_base"
            else:
                # Only this case fills in the parent column.
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
        elif i < len(experiments) - 1:
            typ = "branch_commit"
        else:
            # The last entry without a checkpoint tip.
            typ = "branch_base"

        # Remember for the next row whether this one shared its checkpoint
        # tip with its parent; the baseline row leaves the flag untouched.
        if not is_baseline:
            new_checkpoint = not (tip and tip == parent_tip)

        row = [
            exp_name,
            name_rev,
            queued,
            typ,
            _format_time(exp.get("timestamp")),
            parent,
        ]
        _extend_row(
            row, metric_names, exp.get("metrics", {}).items(), precision
        )
        _extend_row(row, param_names, exp.get("params", {}).items(), precision)

        yield row
def _scm_checkout(self, rev):
    """Hard-reset the repo and check out ``rev``.

    Everything is fetched beforehand (``fetch(all=True)``) unless ``rev`` is
    a sha the scm already has.
    """
    self.scm.repo.git.reset(hard=True)

    rev_available = Git.is_sha(rev) and self.scm.has_rev(rev)
    if not rev_available:
        self.scm.fetch(all=True)

    logger.debug("Checking out base experiment commit '%s'", rev)
    self.scm.checkout(rev)