def diff(repo, *args, a_rev=None, b_rev=None, param_deps=False, **kwargs):
    """Diff metrics and params between two experiment revisions.

    Args:
        repo: repo object; ``repo.scm`` is used to resolve revisions.
        a_rev: "old" revision. When falsy, ``HEAD`` (passed through
            ``fix_exp_head``) is used.
        b_rev: "new" revision. When falsy, ``"workspace"`` is used.
        param_deps: forwarded to ``_collect_experiment_commit``.
        **kwargs: only ``all`` is read; it is forwarded to ``_diff`` as
            ``with_unchanged`` (default ``False``).

    Returns:
        ``{}`` when the repo has no commits, otherwise a dict with the keys
        ``"metrics"`` and ``"params"``, each holding the ``_diff`` result
        for that section.
    """
    from dvc.repo.experiments.show import _collect_experiment_commit
    from dvc.scm import resolve_rev

    if repo.scm.no_commits:
        return {}

    # Explicit revisions are resolved; the defaults are used as they are.
    if a_rev:
        old_rev = resolve_rev(repo.scm, fix_exp_head(repo.scm, a_rev))
    else:
        old_rev = fix_exp_head(repo.scm, "HEAD")
    old = _collect_experiment_commit(repo, old_rev, param_deps=param_deps)

    if b_rev:
        new_rev = resolve_rev(repo.scm, fix_exp_head(repo.scm, b_rev))
    else:
        new_rev = "workspace"
    new = _collect_experiment_commit(repo, new_rev, param_deps=param_deps)

    with_unchanged = kwargs.pop("all", False)

    old_data = old.get("data", {})
    new_data = new.get("data", {})

    result = {}
    for section in ("metrics", "params"):
        before = format_dict(old_data.get(section, {}))
        after = format_dict(new_data.get(section, {}))
        result[section] = _diff(before, after, with_unchanged=with_unchanged)
    return result
def test_fix_exp_head(tmp_dir, scm, tail):
    """``fix_exp_head`` only rewrites ``HEAD`` once EXEC_BASELINE is set."""
    from dvc.repo.experiments.base import EXEC_BASELINE
    from dvc.repo.experiments.utils import fix_exp_head

    head_ref = "HEAD" + tail
    other_ref = "foo" + tail

    # Before EXEC_BASELINE is set, HEAD-based refs come back untouched.
    assert fix_exp_head(scm, head_ref) == head_ref

    scm.set_ref(EXEC_BASELINE, "refs/heads/master")

    # Now HEAD is swapped for EXEC_BASELINE and the tail is kept.
    assert fix_exp_head(scm, head_ref) == EXEC_BASELINE + tail
    # Refs that are not based on HEAD are still left alone.
    assert fix_exp_head(scm, other_ref) == other_ref
def diff(repo, *args, a_rev=None, b_rev=None, **kwargs):
    """Diff metrics between two revisions.

    Args:
        repo: repo object; ``repo.scm`` is used for revision handling.
        *args: forwarded to ``_get_metrics``.
        a_rev: "old" revision, ``"HEAD"`` when falsy.
        b_rev: "new" revision, ``"workspace"`` when ``fix_exp_head``
            returns a falsy value for it.
        **kwargs: ``all`` is popped and forwarded to ``_diff`` as
            ``with_unchanged``; the rest goes to ``_get_metrics``.

    Returns:
        ``{}`` when the repo has no commits, otherwise the ``_diff`` result.
    """
    if repo.scm.no_commits:
        return {}

    with_unchanged = kwargs.pop("all", False)

    old_rev = fix_exp_head(repo.scm, a_rev or "HEAD")
    new_rev = fix_exp_head(repo.scm, b_rev) or "workspace"

    metrics = _get_metrics(repo, *args, **kwargs, revs=[old_rev, new_rev])

    before = format_dict(metrics.get(old_rev, {}))
    after = format_dict(metrics.get(new_rev, {}))
    return _diff(before, after, with_unchanged=with_unchanged)
def _revisions(repo, revs, experiment): revisions = revs or [] if experiment and len(revisions) == 1: baseline = repo.experiments.get_baseline(revisions[0]) if baseline: revisions.append(baseline[:7]) if len(revisions) <= 1: if len(revisions) == 0 and repo.scm.is_dirty(): revisions.append(fix_exp_head(repo.scm, "HEAD")) revisions.append("workspace") return revisions
def iter_revs(
    scm: "Git",
    head_revs: Optional[List[str]] = None,
    num: int = 1,
    all_branches: bool = False,
    all_tags: bool = False,
    all_commits: bool = False,
    all_experiments: bool = False,
) -> Mapping[str, List[str]]:
    """Collect the requested revisions and group them with ``resolve_rev``.

    Args:
        scm: Git object used to list and resolve revisions.
        head_revs: starting revisions; each one is followed by its
            ``~1``, ``~2``, ... ancestors.
        num: how many commits to take per head revision (the head itself
            included); ``-1`` keeps walking until resolution fails.
        all_branches: also include every branch (ignored with
            ``all_commits``).
        all_tags: also include every tag (ignored with ``all_commits``).
        all_commits: include every commit.
        all_experiments: also include experiment commits (ignored with
            ``all_commits``).

    Returns:
        ``{}`` when nothing was requested, otherwise the result of
        ``group_by`` applied to the collected revisions, keyed by
        ``resolve_rev(scm, rev)``.

    Raises:
        InvalidArgumentError: if ``num`` is below 1 and is not ``-1``.
    """
    from dvc.repo.experiments.utils import fix_exp_head

    if num < 1 and num != -1:
        raise InvalidArgumentError(f"Invalid number of commits '{num}'")

    nothing_requested = not (
        head_revs
        or all_branches
        or all_tags
        or all_commits
        or all_experiments
    )
    if nothing_requested:
        return {}

    collected = []
    for rev in head_revs or []:
        collected.append(rev)
        depth = 1
        # Walk back through the ancestors until `num` commits are taken or
        # an ancestor can no longer be resolved.
        while depth != num:
            try:
                ancestor = fix_exp_head(scm, f"{rev}~{depth}")
                assert ancestor
                collected.append(resolve_rev(scm, ancestor))
            except RevError:
                break
            depth += 1

    if all_commits:
        collected.extend(scm.list_all_commits())
    else:
        if all_branches:
            collected.extend(scm.list_branches())

        if all_tags:
            collected.extend(scm.list_tags())

        if all_experiments:
            from dvc.repo.experiments.utils import exp_commits

            collected.extend(exp_commits(scm))

    return group_by(partial(resolve_rev, scm), collected)
def resolve_rev(scm: "Git", rev: str) -> str:
    """Resolve ``rev`` through ``scm``, falling back to experiment names.

    ``rev`` is first passed through ``fix_exp_head`` and handed to
    ``scm.resolve_rev``. If that fails and ``rev`` does not start with
    ``refs/``, it is looked up as an experiment name via ``resolve_name``.

    Args:
        scm: Git object used for resolution.
        rev: revision or experiment name to resolve.

    Returns:
        The value returned by ``scm.resolve_rev``, or by ``scm.get_ref``
        for a matching experiment ref.

    Raises:
        RevError: if ``rev`` cannot be resolved or is an ambiguous
            experiment name. The underlying exception is attached as
            ``__cause__``.
    """
    from scmrepo.exceptions import RevError as InternalRevError

    from dvc.repo.experiments.utils import fix_exp_head

    try:
        return scm.resolve_rev(fix_exp_head(scm, rev))
    except InternalRevError as exc:
        # `scm` will only resolve git branch and tag names,
        # if rev is not a sha it may be an abbreviated experiment name
        if not rev.startswith("refs/"):
            from dvc.repo.experiments.utils import (
                AmbiguousExpRefInfo,
                resolve_name,
            )

            try:
                ref_infos = resolve_name(scm, rev).get(rev)
            except AmbiguousExpRefInfo as ambiguous:
                # Chain explicitly so the traceback names the real cause.
                raise RevError(
                    f"ambiguous Git revision '{rev}'"
                ) from ambiguous
            if ref_infos:
                return scm.get_ref(str(ref_infos))

        # Translate the scmrepo error into the DVC one, keeping the cause.
        raise RevError(str(exc)) from exc
def diff(self, a_rev="HEAD", b_rev=None, targets=None):
    """
    Compare tracked paths and their hashes between two revisions.

    Without ``b_rev`` the workspace is compared against ``a_rev``, so the
    default call compares the workspace with ``HEAD``. Unlike `git diff`
    there is no `index` involved, but the interface is kept the same.

    Returns ``{}`` when the repo has no commits or nothing differs,
    otherwise a dict with the keys ``"added"``, ``"deleted"``,
    ``"modified"``, ``"renamed"`` and ``"not in cache"``.

    Raises ``PathMissingError`` when a target is missing in both revisions.
    """
    if self.scm.no_commits:
        return {}

    from dvc.fs.repo import RepoFileSystem

    repo_fs = RepoFileSystem(self)

    a_rev = fix_exp_head(self.scm, a_rev)
    if b_rev:
        b_rev = fix_exp_head(self.scm, b_rev)
    else:
        b_rev = "workspace"

    checksums = {}
    missing_by_rev = {}
    for rev in self.brancher(revs=[a_rev, b_rev]):
        # brancher always yields the workspace; its paths/checksums are
        # only needed when the workspace is what we compare against
        if rev == "workspace" and b_rev != "workspace":
            continue

        path_infos = None
        if targets is not None:
            # turn targets into path_infos and remember the missing ones
            path_infos, missing_by_rev[rev] = _targets_to_path_infos(
                repo_fs, targets
            )

        checksums[rev] = _paths_checksums(self, path_infos)

    if targets is not None:
        # a target that is missing on both sides is an error
        missing_in_both = set(missing_by_rev[a_rev]) & set(
            missing_by_rev[b_rev]
        )
        if missing_in_both:
            raise PathMissingError(next(iter(missing_in_both)), self)

    old = checksums[a_rev]
    new = checksums[b_rev]

    # Work on the key sets of both revisions.
    old_paths = set(old)
    new_paths = set(new)

    added = sorted(new_paths - old_paths)
    deleted_or_missing = old_paths - new_paths
    # "missing" only makes sense when diffing the local workspace
    # against a commit
    missing = (
        sorted(_filter_missing(repo_fs, deleted_or_missing))
        if b_rev == "workspace"
        else []
    )
    deleted = sorted(deleted_or_missing - set(missing))
    modified = sorted(old_paths & new_paths)

    # A file that was both changed and renamed shows up as a deleted plus
    # an added record; covering that case would need a rolling/chunking
    # hash function.
    renamed = _calculate_renamed(new, old, added, deleted)

    # Renamed paths must not be reported as added/deleted as well.
    for item in renamed:
        paths = item["path"]
        added.remove(paths["new"])
        deleted.remove(paths["old"])

    added_entries = [{"path": path, "hash": new[path]} for path in added]
    deleted_entries = [{"path": path, "hash": old[path]} for path in deleted]

    modified_entries = []
    for path in modified:
        old_hash = old[path]
        new_hash = new[path]
        if old_hash != new_hash:
            modified_entries.append(
                {"path": path, "hash": {"old": old_hash, "new": new_hash}}
            )

    missing_entries = [{"path": path, "hash": old[path]} for path in missing]

    ret = {
        "added": added_entries,
        "deleted": deleted_entries,
        "modified": modified_entries,
        "renamed": renamed,
        "not in cache": missing_entries,
    }

    if not any(ret.values()):
        return {}
    return ret
def show(
    repo,
    all_branches=False,
    all_tags=False,
    revs=None,
    all_commits=False,
    sha_only=False,
    num=1,
    param_deps=False,
    onerror: Optional[Callable] = None,
):
    """Collect experiments grouped by the revision they are based on.

    Args:
        repo: repo object providing ``scm``, ``brancher`` and
            ``experiments``.
        all_branches, all_tags, all_commits: forwarded to
            ``repo.brancher``.
        revs: revisions to show; when ``None`` the last ``num`` commits
            starting at ``HEAD`` are used.
        sha_only, param_deps: forwarded to the collection helpers.
        num: number of commits to take when ``revs`` is ``None``.
        onerror: error callback handed to the collection helpers;
            ``onerror_collect`` when ``None``.

    Returns:
        A ``defaultdict`` mapping each revision to an ``OrderedDict`` with
        a ``"baseline"`` entry, plus whatever ``_collect_experiment_branch``
        adds for its experiment refs and one entry per selected stash
        revision.

    Raises:
        InvalidArgumentError: if ``num`` is below 1.
    """
    if onerror is None:
        onerror = onerror_collect

    res: Dict[str, Dict] = defaultdict(OrderedDict)

    if num < 1:
        raise InvalidArgumentError(f"Invalid number of commits '{num}'")

    if revs is None:
        from dvc.scm import RevError, resolve_rev

        revs = []
        for depth in range(num):
            try:
                head = fix_exp_head(repo.scm, f"HEAD~{depth}")
                assert head
                sha = resolve_rev(repo.scm, head)
            except RevError:
                # ran out of history
                break
            revs.append(sha)

    # Ordered, de-duplicated set of the revisions yielded by the brancher.
    revs = OrderedDict.fromkeys(
        repo.brancher(
            revs=revs,
            all_branches=all_branches,
            all_tags=all_tags,
            all_commits=all_commits,
            sha_only=True,
        )
    )

    running = repo.experiments.get_running_exps()

    for rev in revs:
        baseline = _collect_experiment_commit(
            repo,
            rev,
            sha_only=sha_only,
            param_deps=param_deps,
            running=running,
            onerror=onerror,
        )
        res[rev]["baseline"] = baseline

        if rev == "workspace":
            continue

        base_info = ExpRefInfo(baseline_sha=rev)
        commits = []
        for ref in repo.scm.iter_refs(base=str(base_info)):
            commits.append((ref, repo.scm.resolve_commit(ref)))

        # Newest experiment first.
        commits = sorted(
            commits, key=lambda pair: pair[1].commit_time, reverse=True
        )
        for exp_ref, _ in commits:
            exp_info = ExpRefInfo.from_ref(exp_ref)
            assert exp_info.baseline_sha == rev
            _collect_experiment_branch(
                res[rev],
                repo,
                exp_ref,
                rev,
                sha_only=sha_only,
                param_deps=param_deps,
                running=running,
                onerror=onerror,
            )

    # collect queued (not yet reproduced) experiments
    for stash_rev, entry in repo.experiments.stash_revs.items():
        baseline_rev = entry.baseline_rev
        if baseline_rev not in revs:
            continue

        not_running = stash_rev not in running
        # Skip running entries whose "last" value is truthy.
        if not not_running and running[stash_rev].get("last"):
            continue

        experiment = _collect_experiment_commit(
            repo,
            stash_rev,
            sha_only=sha_only,
            stash=not_running,
            param_deps=param_deps,
            running=running,
            onerror=onerror,
        )
        res[baseline_rev][stash_rev] = experiment

    return res
def show(
    repo,
    all_branches=False,
    all_tags=False,
    revs=None,
    all_commits=False,
    sha_only=False,
    num=1,
    param_deps=False,
):
    """Collect experiments grouped by the revision they are based on.

    Args:
        repo: repo object providing ``scm``, ``brancher`` and
            ``experiments``.
        all_branches, all_tags, all_commits: forwarded to
            ``repo.brancher``.
        revs: revisions to show; when ``None`` the last ``num`` commits
            starting at ``HEAD`` are used.
        sha_only, param_deps: forwarded to the collection helpers.
        num: number of commits to take when ``revs`` is ``None``.

    Returns:
        A ``defaultdict`` mapping each revision to an ``OrderedDict`` with
        a ``"baseline"`` entry, plus whatever ``_collect_experiment_branch``
        adds for its experiment refs and one entry per stash revision
        based on it.

    Raises:
        InvalidArgumentError: if ``num`` is below 1.
    """
    res = defaultdict(OrderedDict)

    if num < 1:
        raise InvalidArgumentError(f"Invalid number of commits '{num}'")

    if revs is None:
        revs = []
        for depth in range(num):
            try:
                head = fix_exp_head(repo.scm, f"HEAD~{depth}")
                sha = repo.scm.resolve_rev(head)
            except SCMError:
                # ran out of history
                break
            revs.append(sha)

    # Ordered, de-duplicated set of the revisions yielded by the brancher.
    revs = OrderedDict.fromkeys(
        repo.brancher(
            revs=revs,
            all_branches=all_branches,
            all_tags=all_tags,
            all_commits=all_commits,
            sha_only=True,
        )
    )

    for rev in revs:
        baseline = _collect_experiment_commit(
            repo, rev, sha_only=sha_only, param_deps=param_deps
        )
        res[rev]["baseline"] = baseline

        if rev == "workspace":
            continue

        base_info = ExpRefInfo(baseline_sha=rev)
        commits = []
        for ref in repo.scm.iter_refs(base=str(base_info)):
            commits.append((ref, repo.scm.resolve_commit(ref)))

        # Newest experiment first.
        commits = sorted(
            commits, key=lambda pair: pair[1].commit_time, reverse=True
        )
        for exp_ref, _ in commits:
            exp_info = ExpRefInfo.from_ref(exp_ref)
            assert exp_info.baseline_sha == rev
            _collect_experiment_branch(
                res[rev],
                repo,
                exp_ref,
                rev,
                sha_only=sha_only,
                param_deps=param_deps,
            )

    # collect queued (not yet reproduced) experiments
    for stash_rev, entry in repo.experiments.stash_revs.items():
        baseline_rev = entry.baseline_rev
        if baseline_rev not in revs:
            continue
        experiment = _collect_experiment_commit(
            repo, stash_rev, stash=True, param_deps=param_deps
        )
        res[baseline_rev][stash_rev] = experiment

    return res