def _clone_default_branch(url, rev, for_write=False):
    """Return ``(clone_path, shallow)`` for a clean clone of ``url``.

    ``CLONES`` is consulted first. A clone registered there is refreshed
    (unshallowed if it was recorded as shallow, pulled otherwise) unless
    ``rev`` is a SHA that the clone already contains. When nothing is
    registered, ``url`` is cloned into a new temporary directory; a shallow
    clone of ``rev`` is attempted first when ``rev`` is a non-SHA name and
    ``for_write`` is false. The resulting pair is recorded in ``CLONES``.
    """
    from dvc.scm.git import Git

    clone_path, shallow = CLONES.get(url, (None, False))
    git = None
    try:
        if not clone_path:
            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            clone_path = tempfile.mkdtemp("dvc-clone")

            skip_shallow = for_write or not rev or Git.is_sha(rev)
            if not skip_shallow:
                # rev is a tag or branch name, so try a shallow clone first;
                # a CloneError just falls through to the full clone below.
                from dvc.scm.base import CloneError

                try:
                    git = Git.clone(url, clone_path, shallow_branch=rev)
                except CloneError:
                    pass
                else:
                    shallow = True
                    logger.debug(
                        "erepo: using shallow clone for branch '%s'", rev
                    )

            if not git:
                git = Git.clone(url, clone_path)
                shallow = False
            CLONES[url] = (clone_path, shallow)
        else:
            git = Git(clone_path)
            # Known SHAs never need a refresh; branches and tags might move.
            sha_already_present = Git.is_sha(rev) and git.has_rev(rev)
            if not sha_already_present:
                if not shallow:
                    logger.debug("erepo: git pull '%s'", url)
                    git.pull()
                else:
                    # A rev is missing from a shallow clone: fall back to a
                    # full (unshallowed) clone. Fetching specific rev SHAs is
                    # only available in certain git versions, so when several
                    # revs of one repo URL are needed it is easier and safer
                    # to work with a full clone.
                    logger.debug("erepo: unshallowing clone for '%s'", url)
                    _unshallow(git)
                    shallow = False
                    CLONES[url] = (clone_path, shallow)
    finally:
        if git:
            git.close()

    return clone_path, shallow
def _clone_default_branch(url, rev):
    """Return the path of a clean clone of ``url``, creating it if needed.

    A clone already registered in ``CLONES`` is refreshed with ``git pull``
    unless ``rev`` is a SHA that the clone already contains. Otherwise the
    repository is cloned into a new temporary directory and registered.
    """
    clone_path = CLONES.get(url)
    git = None
    try:
        if not clone_path:
            logger.debug("erepo: git clone %s to a temporary dir", url)
            clone_path = tempfile.mkdtemp("dvc-clone")
            git = Git.clone(url, clone_path)
            CLONES[url] = clone_path
        else:
            git = Git(clone_path)
            # Known SHAs never need a pull; branches and tags might move.
            sha_already_present = Git.is_sha(rev) and git.has_rev(rev)
            if not sha_already_present:
                logger.debug("erepo: git pull %s", url)
                git.pull()
    finally:
        if git:
            git.close()

    return clone_path
def _cached_clone(url, rev, for_write=False):
    """Clone an external git repo to a temporary directory.

    Args:
        url: URL of the git repository.
        rev: Revision to check out; ``None`` leaves the copy on whatever the
            clone has checked out (the SHA is then resolved from ``HEAD``).
        for_write: When true, the ``CLONES`` cache is neither consulted nor
            updated, so the returned directory is not reused.

    Returns:
        Path to a local temporary directory with the specified revision
        checked out.
    """
    # Fast path: an exact SHA that has already been materialised.
    if not for_write and Git.is_sha(rev) and (url, rev) in CLONES:
        return CLONES[url, rev]

    clone_path = _clone_default_branch(url, rev)

    # Resolve the revision to a SHA, closing the repository handle afterwards
    # the same way the clone helper does.
    git = Git(clone_path)
    try:
        rev_sha = git.resolve_rev(rev or "HEAD")
    finally:
        git.close()

    if not for_write and (url, rev_sha) in CLONES:
        return CLONES[url, rev_sha]

    # Copy to a new dir to keep the clone clean
    repo_path = tempfile.mkdtemp("dvc-erepo")
    # logging uses %-style placeholders; a "{}" here would trigger a
    # formatting error inside the logging machinery instead of a message.
    logger.debug("erepo: making a copy of %s clone", url)
    copy_tree(clone_path, repo_path)

    # Check out the specified revision
    if rev is not None:
        _git_checkout(repo_path, rev)

    if not for_write:
        CLONES[url, rev_sha] = repo_path
    return repo_path
def _show_experiments(all_experiments, console, precision=None):
    """Build a table of experiments and print it on ``console``.

    The table has an "Experiment" column, then one right-justified column per
    metric name and one left-justified column per param name. Base revisions
    that are SHAs are shortened to seven characters before their rows are
    collected.
    """
    from rich.table import Table

    from dvc.scm.git import Git

    metric_names, param_names = _collect_names(all_experiments)

    table = Table()
    table.add_column("Experiment")
    column_groups = ((metric_names, "right"), (param_names, "left"))
    for names, justify in column_groups:
        for name in names:
            table.add_column(name, justify=justify)

    for base_rev, experiments in all_experiments.items():
        label = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        collected = _collect_rows(
            label,
            experiments,
            metric_names,
            param_names,
            precision=precision,
        )
        # The second element of each pair is not used by this table.
        for row, _ in collected:
            table.add_row(*row)

    console.print(table)
def _show_experiments(all_experiments, console, precision=None):
    """Build a styled table of experiments and print it on ``console``.

    Rows alternate between two row styles. The "Experiment", metric and param
    headers each get their own header style; metric columns are
    right-justified and param columns left-justified. Each collected row is
    added with the style that accompanies it.
    """
    from rich.table import Table

    from dvc.scm.git import Git

    metric_names, param_names = _collect_names(all_experiments)

    table = Table(row_styles=["white", "bright_white"])
    table.add_column("Experiment", header_style="black on grey93")
    column_groups = (
        (metric_names, "right", "black on cornsilk1"),
        (param_names, "left", "black on light_cyan1"),
    )
    for names, justify, header_style in column_groups:
        for name in names:
            table.add_column(name, justify=justify, header_style=header_style)

    for base_rev, experiments in all_experiments.items():
        # Shorten full SHAs; other revision names are passed through as-is.
        label = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        collected = _collect_rows(
            label,
            experiments,
            metric_names,
            param_names,
            precision=precision,
        )
        for row, style in collected:
            table.add_row(*row, style=style)

    console.print(table)
def ls(repo, *args, rev=None, git_remote=None, all_=False, **kwargs):
    """Map baseline revisions to the names of their experiment refs.

    With ``rev`` given, it is resolved against ``repo.scm``; a resolution
    failure is tolerated only when ``git_remote`` is set and ``rev`` is a
    SHA. Without ``rev`` and without ``all_``, the current revision of
    ``repo.scm`` is used. With no revision and ``all_`` set, every experiment
    ref is listed, grouped by its baseline SHA. Refs are read from
    ``git_remote`` when it is given, otherwise from the local repository.

    Returns:
        A ``defaultdict(list)`` of revision -> list of experiment names.
    """
    from dvc.scm import RevError, resolve_rev
    from dvc.scm.git import Git

    if rev:
        try:
            rev = resolve_rev(repo.scm, rev)
        except RevError:
            # This could be a remote rev that has not been fetched yet;
            # anything else is a genuine error.
            if not git_remote or not Git.is_sha(rev):
                raise
    elif not all_:
        rev = repo.scm.get_rev()

    results = defaultdict(list)

    if rev:
        refs = (
            remote_exp_refs_by_baseline(repo.scm, git_remote, rev)
            if git_remote
            else exp_refs_by_baseline(repo.scm, rev)
        )
        for ref in refs:
            results[rev].append(ref.name)
    elif all_:
        refs = (
            remote_exp_refs(repo.scm, git_remote)
            if git_remote
            else exp_refs(repo.scm)
        )
        for ref in refs:
            results[ref.baseline_sha].append(ref.name)

    return results
def _scm_checkout(self, rev):
    """Hard-reset the repository behind ``self.scm`` and check out ``rev``.

    A detached HEAD is first handed to ``_checkout_default_branch``. A pull
    is performed unless ``rev`` is a SHA that the repository already has.
    """
    self.scm.repo.git.reset(hard=True)

    if self.scm.repo.head.is_detached:
        self._checkout_default_branch()

    rev_already_present = Git.is_sha(rev) and self.scm.has_rev(rev)
    if not rev_already_present:
        self.scm.pull()

    logger.debug("Checking out experiment commit '%s'", rev)
    self.scm.checkout(rev)
def _scm_checkout(self, rev):
    """Hard-reset the repository behind ``self.scm`` and check out ``rev``.

    A detached HEAD is first moved back onto the first listed head. A pull is
    performed unless ``rev`` is a SHA that the repository already has.
    """
    scm = self.scm
    scm.repo.git.reset(hard=True)

    if scm.repo.head.is_detached:
        # switch back to default branch (taken to be the first head)
        scm.repo.heads[0].checkout()

    rev_already_present = Git.is_sha(rev) and scm.has_rev(rev)
    if not rev_already_present:
        scm.pull()

    logger.debug("Checking out base experiment commit '%s'", rev)
    scm.checkout(rev)
def _show_experiments(all_experiments, console, **kwargs):
    """Build a table of experiments and print it on ``console``.

    The ``include_metrics``, ``exclude_metrics``, ``include_params`` and
    ``exclude_params`` options are removed from ``kwargs``, parsed, and used
    to select the metric and param columns. A "Created" column is added
    unless ``no_timestamp`` is truthy. All remaining ``kwargs`` are forwarded
    to ``_collect_rows``.
    """
    from rich.table import Table

    from dvc.scm.git import Git

    filter_keys = (
        "include_metrics",
        "exclude_metrics",
        "include_params",
        "exclude_params",
    )
    # Pop the filters so they are not forwarded to _collect_rows below.
    filters = {key: _parse_list(kwargs.pop(key, [])) for key in filter_keys}
    metric_names, param_names = _collect_names(all_experiments, **filters)

    table = Table()
    table.add_column("Experiment", no_wrap=True)
    show_timestamp = not kwargs.get("no_timestamp", False)
    if show_timestamp:
        table.add_column("Created")
    for name in metric_names:
        table.add_column(name, justify="right", no_wrap=True)
    for name in param_names:
        table.add_column(name, justify="left")

    for base_rev, experiments in all_experiments.items():
        # Shorten full SHAs; other revision names are passed through as-is.
        label = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        collected = _collect_rows(
            label,
            experiments,
            metric_names,
            param_names,
            **kwargs,
        )
        # The second element of each pair is not used by this table.
        for row, _ in collected:
            table.add_row(*row)

    console.print(table)
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
):
    """Yield one row (a list) per entry of ``experiments``.

    Each row starts with: experiment name, short revision, queued marker,
    row type, formatted timestamp and short checkpoint-parent revision. It is
    then passed to ``_extend_row`` once for metrics and once for params.

    When ``sort_by`` is given the experiments are re-ordered first;
    ``sort_order == "desc"`` reverses the order.
    """
    from dvc.scm.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, sort_order == "desc"
        )

    last_index = len(experiments) - 1
    # True while the next matching-tip row would start a new checkpoint run.
    new_checkpoint = True

    for index, (rev, exp) in enumerate(experiments.items()):
        is_baseline = rev == "baseline"

        if not is_baseline:
            name_rev = rev[:7]
        elif Git.is_sha(base_rev):
            name_rev = base_rev[:7]
        else:
            name_rev = base_rev

        tip = exp.get("checkpoint_tip")
        parent_rev = exp.get("checkpoint_parent", "")
        parent_tip = experiments.get(parent_rev, {}).get("checkpoint_tip")
        # Truthy only when this row and its parent share a checkpoint tip.
        shares_parent_tip = tip and tip == parent_tip

        parent = ""
        if is_baseline:
            typ = "baseline"
        elif not tip:
            typ = "branch_commit" if index < last_index else "branch_base"
        elif shares_parent_tip:
            typ = "checkpoint_tip" if new_checkpoint else "checkpoint_commit"
        elif parent_rev == base_rev:
            typ = "checkpoint_base"
        else:
            typ = "checkpoint_commit"
            parent = parent_rev[:7]

        if not is_baseline:
            new_checkpoint = not shares_parent_tip

        row = [
            exp.get("name", ""),
            name_rev,
            str(exp.get("queued") or ""),
            typ,
            _format_time(exp.get("timestamp")),
            parent,
        ]
        for names, section in (
            (metric_names, "metrics"),
            (param_names, "params"),
        ):
            _extend_row(row, names, exp.get(section, {}).items(), precision)

        yield row
def _scm_checkout(self, rev):
    """Hard-reset the repository behind ``self.scm`` and check out ``rev``.

    Everything is fetched first unless ``rev`` is a SHA that the repository
    already has.
    """
    scm = self.scm
    scm.repo.git.reset(hard=True)

    rev_already_present = Git.is_sha(rev) and scm.has_rev(rev)
    if not rev_already_present:
        scm.fetch(all=True)

    logger.debug("Checking out base experiment commit '%s'", rev)
    scm.checkout(rev)
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    no_timestamp=False,
    sort_by=None,
    sort_order=None,
):
    """Yield a ``(row, style)`` pair for every entry of ``experiments``.

    The first cell of each row is a label: the plain name for the
    ``"baseline"`` entry, or a tree glyph followed by the queued marker, the
    name and (for some checkpoints) the short parent revision. A formatted
    timestamp cell follows unless ``no_timestamp`` is true, and the row is
    then passed to ``_extend_row`` for metrics and for params.

    ``style`` is ``"bold"`` for the baseline row and ``None`` otherwise.

    Raises:
        InvalidArgumentError: if ``sort_by`` is given but is neither a metric
            name nor a param name.
    """
    if sort_by:
        if sort_by in metric_names:
            sort_type = "metrics"
        elif sort_by in param_names:
            sort_type = "params"
        else:
            raise InvalidArgumentError(f"Unknown sort column '{sort_by}'")
        reverse = sort_order == "desc"
        experiments = _sort_exp(experiments, sort_by, sort_type, reverse)

    # Tracks whether the next row sharing its parent's tip opens a new
    # checkpoint run; it is carried over from one iteration to the next.
    new_checkpoint = True
    for i, (rev, exp) in enumerate(experiments.items()):
        row = []
        style = None
        queued = "*" if exp.get("queued", False) else ""

        tip = exp.get("checkpoint_tip")
        parent = ""
        if rev == "baseline":
            # Baseline row: the name falls back to the (shortened) base rev.
            if Git.is_sha(base_rev):
                name_rev = base_rev[:7]
            else:
                name_rev = base_rev
            name = exp.get("name", name_rev)
            row.append(f"{name}")
            style = "bold"
        else:
            if tip:
                # Checkpoint entry: compare its tip with its parent's tip.
                parent_rev = exp.get("checkpoint_parent", "")
                parent_exp = experiments.get(parent_rev, {})
                parent_tip = parent_exp.get("checkpoint_tip")
                if tip == parent_tip:
                    # Same run as the parent: first row of the run gets the
                    # opening glyph, later rows the continuation glyph.
                    if new_checkpoint:
                        tree = "│ ╓"
                    else:
                        tree = "│ ╟"
                    new_checkpoint = False
                else:
                    # Parent belongs to a different run (or to no run).
                    if parent_rev == base_rev:
                        tree = "├─╨"
                    else:
                        tree = "│ ╟"
                        # Show which revision this checkpoint continues from.
                        parent = f" ({parent_rev[:7]})"
                    new_checkpoint = True
            else:
                # Non-checkpoint entry: the last entry closes the tree.
                if i < len(experiments) - 1:
                    tree = "├──"
                else:
                    tree = "└──"
                new_checkpoint = True
            name = exp.get("name", rev[:7])
            row.append(f"{tree} {queued}{name}{parent}")

        if not no_timestamp:
            row.append(_format_time(exp.get("timestamp")))

        _extend_row(
            row, metric_names, exp.get("metrics", {}).items(), precision
        )
        _extend_row(row, param_names, exp.get("params", {}).items(), precision)

        yield row, style
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
    fill_value=FILL_VALUE,
    iso=False,
):
    """Yield one row (a list) per entry of ``experiments``.

    Each entry's values are read from its ``"data"`` mapping. A row starts
    with: experiment name, short revision, row type, formatted timestamp,
    short checkpoint-parent revision, state and executor. It is then passed
    to ``_extend_row`` once for metrics and once for params.

    Args:
        base_rev: Revision the experiments are based on.
        experiments: Mapping of revision -> result mapping; the key
            ``"baseline"`` marks the baseline entry.
        metric_names: Metric columns, forwarded to ``_extend_row``.
        param_names: Param columns, forwarded to ``_extend_row``.
        precision: Forwarded to ``_extend_row``.
        sort_by: Optional column to sort the experiments by.
        sort_order: ``"desc"`` reverses the sort order.
        fill_value: Placeholder for a missing state or executor; also passed
            to ``_format_time`` and ``_extend_row``.
        iso: Forwarded to ``_format_time``.

    Rows whose result mapping has a truthy ``"error"`` use
    ``FILL_VALUE_ERRORED`` as the placeholder for their metric and param
    cells. That substitution applies to the errored row only.
    """
    from dvc.scm.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        reverse = sort_order == "desc"
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, reverse
        )

    # True while the next matching-tip row would start a new checkpoint run.
    new_checkpoint = True
    for i, (rev, results) in enumerate(experiments.items()):
        exp = results.get("data", {})

        if exp.get("running"):
            state = "Running"
        elif exp.get("queued"):
            state = "Queued"
        else:
            state = fill_value
        executor = exp.get("executor", fill_value)

        is_baseline = rev == "baseline"
        if is_baseline:
            name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        else:
            name_rev = rev[:7]

        exp_name = exp.get("name", "")
        tip = exp.get("checkpoint_tip")
        parent_rev = exp.get("checkpoint_parent", "")
        parent_exp = experiments.get(parent_rev, {}).get("data", {})
        parent_tip = parent_exp.get("checkpoint_tip")

        parent = ""
        if is_baseline:
            typ = "baseline"
        elif tip:
            if tip == parent_tip:
                typ = "checkpoint_tip" if new_checkpoint else "checkpoint_commit"
            elif parent_rev == base_rev:
                typ = "checkpoint_base"
            else:
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
        elif i < len(experiments) - 1:
            typ = "branch_commit"
        else:
            typ = "branch_base"

        if not is_baseline:
            new_checkpoint = not (tip and tip == parent_tip)

        row = [
            exp_name,
            name_rev,
            typ,
            _format_time(exp.get("timestamp"), fill_value, iso),
            parent,
            state,
            executor,
        ]

        # Use a per-row placeholder. Rebinding ``fill_value`` itself would
        # carry the errored marker over to every row that follows.
        row_fill = FILL_VALUE_ERRORED if results.get("error") else fill_value
        _extend_row(
            row,
            metric_names,
            exp.get("metrics", {}).items(),
            precision,
            fill_value=row_fill,
        )
        _extend_row(
            row,
            param_names,
            exp.get("params", {}).items(),
            precision,
            fill_value=row_fill,
        )

        yield row
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
):
    """Yield one row (a list) per entry of ``experiments``.

    Each row starts with three cells: a label (the plain name for the
    ``"baseline"`` entry, otherwise a tree glyph followed by the queued
    marker, the name and, for some checkpoints, the short parent revision),
    the formatted timestamp, and a boolean that is true for the baseline
    entry. The row is then passed to ``_extend_row`` for metrics and for
    params.

    When ``sort_by`` is given the experiments are re-ordered first;
    ``sort_order == "desc"`` reverses the order.
    """
    from dvc.scm.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        reverse = sort_order == "desc"
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, reverse
        )

    # Tracks whether the next row sharing its parent's tip opens a new
    # checkpoint run; it is carried over from one iteration to the next.
    new_checkpoint = True
    for i, (rev, exp) in enumerate(experiments.items()):
        queued = "*" if exp.get("queued", False) else ""

        tip = exp.get("checkpoint_tip")
        parent = ""
        if rev == "baseline":
            # Baseline row: the name falls back to the (shortened) base rev.
            if Git.is_sha(base_rev):
                name_rev = base_rev[:7]
            else:
                name_rev = base_rev
            text = exp.get("name", name_rev)
        else:
            if tip:
                # Checkpoint entry: compare its tip with its parent's tip.
                parent_rev = exp.get("checkpoint_parent", "")
                parent_exp = experiments.get(parent_rev, {})
                parent_tip = parent_exp.get("checkpoint_tip")
                if tip == parent_tip:
                    # Same run as the parent: first row of the run gets the
                    # opening glyph, later rows the continuation glyph.
                    if new_checkpoint:
                        tree = "│ ╓"
                    else:
                        tree = "│ ╟"
                    new_checkpoint = False
                else:
                    # Parent belongs to a different run (or to no run).
                    if parent_rev == base_rev:
                        tree = "├─╨"
                    else:
                        tree = "│ ╟"
                        # Show which revision this checkpoint continues from.
                        parent = f" ({parent_rev[:7]})"
                    new_checkpoint = True
            else:
                # Non-checkpoint entry: the last entry closes the tree.
                if i < len(experiments) - 1:
                    tree = "├──"
                else:
                    tree = "└──"
                new_checkpoint = True
            name = exp.get("name", rev[:7])
            text = f"{tree} {queued}{name}{parent}"

        row = [text, _format_time(exp.get("timestamp")), rev == "baseline"]

        _extend_row(
            row, metric_names, exp.get("metrics", {}).items(), precision
        )
        _extend_row(row, param_names, exp.get("params", {}).items(), precision)

        yield row