Пример #1
0
def git(tmp_dir, scm, request):
    """Yield a ``Git`` instance rooted at ``tmp_dir`` for a single backend.

    The backend comes from ``request.param``: it is handed to ``Git`` as the
    only entry of ``backends`` and also stored on the instance as
    ``test_backend``. The instance is closed once the generator is resumed.
    """
    from dvc.scm.git import Git

    backend = request.param
    repo_git = Git(os.fspath(tmp_dir), backends=[backend])
    repo_git.test_backend = backend
    yield repo_git
    repo_git.close()
Пример #2
0
def _cached_clone(url, rev, for_write=False):
    """Clone an external git repo to a temporary directory.

    Returns the path to a local temporary directory with the specified
    revision checked out. If for_write is set, the CLONES cache is neither
    consulted nor updated, so the returned directory is never reused.
    """
    if not for_write and Git.is_sha(rev) and (url, rev) in CLONES:
        return CLONES[url, rev]

    clone_path = _clone_default_branch(url, rev)

    # Close the helper Git object once the revision is resolved, the same way
    # the other erepo helpers release theirs.
    git = Git(clone_path)
    try:
        rev_sha = git.resolve_rev(rev or "HEAD")
    finally:
        git.close()

    if not for_write and (url, rev_sha) in CLONES:
        return CLONES[url, rev_sha]

    # Copy to a new dir to keep the clone clean
    repo_path = tempfile.mkdtemp("dvc-erepo")
    # logging interpolates its arguments %-style; a "{}" placeholder would
    # make the record fail to format.
    logger.debug("erepo: making a copy of %s clone", url)
    copy_tree(clone_path, repo_path)

    # Check out the specified revision
    if rev is not None:
        _git_checkout(repo_path, rev)

    if not for_write:
        CLONES[url, rev_sha] = repo_path
    return repo_path
Пример #3
0
def _clone_default_branch(url, rev):
    """Return the path of a clean clone of ``url``, creating it if needed.

    A clone already recorded in ``CLONES`` is refreshed with ``git pull``
    unless ``rev`` is a sha that the clone already has. Otherwise the url is
    cloned into a new temporary directory, which is then recorded in
    ``CLONES`` under ``url``.
    """
    cached_path = CLONES.get(url)

    repo = None
    try:
        if not cached_path:
            logger.debug("erepo: git clone %s to a temporary dir", url)
            cached_path = tempfile.mkdtemp("dvc-clone")
            repo = Git.clone(url, cached_path)
            CLONES[url] = cached_path
        else:
            repo = Git(cached_path)
            # Do not pull for known shas, branches and tags might move
            rev_is_present = Git.is_sha(rev) and repo.has_rev(rev)
            if not rev_is_present:
                logger.debug("erepo: git pull %s", url)
                repo.pull()
    finally:
        if repo:
            repo.close()

    return cached_path
Пример #4
0
def test_no_commits(tmp_dir):
    """Check ``Git().no_commits`` before and after the first commit."""
    from dvc.scm.git import Git
    from tests.dir_helpers import git_init

    git_init(".")
    before_first_commit = Git().no_commits
    assert before_first_commit

    # Create, stage and commit a single file.
    tmp_dir.gen("foo", "foo")
    Git().add(["foo"])
    Git().commit("foo")

    after_first_commit = Git().no_commits
    assert not after_first_commit
Пример #5
0
def SCM(root_dir):  # pylint: disable=invalid-name
    """Return the SCM instance that corresponds to the repo at a path.

    Args:
        root_dir (str): path to a root directory of the repo.

    Returns:
        dvc.scm.base.Base: a ``Git`` instance when ``root_dir`` is a git repo
            or a git submodule, a ``NoSCM`` instance otherwise.
    """
    under_git = Git.is_repo(root_dir) or Git.is_submodule(root_dir)
    if not under_git:
        return NoSCM(root_dir)

    return Git(root_dir)
Пример #6
0
def _clone_default_branch(url, rev, for_write=False):
    """Return ``(clone_path, shallow)`` for a clean clone of ``url``.

    A clone already recorded in ``CLONES`` is brought up to date unless
    ``rev`` is a sha it already has: a shallow clone is unshallowed, a full
    clone is pulled. When nothing is recorded yet, the url is cloned into a
    new temporary directory; a shallow clone of ``rev`` is tried first when
    ``rev`` is set, is not a sha and ``for_write`` is false. The result is
    recorded in ``CLONES`` under ``url``.
    """
    from dvc.scm.git import Git

    clone_path, shallow = CLONES.get(url, (None, False))

    repo = None
    try:
        if not clone_path:
            from dvc.scm import clone

            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            clone_path = tempfile.mkdtemp("dvc-clone")
            try_shallow = not for_write and rev and not Git.is_sha(rev)
            if try_shallow:
                # If rev is a tag or branch name try shallow clone first
                try:
                    repo = clone(url, clone_path, shallow_branch=rev)
                except CloneError:
                    pass
                else:
                    shallow = True
                    logger.debug("erepo: using shallow clone for branch '%s'",
                                 rev)
            if not repo:
                repo = clone(url, clone_path)
                shallow = False
            CLONES[url] = (clone_path, shallow)
        else:
            repo = Git(clone_path)
            # Do not pull for known shas, branches and tags might move
            needs_refresh = not (Git.is_sha(rev) and repo.has_rev(rev))
            if needs_refresh and shallow:
                # If we are missing a rev in a shallow clone, fallback to
                # a full (unshallowed) clone. Since fetching specific rev
                # SHAs is only available in certain git versions, if we
                # have need to reference multiple specific revs for a
                # given repo URL it is easier/safer for us to work with
                # full clones in this case.
                logger.debug("erepo: unshallowing clone for '%s'", url)
                _unshallow(repo)
                shallow = False
                CLONES[url] = (clone_path, shallow)
            elif needs_refresh:
                logger.debug("erepo: git pull '%s'", url)
                repo.pull()
    finally:
        if repo:
            repo.close()

    return clone_path, shallow
Пример #7
0
def SCM(root_dir, project=None):  # pylint: disable=invalid-name
    """Return the SCM instance that corresponds to the project at a path.

    Args:
        root_dir (str): path to a root directory of the project.
        project (dvc.project.Project): dvc project instance that root_dir
            belongs to.

    Returns:
        dvc.scm.base.Base: a ``Git`` instance when ``root_dir`` is a git repo
            or a git submodule, a plain ``Base`` instance otherwise.
    """
    under_git = Git.is_repo(root_dir) or Git.is_submodule(root_dir)
    scm_class = Git if under_git else Base
    return scm_class(root_dir, project=project)
Пример #8
0
def ls(repo, *args, rev=None, git_remote=None, all_=False, **kwargs):
    """Collect experiment ref names grouped by revision.

    A given ``rev`` is resolved with ``resolve_rev``; a ``RevError`` is only
    tolerated when ``git_remote`` is set and ``rev`` is a sha. Without
    ``rev`` and without ``all_`` the result of ``repo.scm.get_rev()`` is
    used.

    Returns:
        defaultdict: lists of ref names, keyed by ``rev`` when a revision is
            in effect, or by each ref's ``baseline_sha`` when ``all_`` is
            set and no revision is in effect.
    """
    from dvc.scm import RevError, resolve_rev
    from dvc.scm.git import Git

    if not rev:
        if not all_:
            rev = repo.scm.get_rev()
    else:
        try:
            rev = resolve_rev(repo.scm, rev)
        except RevError:
            # This could be a remote rev that has not been fetched yet
            maybe_unfetched = git_remote and Git.is_sha(rev)
            if not maybe_unfetched:
                raise

    names_by_rev = defaultdict(list)

    if rev:
        if git_remote:
            refs = remote_exp_refs_by_baseline(repo.scm, git_remote, rev)
        else:
            refs = exp_refs_by_baseline(repo.scm, rev)
        for ref_info in refs:
            names_by_rev[rev].append(ref_info.name)
    elif all_:
        if git_remote:
            refs = remote_exp_refs(repo.scm, git_remote)
        else:
            refs = exp_refs(repo.scm)
        for ref_info in refs:
            names_by_rev[ref_info.baseline_sha].append(ref_info.name)

    return names_by_rev
Пример #9
0
def _show_experiments(all_experiments, console, precision=None):
    """Build a rich table of experiments and print it on ``console``.

    The table has an "Experiment" column followed by one right-justified
    column per metric name and one left-justified column per parameter name.
    Rows and their styles come from ``_collect_rows``; base revisions that
    are shas are shortened to 7 characters first.
    """
    from rich.table import Table
    from dvc.scm.git import Git

    metric_names, param_names = _collect_names(all_experiments)

    table = Table(row_styles=["white", "bright_white"])
    table.add_column("Experiment", header_style="black on grey93")

    # (column names, justification, header style) for each column group
    column_groups = (
        (metric_names, "right", "black on cornsilk1"),
        (param_names, "left", "black on light_cyan1"),
    )
    for names, justify, header_style in column_groups:
        for name in names:
            table.add_column(name, justify=justify, header_style=header_style)

    for base_rev, experiments in all_experiments.items():
        shown_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        rows = _collect_rows(
            shown_rev,
            experiments,
            metric_names,
            param_names,
            precision=precision,
        )
        for row, style in rows:
            table.add_row(*row, style=style)

    console.print(table)
Пример #10
0
def external_repo(url,
                  rev=None,
                  for_write=False,
                  cache_dir=None,
                  cache_types=None,
                  **kwargs):
    """Yield a ``Repo`` opened on a cached clone of ``url``.

    The clone comes from ``_cached_clone``. Unless ``for_write`` is set, the
    repo is opened on the real path of the clone with a ``Git`` scm at
    ``rev``. ``subrepos`` and ``uninitialized`` default to True when not
    passed in ``kwargs``. The repo is always closed afterwards, and the
    clone is removed when ``for_write`` is set.

    Raises:
        NoRemoteInExternalRepoError: in place of ``NoRemoteError``.
        NoOutputInExternalRepoError: in place of an ``OutputNotFoundError``
            raised for this very repo.
        PathMissingError: in place of ``FileMissingError``.
    """
    from dvc.config import NoRemoteError
    from dvc.scm.git import Git

    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), use origin/HEAD here to get
    # the tip of the default branch
    if not rev:
        rev = "refs/remotes/origin/HEAD"

    cache_settings = {
        "dir": cache_dir or _get_cache_dir(url),
        "type": cache_types,
    }
    cache_config = {"cache": cache_settings}

    if os.path.isdir(url):
        config = _get_remote_config(url)
    else:
        config = {}
    config.update(cache_config)

    if for_write:
        root_dir, repo_scm, repo_rev = path, None, None
    else:
        root_dir = os.path.realpath(path)
        repo_scm, repo_rev = Git(root_dir), rev

    repo_kwargs = dict(
        root_dir=root_dir,
        url=url,
        scm=repo_scm,
        rev=repo_rev,
        config=config,
        repo_factory=erepo_factory(url, cache_config),
        **kwargs,
    )
    # Only fill these in when the caller did not choose a value.
    repo_kwargs.setdefault("subrepos", True)
    repo_kwargs.setdefault("uninitialized", True)

    repo = Repo(**repo_kwargs)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        if exc.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(exc.output, repo.root_dir,
                                          url) from exc
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)
Пример #11
0
    def __init__(
        self,
        path: str = None,
        rev: str = None,
        scm: "Git" = None,
        trie: "GitTrie" = None,
        rev_resolver: Callable[["Git", str], str] = None,
        **kwargs,
    ):
        """Set up ``trie``, ``root_dir`` and ``rev`` for this instance.

        When ``trie`` is given it is used as is and ``path`` must be set.
        Otherwise a ``GitTrie`` is built from ``scm`` (or a new ``Git`` at
        ``path``) for ``rev`` (``"HEAD"`` when not given), resolved with
        ``rev_resolver`` or ``Git.resolve_rev``, and ``root_dir`` is taken
        from the scm. Remaining keyword arguments go to the parent class.
        """
        from dvc.scm.git import Git
        from dvc.scm.git.objects import GitTrie

        super().__init__(**kwargs)
        if trie:
            assert path
        else:
            if not scm:
                scm = Git(path)
            resolve = rev_resolver if rev_resolver else Git.resolve_rev
            resolved_rev = resolve(scm, rev if rev else "HEAD")
            tree = scm.pygit2.get_tree_obj(rev=resolved_rev)
            trie = GitTrie(tree, resolved_rev)
            path = scm.root_dir

        self.trie = trie
        self.root_dir = path
        self.rev = self.trie.rev
Пример #12
0
def external_repo(url, rev=None, for_write=False, **kwargs):
    """Yield an ``ExternalRepo`` opened on a cached clone of ``url``.

    The clone comes from ``_cached_clone``. Unless ``for_write`` is set, the
    repo is opened on the real path of the clone with a ``Git`` scm at
    ``rev``. The repo is always closed afterwards, and the clone is removed
    when ``for_write`` is set.

    Raises:
        NoRemoteInExternalRepoError: in place of ``NoRemoteError``.
        NoOutputInExternalRepoError: in place of an ``OutputNotFoundError``
            raised for this very repo.
        PathMissingError: in place of ``FileMissingError``.
    """
    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), use origin/HEAD here to get
    # the tip of the default branch
    if not rev:
        rev = "refs/remotes/origin/HEAD"

    if for_write:
        root_dir = path
        repo_scm = None
        checkout_rev = None
    else:
        root_dir = os.path.realpath(path)
        repo_scm = Git(root_dir)
        checkout_rev = rev

    conf = dict(
        root_dir=root_dir,
        url=url,
        scm=repo_scm,
        rev=checkout_rev,
        for_write=for_write,
        uninitialized=True,
        **kwargs,
    )
    repo = ExternalRepo(**conf)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        if exc.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(exc.output, repo.root_dir,
                                          url) from exc
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)
Пример #13
0
def _show_experiments(all_experiments, console, precision=None):
    """Build a rich table of experiments and print it on ``console``.

    The table has an "Experiment" column followed by one right-justified
    column per metric name and one left-justified column per parameter name.
    Rows come from ``_collect_rows``; the second item of each pair it yields
    is ignored. Base revisions that are shas are shortened to 7 characters.
    """
    from rich.table import Table
    from dvc.scm.git import Git

    metric_names, param_names = _collect_names(all_experiments)

    table = Table()
    table.add_column("Experiment")
    for justify, names in (("right", metric_names), ("left", param_names)):
        for name in names:
            table.add_column(name, justify=justify)

    for base_rev, experiments in all_experiments.items():
        shown_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        rows = _collect_rows(
            shown_rev,
            experiments,
            metric_names,
            param_names,
            precision=precision,
        )
        for row, _ in rows:
            table.add_row(*row)

    console.print(table)
Пример #14
0
    def make(name, *, scm=False, dvc=False, subdir=False):
        """Return a ``TmpDir`` populated from a cached template directory.

        One template per ``(scm, dvc, subdir)`` combination is initialised on
        first use and remembered in ``CACHE``. Its entries are copied into a
        new temporary directory named ``name`` when ``name`` is a string, or
        into ``name`` itself otherwise. ``dvc`` and ``scm`` attributes are
        attached as requested, and ``close`` is registered as a finalizer.
        """
        from shutil import ignore_patterns

        from dvc.repo import Repo
        from dvc.scm.git import Git
        from dvc.utils.fs import fs_copy

        cache_key = (scm, dvc, subdir)
        cache = CACHE.get(cache_key)
        if not cache:
            cache = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
            TmpDir(cache).init(scm=scm, dvc=dvc, subdir=subdir)
            CACHE[cache_key] = os.fspath(cache)

        if isinstance(name, str):
            path = tmp_path_factory.mktemp(name)
        else:
            path = name

        # ignore sqlite files from .dvc/tmp. We might not be closing the cache
        # connection resulting in PermissionErrors in Windows.
        skip_sqlite = ignore_patterns("cache.db*")
        for entry in os.listdir(cache):
            source = os.path.join(cache, entry)
            target = os.path.join(path, entry)
            # shutil.copytree's dirs_exist_ok is only available in >=3.8
            fs_copy(source, target, ignore=skip_sqlite)

        new_dir = TmpDir(path)
        str_path = os.fspath(new_dir)
        if dvc:
            new_dir.dvc = Repo(str_path)
        if scm:
            # Reuse the scm of the dvc repo when there is one.
            if hasattr(new_dir, "dvc"):
                new_dir.scm = new_dir.dvc.scm
            else:
                new_dir.scm = Git(str_path)
        request.addfinalizer(new_dir.close)
        return new_dir
Пример #15
0
    def clone(url, to_path, rev=None):
        """Clone ``url`` into ``to_path`` and return a ``Repo`` opened there.

        The ``Git`` object returned by ``Git.clone`` is closed right away;
        only the ``Repo`` built on ``to_path`` is handed back.
        """
        from dvc.scm.git import Git

        Git.clone(url, to_path, rev=rev).close()
        return Repo(to_path)
Пример #16
0
 def _scm_checkout(self, rev):
     """Hard-reset the repo and check out ``rev``.

     A detached HEAD is first moved back via ``_checkout_default_branch``.
     The repo is pulled unless ``rev`` is a sha it already has.
     """
     self.scm.repo.git.reset(hard=True)
     if self.scm.repo.head.is_detached:
         self._checkout_default_branch()
     rev_is_local_sha = Git.is_sha(rev) and self.scm.has_rev(rev)
     if not rev_is_local_sha:
         self.scm.pull()
     logger.debug("Checking out experiment commit '%s'", rev)
     self.scm.checkout(rev)
Пример #17
0
def clone(url: str, to_path: str, **kwargs):
    """Clone ``url`` into ``to_path`` under a "Cloning" progress bar.

    Extra keyword arguments are forwarded to ``Git.clone``, whose result is
    returned unchanged.

    Raises:
        CloneError: when ``Git.clone`` raises
            ``dvc.scm.exceptions.CloneError``; the original exception is
            attached as the cause.
    """
    from dvc.scm.exceptions import CloneError as InternalCloneError

    with TqdmGit(desc="Cloning") as pbar:
        try:
            return Git.clone(url, to_path, progress=pbar.update_git, **kwargs)
        except InternalCloneError as exc:
            # Chain explicitly so the traceback reports the low-level error
            # as the direct cause rather than as an incidental context.
            raise CloneError(str(exc)) from exc
Пример #18
0
 def _scm_checkout(self, rev):
     """Hard-reset the repo and check out ``rev``.

     A detached HEAD is first replaced by a checkout of the first entry of
     ``repo.heads``. The repo is pulled unless ``rev`` is a sha it already
     has.
     """
     self.scm.repo.git.reset(hard=True)
     if self.scm.repo.head.is_detached:
         # switch back to default branch
         self.scm.repo.heads[0].checkout()
     rev_is_local_sha = Git.is_sha(rev) and self.scm.has_rev(rev)
     if not rev_is_local_sha:
         self.scm.pull()
     logger.debug("Checking out base experiment commit '%s'", rev)
     self.scm.checkout(rev)
Пример #19
0
 def __init__(self, root_dir, url, rev, for_write=False):
     """Open the repo at ``root_dir`` and remember the ``url`` it came from.

     With ``for_write`` the parent is initialised on ``root_dir`` alone.
     Otherwise it is initialised on the real path of ``root_dir`` with a
     ``Git`` scm and ``rev``. The cache dir and upstream are then set up
     through ``_set_cache_dir`` and ``_fix_upstream``.
     """
     if not for_write:
         root_dir = os.path.realpath(root_dir)
         super().__init__(root_dir, scm=Git(root_dir), rev=rev)
     else:
         super().__init__(root_dir)
     self.url = url
     self._set_cache_dir()
     self._fix_upstream()
Пример #20
0
def test_no_commits(tmp_dir):
    """Check that metrics diff is empty in a git repo without commits."""
    from dvc.repo import Repo
    from dvc.scm.git import Git
    from tests.dir_helpers import git_init

    git_init(".")
    has_no_commits = Git().no_commits
    assert has_no_commits

    metrics_diff = Repo.init().metrics.diff()
    assert metrics_diff == {}
Пример #21
0
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repo at ``repo_path``.

    The ``Git`` object is closed whether or not the checkout succeeds.
    """
    logger.debug("erepo: git checkout %s@%s", repo_path, rev)
    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        # Release the handle even when the checkout raises.
        repo.close()
Пример #22
0
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repo at ``repo_path``.

    The ``Git`` object is closed whether or not the checkout succeeds.
    """
    from dvc.scm import Git

    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        # Release the handle even when the checkout raises.
        repo.close()
Пример #23
0
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repo at ``repo_path``.

    The ``Git`` object is closed whether or not the checkout succeeds.
    """
    from dvc.scm.git import Git

    logger.debug("erepo: git checkout %s@%s", repo_path, rev)
    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        # Release the handle even when the checkout raises.
        repo.close()
Пример #24
0
def scm(tmp_dir, request):
    """Yield the SCM object for ``tmp_dir`` and attach it as ``tmp_dir.scm``.

    When the ``dvc`` fixture is among the requested fixtures, its ``scm`` is
    reused and is not closed here. Otherwise ``_git_init`` is called and a
    new ``Git`` instance is created for ``tmp_dir``; that instance is closed
    once the generator finishes.
    """
    # Use dvc.scm if available
    if "dvc" in request.fixturenames:
        dvc = request.getfixturevalue("dvc")
        tmp_dir.scm = dvc.scm
        yield dvc.scm

    else:
        from dvc.scm.git import Git

        _git_init()
        # Build the instance before entering ``try``: if the constructor
        # raised inside it, ``finally`` would touch an unbound local and
        # replace the real error with an UnboundLocalError.
        git = tmp_dir.scm = Git(fspath(tmp_dir))
        try:
            yield git
        finally:
            git.close()
Пример #25
0
    def init(self, *, scm=False, dvc=False):
        """Initialise git and/or dvc in this directory.

        With ``scm`` a git repo is initialised through ``_git_init``; with
        ``dvc`` a repo is created by ``Repo.init(..., no_scm=True)`` and
        stored as ``self.dvc``. ``self.scm`` is taken from ``self.dvc`` when
        present, else it is a new ``Git``. When dvc was initialised and an
        scm is attached, an "init dvc" commit is made. Asserts that the
        requested attributes are not already set.
        """
        from dvc.repo import Repo
        from dvc.scm.git import Git

        assert not (scm and hasattr(self, "scm"))
        assert not (dvc and hasattr(self, "dvc"))

        str_path = fspath(self)

        if scm:
            _git_init(str_path)
        if dvc:
            self.dvc = Repo.init(str_path, no_scm=True)
        if scm:
            if hasattr(self, "dvc"):
                self.scm = self.dvc.scm
            else:
                self.scm = Git(str_path)
        if dvc and hasattr(self, "scm"):
            self.scm.commit("init dvc")
Пример #26
0
def SCM(root_dir, search_parent_directories=True, no_scm=False):  # pylint: disable=invalid-name
    """Return the SCM instance that corresponds to the repo at a path.

    Args:
        root_dir (str): path to a root directory of the repo.
        search_parent_directories (bool): whether to look for repo root in
            parent directories; forwarded to ``Git``.
        no_scm (bool): return NoSCM if True.

    Returns:
        dvc.scm.base.Base: SCM instance.
    """
    if not no_scm:
        return Git(root_dir,
                   search_parent_directories=search_parent_directories)

    return NoSCM(root_dir)
Пример #27
0
def _show_experiments(all_experiments, console, **kwargs):
    """Build a rich table of experiments and print it on ``console``.

    The ``include_metrics``, ``exclude_metrics``, ``include_params`` and
    ``exclude_params`` keyword arguments are removed from ``kwargs``, parsed
    with ``_parse_list`` and passed to ``_collect_names``. A "Created"
    column is added unless ``no_timestamp`` is truthy. The remaining
    ``kwargs`` are forwarded to ``_collect_rows``; the second item of each
    pair it yields is ignored.
    """
    from rich.table import Table

    from dvc.scm.git import Git

    filter_keys = (
        "include_metrics",
        "exclude_metrics",
        "include_params",
        "exclude_params",
    )
    name_filters = {
        key: _parse_list(kwargs.pop(key, []))
        for key in filter_keys
    }
    metric_names, param_names = _collect_names(all_experiments,
                                                **name_filters)

    table = Table()
    table.add_column("Experiment", no_wrap=True)
    hide_timestamp = kwargs.get("no_timestamp", False)
    if not hide_timestamp:
        table.add_column("Created")
    for metric in metric_names:
        table.add_column(metric, justify="right", no_wrap=True)
    for param in param_names:
        table.add_column(param, justify="left")

    for base_rev, experiments in all_experiments.items():
        shown_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        rows = _collect_rows(
            shown_rev,
            experiments,
            metric_names,
            param_names,
            **kwargs,
        )
        for row, _ in rows:
            table.add_row(*row)

    console.print(table)
Пример #28
0
    def make(name, *, scm=False, dvc=False, subdir=False):
        """Return a ``TmpDir`` populated from a cached template directory.

        One template per ``(scm, dvc, subdir)`` combination is initialised on
        first use and remembered in ``CACHE``. Its entries are copied into a
        new temporary directory named ``name`` when ``name`` is a string, or
        into ``name`` itself otherwise. ``dvc`` and ``scm`` attributes are
        attached as requested, and ``close`` is registered as a finalizer.
        """
        from dvc.repo import Repo
        from dvc.scm.git import Git
        from dvc.utils.fs import fs_copy

        template_key = (scm, dvc, subdir)
        cache = CACHE.get(template_key)
        if not cache:
            cache = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
            TmpDir(cache).init(scm=scm, dvc=dvc, subdir=subdir)
            CACHE[template_key] = os.fspath(cache)

        if isinstance(name, str):
            path = tmp_path_factory.mktemp(name)
        else:
            path = name

        for entry in os.listdir(cache):
            source = os.path.join(cache, entry)
            target = os.path.join(path, entry)
            # shutil.copytree's dirs_exist_ok is only available in >=3.8
            fs_copy(source, target)

        new_dir = TmpDir(path)
        str_path = os.fspath(new_dir)
        if dvc:
            new_dir.dvc = Repo(str_path)
        if scm:
            # Reuse the scm of the dvc repo when there is one.
            if hasattr(new_dir, "dvc"):
                new_dir.scm = new_dir.dvc.scm
            else:
                new_dir.scm = Git(str_path)
        request.addfinalizer(new_dir.close)
        return new_dir
Пример #29
0
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
):
    """Yield one row (a list) per experiment under ``base_rev``.

    Each row starts with the experiment name, the short revision, the queued
    marker, the row type, the formatted timestamp and the short parent
    revision. It is then passed to ``_extend_row`` once for metrics and once
    for params. When ``sort_by`` is set, the experiments are first reordered
    with ``_sort_exp`` (reversed when ``sort_order`` is ``"desc"``).

    The row type is one of "baseline", "checkpoint_tip",
    "checkpoint_commit", "checkpoint_base", "branch_commit" or
    "branch_base".
    """
    from dvc.scm.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(sort_by, metric_names,
                                                       param_names)
        descending = sort_order == "desc"
        experiments = _sort_exp(experiments, sort_path, sort_name, sort_type,
                                descending)

    # True while the next commit sharing its parent's tip starts a new chain
    fresh_checkpoint = True
    for index, (rev, exp) in enumerate(experiments.items()):
        at_baseline = rev == "baseline"
        queued = str(exp.get("queued") or "")

        if not at_baseline:
            name_rev = rev[:7]
        elif Git.is_sha(base_rev):
            name_rev = base_rev[:7]
        else:
            name_rev = base_rev

        exp_name = exp.get("name", "")
        tip = exp.get("checkpoint_tip")

        parent_rev = exp.get("checkpoint_parent", "")
        parent_tip = experiments.get(parent_rev, {}).get("checkpoint_tip")
        shares_parent_tip = bool(tip and tip == parent_tip)

        parent = ""
        if at_baseline:
            typ = "baseline"
        elif shares_parent_tip:
            if fresh_checkpoint:
                typ = "checkpoint_tip"
            else:
                typ = "checkpoint_commit"
        elif tip:
            if parent_rev == base_rev:
                typ = "checkpoint_base"
            else:
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
        elif index < len(experiments) - 1:
            typ = "branch_commit"
        else:
            typ = "branch_base"

        # The baseline row leaves the checkpoint chain state untouched.
        if not at_baseline:
            fresh_checkpoint = not shares_parent_tip

        row = [
            exp_name,
            name_rev,
            queued,
            typ,
            _format_time(exp.get("timestamp")),
            parent,
        ]
        metrics = exp.get("metrics", {}).items()
        _extend_row(row, metric_names, metrics, precision)
        params = exp.get("params", {}).items()
        _extend_row(row, param_names, params, precision)

        yield row
Пример #30
0
 def _scm_checkout(self, rev):
     """Hard-reset the repo and check out ``rev``.

     Everything is fetched (``fetch(all=True)``) unless ``rev`` is a sha the
     repo already has.
     """
     self.scm.repo.git.reset(hard=True)
     rev_is_local_sha = Git.is_sha(rev) and self.scm.has_rev(rev)
     if not rev_is_local_sha:
         self.scm.fetch(all=True)
     logger.debug("Checking out base experiment commit '%s'", rev)
     self.scm.checkout(rev)