Exemplo n.º 1
0
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository located at ``repo_path``.

    The ``Git`` handle opened for the checkout is closed before returning,
    whether or not the checkout succeeds.
    """
    logger.debug("erepo: git checkout %s@%s", repo_path, rev)
    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        # Release the handle even when the checkout raises.
        repo.close()
Exemplo n.º 2
0
def git(tmp_dir, scm, request):
    """Yield a ``Git`` object for ``tmp_dir`` restricted to one backend.

    The backend is taken from ``request.param`` and is also stored on the
    yielded object as ``test_backend``. The object is closed once the
    consumer is done with it.

    NOTE(review): ``scm`` is not used in the body; presumably it is requested
    only so that the repository is initialised first -- confirm.
    """
    from dvc.scm.git import Git

    backend = request.param
    root = os.fspath(tmp_dir)

    repo = Git(root, backends=[backend])
    repo.test_backend = backend
    yield repo
    repo.close()
Exemplo n.º 3
0
def _clone_default_branch(url, rev):
    """Return the path of a local clone of ``url``, creating one if needed.

    A clone already registered in ``CLONES`` is refreshed with ``git pull``
    unless ``rev`` is a SHA that the clone already contains. When no clone
    is registered, the repository is cloned into a new temporary directory
    and that directory is recorded in ``CLONES``.
    """
    repo = None
    cached_path = CLONES.get(url)

    try:
        if not cached_path:
            logger.debug("erepo: git clone %s to a temporary dir", url)
            cached_path = tempfile.mkdtemp("dvc-clone")
            repo = Git.clone(url, cached_path)
            CLONES[url] = cached_path
        else:
            repo = Git(cached_path)
            # Branches and tags might move, so the pull is skipped only for
            # a SHA that is already present in the clone.
            rev_is_known_sha = Git.is_sha(rev) and repo.has_rev(rev)
            if not rev_is_known_sha:
                logger.debug("erepo: git pull %s", url)
                repo.pull()
    finally:
        # Close whichever handle was opened, also on failure.
        if repo:
            repo.close()

    return cached_path
Exemplo n.º 4
0
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository located at ``repo_path``.

    The ``Git`` handle opened for the checkout is closed before returning,
    whether or not the checkout succeeds.
    """
    from dvc.scm import Git

    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        # Release the handle even when the checkout raises.
        repo.close()
Exemplo n.º 5
0
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository located at ``repo_path``.

    The checkout is logged at debug level first. The ``Git`` handle opened
    for it is closed before returning, whether or not the checkout succeeds.
    """
    from dvc.scm.git import Git

    logger.debug("erepo: git checkout %s@%s", repo_path, rev)
    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        # Release the handle even when the checkout raises.
        repo.close()
Exemplo n.º 6
0
def _clone_default_branch(url, rev, for_write=False):
    """Return ``(clone_path, shallow)`` for a local clone of ``url``.

    A clone already registered in ``CLONES`` is reused. Unless ``rev`` is a
    SHA that the clone already contains, a shallow clone is unshallowed and
    a full clone is refreshed with ``git pull``.

    When no clone is registered, the repository is cloned into a new
    temporary directory. A shallow clone of ``rev`` is attempted first when
    ``for_write`` is false and ``rev`` is set but is not a SHA; if that
    attempt raises ``CloneError``, or was not made, a full clone is done.
    The resulting ``(clone_path, shallow)`` pair is recorded in ``CLONES``.
    """
    from dvc.scm.git import Git

    repo = None
    path, is_shallow = CLONES.get(url, (None, False))

    try:
        if not path:
            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            path = tempfile.mkdtemp("dvc-clone")

            # A tag or branch name (anything that is not a SHA) can be
            # fetched with a shallow clone, unless the clone is for writing.
            wants_shallow = not for_write and rev and not Git.is_sha(rev)
            if wants_shallow:
                from dvc.scm.base import CloneError

                try:
                    repo = Git.clone(url, path, shallow_branch=rev)
                    is_shallow = True
                    logger.debug(
                        "erepo: using shallow clone for branch '%s'", rev
                    )
                except CloneError:
                    # Deliberate fallback: a full clone is made below.
                    pass

            if not repo:
                repo = Git.clone(url, path)
                is_shallow = False

            CLONES[url] = (path, is_shallow)
        else:
            repo = Git(path)
            # Branches and tags might move, so the refresh is skipped only
            # for a SHA that is already present in the clone.
            needs_refresh = not (Git.is_sha(rev) and repo.has_rev(rev))
            if needs_refresh and is_shallow:
                # The rev is missing from a shallow clone, so fall back to a
                # full (unshallowed) clone. Fetching specific rev SHAs is
                # only available in certain git versions, so working with
                # full clones is easier/safer when several specific revs of
                # the same repo URL are needed.
                logger.debug("erepo: unshallowing clone for '%s'", url)
                _unshallow(repo)
                is_shallow = False
                CLONES[url] = (path, is_shallow)
            elif needs_refresh:
                logger.debug("erepo: git pull '%s'", url)
                repo.pull()
    finally:
        # Close whichever handle was opened, also on failure.
        if repo:
            repo.close()

    return path, is_shallow
Exemplo n.º 7
0
    def reproduce(
        cls,
        dvc_dir: str,
        queue: "Queue",
        rev: str,
        cwd: Optional[str] = None,
        name: Optional[str] = None,
    ) -> Tuple[Optional[str], bool]:
        """Run dvc repro and return the result.

        Args:
            dvc_dir: directory passed to ``Repo`` to open the DVC repo.
            queue: queue on which ``(rev, pid)`` of this process is put
                before anything else is done.
            rev: revision reported on ``queue``.
            cwd: directory to run the reproduction in; defaults to the
                repo's ``root_dir`` when not given.
            name: experiment name, forwarded to the checkpoint callback and
                to ``commit``.

        Returns tuple of (exp_hash, force) where exp_hash is the experiment
            hash (or None if the experiment was unchanged) and force is a
            bool specifying whether or not this experiment should force
            overwrite any existing duplicates.

        Any exception other than ``UnchangedExperimentError`` propagates to
        the caller after the Git handle has been closed and the original
        working directory restored.
        """
        unchanged = []

        queue.put((rev, os.getpid()))

        def filter_pipeline(stages):
            unchanged.extend(
                stage for stage in stages if isinstance(stage, PipelineStage)
            )

        result = None
        force = False
        # Bound before the ``try`` so that the ``finally`` clause can inspect
        # them even when a failure happens before they get their real values
        # (otherwise the cleanup itself raises UnboundLocalError and hides
        # the original error).
        scm = None
        old_cwd = None

        try:
            dvc = Repo(dvc_dir)
            old_cwd = os.getcwd()
            new_cwd = cwd if cwd else dvc.root_dir
            os.chdir(new_cwd)
            logger.debug("Running repro in '%s'", new_cwd)

            # Arguments for the repro call may have been packed into a file
            # in the repo's tmp dir; the file is consumed (removed) once read.
            args_path = os.path.join(
                dvc.tmp_dir, BaseExecutor.PACKED_ARGS_FILE
            )
            if os.path.exists(args_path):
                args, kwargs = BaseExecutor.unpack_repro_args(args_path)
                remove(args_path)
            else:
                args = []
                kwargs = {}

            force = kwargs.get("force", False)

            # NOTE: for checkpoint experiments we handle persist outs slightly
            # differently than normal:
            #
            # - checkpoint out may not yet exist if this is the first time this
            #   experiment has been run, this is not an error condition for
            #   experiments
            # - at the start of a repro run, we need to remove the persist out
            #   and restore it to its last known (committed) state (which may
            #   be removed/does not yet exist) so that our executor workspace
            #   is not polluted with the (persistent) out from an unrelated
            #   experiment run
            dvc.checkout(force=True, quiet=True)

            # We cannot use dvc.scm to make commits inside the executor since
            # cached props are not picklable.
            scm = Git()
            checkpoint_func = partial(cls.checkpoint_callback, scm, name)
            stages = dvc.reproduce(
                *args,
                on_unchanged=filter_pipeline,
                checkpoint_func=checkpoint_func,
                **kwargs,
            )

            exp_hash = cls.hash_exp(stages)
            exp_rev = cls.commit(scm, exp_hash, exp_name=name)
            # Hand the hash back to the caller, as the docstring promises.
            result = exp_hash
            if scm.get_ref(EXEC_CHECKPOINT):
                scm.set_ref(EXEC_CHECKPOINT, exp_rev)
        except UnchangedExperimentError:
            # Nothing changed: report "no result" rather than an error.
            pass
        finally:
            if scm:
                scm.close()
                # Drop the local reference once the handle is closed.
                del scm
            if old_cwd:
                os.chdir(old_cwd)

        # ideally we would return stages here like a normal repro() call, but
        # stages is not currently picklable and cannot be returned across
        # multiprocessing calls
        return result, force
Exemplo n.º 8
0
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository located at ``repo_path``.

    The ``Git`` handle opened for the checkout is closed before returning,
    whether or not the checkout succeeds.
    """
    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        # Release the handle even when the checkout raises.
        repo.close()