Example #1
def iter_ref_infos_by_name(self, name: str):
    for ref in self.scm.iter_refs(base=EXPS_NAMESPACE):
        if ref.startswith(EXEC_NAMESPACE) or ref == EXPS_STASH:
            continue
        ref_info = ExpRefInfo.from_ref(ref)
        if ref_info.name == name:
            yield ref_info
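Every example in this section relies on the same small interface: ExpRefInfo.from_ref() parses an experiment ref into its baseline_sha and name parts, and str(ExpRefInfo(baseline_sha=rev)) serves as a base prefix for scm.iter_refs(). The sketch below is only an illustration of that interface; the ref layout (refs/exps/<sha[:2]>/<sha[2:]>/<name>) and the constant values are assumptions made here, not the actual DVC implementation.

# Illustrative sketch only: the ref layout and constants below are assumptions
# made for this example, not the real dvc.repo.experiments code.
from dataclasses import dataclass
from typing import Optional

EXPS_NAMESPACE = "refs/exps"               # assumed experiment ref namespace
EXEC_NAMESPACE = f"{EXPS_NAMESPACE}/exec"  # assumed executor ref namespace
EXPS_STASH = f"{EXPS_NAMESPACE}/stash"     # assumed stash ref


class InvalidExpRefError(Exception):
    """Raised when a ref does not look like an experiment ref."""


@dataclass
class ExpRefInfo:
    baseline_sha: Optional[str] = None
    name: Optional[str] = None

    @classmethod
    def from_ref(cls, ref: str) -> "ExpRefInfo":
        # Assumed layout: refs/exps/<sha[:2]>/<sha[2:]>/<name>
        parts = ref.split("/")
        if len(parts) != 5 or "/".join(parts[:2]) != EXPS_NAMESPACE:
            raise InvalidExpRefError(ref)
        _, _, sha_head, sha_tail, name = parts
        return cls(baseline_sha=sha_head + sha_tail, name=name)

    def __str__(self) -> str:
        # Used as an iter_refs() base, e.g. refs/exps/<sha[:2]>/<sha[2:]>
        base = EXPS_NAMESPACE
        if self.baseline_sha:
            base = f"{base}/{self.baseline_sha[:2]}/{self.baseline_sha[2:]}"
        if self.name:
            base = f"{base}/{self.name}"
        return base


# Example round trip under the assumed layout:
info = ExpRefInfo.from_ref("refs/exps/ab/cdef0123/exp-name")
assert info.name == "exp-name"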
Example #2
def test_show_checkpoint_branch(tmp_dir, scm, dvc, checkpoint_stage, capsys,
                                workspace):
    results = dvc.experiments.run(checkpoint_stage.addressing,
                                  params=["foo=2"],
                                  tmp_dir=not workspace)
    branch_rev = first(results)
    if not workspace:
        dvc.experiments.apply(branch_rev)

    results = dvc.experiments.run(
        checkpoint_stage.addressing,
        checkpoint_resume=branch_rev,
        tmp_dir=not workspace,
    )
    checkpoint_a = first(results)

    dvc.experiments.apply(branch_rev)
    results = dvc.experiments.run(
        checkpoint_stage.addressing,
        checkpoint_resume=branch_rev,
        params=["foo=100"],
        tmp_dir=not workspace,
    )
    checkpoint_b = first(results)

    capsys.readouterr()
    assert main(["exp", "show", "--no-pager"]) == 0
    cap = capsys.readouterr()

    for rev in (checkpoint_a, checkpoint_b):
        ref = dvc.experiments.get_branch_by_rev(rev)
        ref_info = ExpRefInfo.from_ref(ref)
        name = f"{rev[:7]} [{ref_info.name}]"
        assert f"╓ {name}" in cap.out
    assert f"({branch_rev[:7]})" in cap.out
Example #3
def test_show_checkpoint_branch(tmp_dir, scm, dvc, checkpoint_stage, capsys):
    results = dvc.experiments.run(checkpoint_stage.addressing,
                                  params=["foo=2"])
    branch_rev = first(results)

    results = dvc.experiments.run(checkpoint_stage.addressing,
                                  checkpoint_resume=branch_rev)
    checkpoint_a = first(results)

    results = dvc.experiments.run(
        checkpoint_stage.addressing,
        checkpoint_resume=branch_rev,
        params=["foo=100"],
    )
    checkpoint_b = first(results)

    capsys.readouterr()
    assert main(["exp", "show", "--no-pager"]) == 0
    cap = capsys.readouterr()

    for rev in (checkpoint_a, checkpoint_b):
        ref = dvc.experiments.get_branch_containing(rev)
        ref_info = ExpRefInfo.from_ref(ref)
        name = ref_info.name
        assert f"╓ {name}" in cap.out
    assert f"({branch_rev[:7]})" in cap.out
Example #4
def show(
    repo,
    all_branches=False,
    all_tags=False,
    revs=None,
    all_commits=False,
    sha_only=False,
):
    res = defaultdict(OrderedDict)

    if revs is None:
        revs = [repo.scm.get_rev()]

    revs = OrderedDict((rev, None) for rev in repo.brancher(
        revs=revs,
        all_branches=all_branches,
        all_tags=all_tags,
        all_commits=all_commits,
        sha_only=True,
    ))

    for rev in revs:
        res[rev]["baseline"] = _collect_experiment_commit(repo,
                                                          rev,
                                                          sha_only=sha_only)

        if rev == "workspace":
            continue

        ref_info = ExpRefInfo(baseline_sha=rev)
        commits = [(ref, repo.scm.resolve_commit(ref))
                   for ref in repo.scm.iter_refs(base=str(ref_info))]
        for exp_ref, _ in sorted(
                commits,
                key=lambda x: x[1].committed_date,
                reverse=True,
        ):
            ref_info = ExpRefInfo.from_ref(exp_ref)
            assert ref_info.baseline_sha == rev
            _collect_experiment_branch(res[rev],
                                       repo,
                                       exp_ref,
                                       rev,
                                       sha_only=sha_only)

    # collect queued (not yet reproduced) experiments
    for stash_rev, entry in repo.experiments.stash_revs.items():
        if entry.baseline_rev in revs:
            experiment = _collect_experiment_commit(repo,
                                                    stash_rev,
                                                    stash=True)
            res[entry.baseline_rev][stash_rev] = experiment

    return res
Example #5
def _get_baseline(self, rev):
    if rev in self.stash_revs:
        entry = self.stash_revs.get(rev)
        if entry:
            return entry.baseline_rev
        return None
    ref = first(self._get_exps_containing(rev))
    if not ref:
        return None
    try:
        ref_info = ExpRefInfo.from_ref(ref)
        return ref_info.baseline_sha
    except InvalidExpRefError:
        return None
Example #6
def gc(
    repo,
    all_branches: Optional[bool] = False,
    all_tags: Optional[bool] = False,
    all_commits: Optional[bool] = False,
    workspace: Optional[bool] = False,
    queued: Optional[bool] = False,
):
    keep_revs = set(
        repo.brancher(
            all_branches=all_branches,
            all_tags=all_tags,
            all_commits=all_commits,
            sha_only=True,
        ))
    if workspace:
        keep_revs.add(repo.scm.get_rev())

    if not keep_revs:
        return 0

    removed = 0
    for ref in repo.scm.iter_refs(EXPS_NAMESPACE):
        if ref.startswith(EXEC_NAMESPACE) or ref == EXPS_STASH:
            continue
        ref_info = ExpRefInfo.from_ref(ref)
        if ref_info.baseline_sha not in keep_revs:
            repo.scm.remove_ref(ref)
            removed += 1

    delete_stashes = []
    for _, entry in repo.experiments.stash_revs.items():
        if not queued or entry.baseline_rev not in keep_revs:
            delete_stashes.append(entry.index)
    for index in sorted(delete_stashes, reverse=True):
        repo.experiments.stash.drop(index)
    removed += len(delete_stashes)

    return removed
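A minimal usage sketch for the gc() helper above, assuming an existing dvc.repo.Repo instance and that the helper is importable; the keyword arguments mirror the signature and are picked here only for illustration.

# Hypothetical invocation of the gc() helper shown above; assumes the current
# directory is inside a DVC repository.
from dvc.repo import Repo

repo = Repo()  # locate the enclosing DVC repository
# Keep experiments whose baseline is the current workspace commit and drop
# everything else, including queued (stashed) experiments.
removed = gc(repo, workspace=True, queued=False)
print(f"removed {removed} experiment ref(s) and stash entries")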
Example #7
def show(
    repo,
    all_branches=False,
    all_tags=False,
    revs=None,
    all_commits=False,
    sha_only=False,
    num=1,
):
    res = defaultdict(OrderedDict)

    if num < 1:
        raise InvalidArgumentError(f"Invalid number of commits '{num}'")

    if revs is None:
        revs = []
        for n in range(num):
            try:
                revs.append(repo.scm.resolve_rev(f"HEAD~{n}"))
            except SCMError:
                break

    revs = OrderedDict((rev, None) for rev in repo.brancher(
        revs=revs,
        all_branches=all_branches,
        all_tags=all_tags,
        all_commits=all_commits,
        sha_only=True,
    ))

    for rev in revs:
        res[rev]["baseline"] = _collect_experiment_commit(repo,
                                                          rev,
                                                          sha_only=sha_only)

        if rev == "workspace":
            continue

        ref_info = ExpRefInfo(baseline_sha=rev)
        commits = [(ref, repo.scm.resolve_commit(ref))
                   for ref in repo.scm.iter_refs(base=str(ref_info))]
        for exp_ref, _ in sorted(
                commits,
                key=lambda x: x[1].commit_time,
                reverse=True,
        ):
            ref_info = ExpRefInfo.from_ref(exp_ref)
            assert ref_info.baseline_sha == rev
            _collect_experiment_branch(res[rev],
                                       repo,
                                       exp_ref,
                                       rev,
                                       sha_only=sha_only)

    # collect queued (not yet reproduced) experiments
    for stash_rev, entry in repo.experiments.stash_revs.items():
        if entry.baseline_rev in revs:
            experiment = _collect_experiment_commit(repo,
                                                    stash_rev,
                                                    stash=True)
            res[entry.baseline_rev][stash_rev] = experiment

    return res
Example #8
def show(
    repo: "Repo",
    all_branches=False,
    all_tags=False,
    revs: Union[List[str], str, None] = None,
    all_commits=False,
    sha_only=False,
    num=1,
    param_deps=False,
    onerror: Optional[Callable] = None,
    fetch_running: bool = True,
):

    if onerror is None:
        onerror = onerror_collect

    res: Dict[str, Dict] = defaultdict(OrderedDict)

    if not any([revs, all_branches, all_tags, all_commits]):
        revs = ["HEAD"]
    if isinstance(revs, str):
        revs = [revs]

    found_revs: Dict[str, List[str]] = {"workspace": []}
    found_revs.update(
        iter_revs(repo.scm, revs, num, all_branches, all_tags, all_commits))

    running = repo.experiments.get_running_exps(fetch_refs=fetch_running)

    for rev in found_revs:
        res[rev]["baseline"] = _collect_experiment_commit(
            repo,
            rev,
            sha_only=sha_only,
            param_deps=param_deps,
            running=running,
            onerror=onerror,
            is_baseline=True,
        )

        if rev == "workspace":
            continue

        ref_info = ExpRefInfo(baseline_sha=rev)
        commits = [(ref, repo.scm.resolve_commit(ref))
                   for ref in repo.scm.iter_refs(base=str(ref_info))]
        for exp_ref, _ in sorted(commits,
                                 key=lambda x: x[1].commit_time,
                                 reverse=True):
            ref_info = ExpRefInfo.from_ref(exp_ref)
            assert ref_info.baseline_sha == rev
            _collect_experiment_branch(
                res[rev],
                repo,
                exp_ref,
                rev,
                sha_only=sha_only,
                param_deps=param_deps,
                running=running,
                onerror=onerror,
            )
        # collect queued (not yet reproduced) experiments
        for stash_rev, entry in repo.experiments.stash_revs.items():
            if entry.baseline_rev in found_revs:
                if stash_rev not in running or not running[stash_rev].get(
                        "last"):
                    experiment = _collect_experiment_commit(
                        repo,
                        stash_rev,
                        sha_only=sha_only,
                        stash=stash_rev not in running,
                        param_deps=param_deps,
                        running=running,
                        onerror=onerror,
                    )
                    res[entry.baseline_rev][stash_rev] = experiment
    return res
Example #9
    def reproduce(
        cls,
        dvc_dir: Optional[str],
        rev: str,
        queue: Optional["Queue"] = None,
        rel_cwd: Optional[str] = None,
        name: Optional[str] = None,
        log_errors: bool = True,
        log_level: Optional[int] = None,
        **kwargs,
    ) -> "ExecutorResult":
        """Run dvc repro and return the result.

        Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
            experiment hash (or None on error), exp_ref is the experiment ref,
            and force is a bool specifying whether or not this experiment
            should force overwrite any existing duplicates.
        """
        from dvc.repo.checkout import checkout as dvc_checkout
        from dvc.repo.reproduce import reproduce as dvc_reproduce

        auto_push = env2bool(DVC_EXP_AUTO_PUSH)
        git_remote = os.getenv(DVC_EXP_GIT_REMOTE, None)

        unchanged = []

        if queue is not None:
            queue.put((rev, os.getpid()))
        if log_errors and log_level is not None:
            cls._set_log_level(log_level)

        def filter_pipeline(stages):
            unchanged.extend([
                stage for stage in stages if isinstance(stage, PipelineStage)
            ])

        exp_hash: Optional[str] = None
        exp_ref: Optional["ExpRefInfo"] = None
        repro_force: bool = False

        with cls._repro_dvc(
                dvc_dir,
                rel_cwd,
                log_errors,
                **kwargs,
        ) as dvc:
            if auto_push:
                cls._validate_remotes(dvc, git_remote)

            args, kwargs = cls._repro_args(dvc)
            if args:
                targets: Optional[Union[list, str]] = args[0]
            else:
                targets = kwargs.get("targets")

            repro_force = kwargs.get("force", False)
            logger.trace(  # type: ignore[attr-defined]
                "Executor repro with force = '%s'", str(repro_force))

            repro_dry = kwargs.get("dry")

            # NOTE: checkpoint outs are handled as a special type of persist
            # out:
            #
            # - checkpoint out may not yet exist if this is the first time this
            #   experiment has been run, this is not an error condition for
            #   experiments
            # - if experiment was run with --reset, the checkpoint out will be
            #   removed at the start of the experiment (regardless of any
            #   dvc.lock entry for the checkpoint out)
            # - if run without --reset, the checkpoint out will be checked out
            #   using any hash present in dvc.lock (or removed if no entry
            #   exists in dvc.lock)
            checkpoint_reset: bool = kwargs.pop("reset", False)
            if not repro_dry:
                dvc_checkout(
                    dvc,
                    targets=targets,
                    with_deps=targets is not None,
                    force=True,
                    quiet=True,
                    allow_missing=True,
                    checkpoint_reset=checkpoint_reset,
                    recursive=kwargs.get("recursive", False),
                )

            checkpoint_func = partial(
                cls.checkpoint_callback,
                dvc,
                dvc.scm,
                name,
                repro_force or checkpoint_reset,
            )
            stages = dvc_reproduce(
                dvc,
                *args,
                on_unchanged=filter_pipeline,
                checkpoint_func=checkpoint_func,
                **kwargs,
            )

            exp_hash = cls.hash_exp(stages)
            if not repro_dry:
                try:
                    is_checkpoint = any(stage.is_checkpoint
                                        for stage in stages)
                    if is_checkpoint and checkpoint_reset:
                        # For reset checkpoint stages, we need to force
                        # overwriting existing checkpoint refs even though
                        # repro may not have actually been run with --force
                        repro_force = True
                    cls.commit(
                        dvc.scm,
                        exp_hash,
                        exp_name=name,
                        force=repro_force,
                        checkpoint=is_checkpoint,
                    )
                    if auto_push:
                        cls._auto_push(dvc, dvc.scm, git_remote)
                except UnchangedExperimentError:
                    pass
                ref = dvc.scm.get_ref(EXEC_BRANCH, follow=False)
                if ref:
                    exp_ref = ExpRefInfo.from_ref(ref)
                if cls.WARN_UNTRACKED:
                    untracked = dvc.scm.untracked_files()
                    if untracked:
                        logger.warning(
                            "The following untracked files were present in "
                            "the experiment directory after reproduction but "
                            "will not be included in experiment commits:\n"
                            "\t%s",
                            ", ".join(untracked),
                        )

        # ideally we would return stages here like a normal repro() call, but
        # stages is not currently picklable and cannot be returned across
        # multiprocessing calls
        return ExecutorResult(exp_hash, exp_ref, repro_force)
Example #10
    def reproduce(
        cls,
        dvc_dir: Optional[str],
        rev: str,
        queue: Optional["Queue"] = None,
        rel_cwd: Optional[str] = None,
        name: Optional[str] = None,
        log_level: Optional[int] = None,
    ) -> "ExecutorResult":
        """Run dvc repro and return the result.

        Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
            experiment hash (or None on error), exp_ref is the experiment ref,
            and force is a bool specifying whether or not this experiment
            should force overwrite any existing duplicates.
        """
        from dvc.repo.checkout import checkout as dvc_checkout
        from dvc.repo.reproduce import reproduce as dvc_reproduce

        unchanged = []

        if queue is not None:
            queue.put((rev, os.getpid()))
        if log_level is not None:
            cls._set_log_level(log_level)

        def filter_pipeline(stages):
            unchanged.extend([
                stage for stage in stages if isinstance(stage, PipelineStage)
            ])

        exp_hash: Optional[str] = None
        exp_ref: Optional["ExpRefInfo"] = None
        repro_force: bool = False

        with cls._repro_dvc(dvc_dir, rel_cwd) as dvc:
            args, kwargs = cls._repro_args(dvc)
            if args:
                targets: Optional[Union[list, str]] = args[0]
            else:
                targets = kwargs.get("targets")

            repro_force = kwargs.get("force", False)
            logger.trace(  # type: ignore[attr-defined]
                "Executor repro with force = '%s'", str(repro_force))

            # NOTE: for checkpoint experiments we handle persist outs slightly
            # differently than normal:
            #
            # - checkpoint out may not yet exist if this is the first time this
            #   experiment has been run, this is not an error condition for
            #   experiments
            # - at the start of a repro run, we need to remove the persist out
            #   and restore it to its last known (committed) state (which may
            #   be removed/does not yet exist) so that our executor workspace
            #   is not polluted with the (persistent) out from an unrelated
            #   experiment run
            dvc_checkout(
                dvc,
                targets=targets,
                with_deps=targets is not None,
                force=True,
                quiet=True,
                allow_missing=True,
            )

            checkpoint_func = partial(cls.checkpoint_callback, dvc.scm, name,
                                      repro_force)
            stages = dvc_reproduce(
                dvc,
                *args,
                on_unchanged=filter_pipeline,
                checkpoint_func=checkpoint_func,
                **kwargs,
            )

            exp_hash = cls.hash_exp(stages)
            try:
                cls.commit(
                    dvc.scm,
                    exp_hash,
                    exp_name=name,
                    force=repro_force,
                    checkpoint=any(stage.is_checkpoint for stage in stages),
                )
            except UnchangedExperimentError:
                pass
            ref = dvc.scm.get_ref(EXEC_BRANCH, follow=False)
            if ref:
                exp_ref = ExpRefInfo.from_ref(ref)
            if cls.WARN_UNTRACKED:
                untracked = dvc.scm.untracked_files()
                if untracked:
                    logger.warning(
                        "The following untracked files were present in the "
                        "experiment directory after reproduction but will "
                        "not be included in experiment commits:\n"
                        "\t%s",
                        ", ".join(untracked),
                    )

        # ideally we would return stages here like a normal repro() call, but
        # stages is not currently picklable and cannot be returned across
        # multiprocessing calls
        return ExecutorResult(exp_hash, exp_ref, repro_force)
Example #11
def show(
    repo,
    all_branches=False,
    all_tags=False,
    revs=None,
    all_commits=False,
    sha_only=False,
    num=1,
    param_deps=False,
    onerror: Optional[Callable] = None,
):
    if onerror is None:
        onerror = onerror_collect

    res: Dict[str, Dict] = defaultdict(OrderedDict)

    if num < 1:
        raise InvalidArgumentError(f"Invalid number of commits '{num}'")

    if revs is None:
        from dvc.scm import RevError, resolve_rev

        revs = []
        for n in range(num):
            try:
                head = fix_exp_head(repo.scm, f"HEAD~{n}")
                assert head
                revs.append(resolve_rev(repo.scm, head))
            except RevError:
                break

    revs = OrderedDict((rev, None) for rev in repo.brancher(
        revs=revs,
        all_branches=all_branches,
        all_tags=all_tags,
        all_commits=all_commits,
        sha_only=True,
    ))

    running = repo.experiments.get_running_exps()

    for rev in revs:
        res[rev]["baseline"] = _collect_experiment_commit(
            repo,
            rev,
            sha_only=sha_only,
            param_deps=param_deps,
            running=running,
            onerror=onerror,
        )

        if rev == "workspace":
            continue

        ref_info = ExpRefInfo(baseline_sha=rev)
        commits = [(ref, repo.scm.resolve_commit(ref))
                   for ref in repo.scm.iter_refs(base=str(ref_info))]
        for exp_ref, _ in sorted(commits,
                                 key=lambda x: x[1].commit_time,
                                 reverse=True):
            ref_info = ExpRefInfo.from_ref(exp_ref)
            assert ref_info.baseline_sha == rev
            _collect_experiment_branch(
                res[rev],
                repo,
                exp_ref,
                rev,
                sha_only=sha_only,
                param_deps=param_deps,
                running=running,
                onerror=onerror,
            )
        # collect queued (not yet reproduced) experiments
        for stash_rev, entry in repo.experiments.stash_revs.items():
            if entry.baseline_rev in revs:
                if stash_rev not in running or not running[stash_rev].get(
                        "last"):
                    experiment = _collect_experiment_commit(
                        repo,
                        stash_rev,
                        sha_only=sha_only,
                        stash=stash_rev not in running,
                        param_deps=param_deps,
                        running=running,
                        onerror=onerror,
                    )
                    res[entry.baseline_rev][stash_rev] = experiment
    return res
Example #12
def get_exact_name(self, rev: str):
    exclude = f"{EXEC_NAMESPACE}/*"
    ref = self.scm.describe(rev, base=EXPS_NAMESPACE, exclude=exclude)
    if ref:
        return ExpRefInfo.from_ref(ref).name
    return None
Example #13
def on_diverged(ref: str, checkpoint: bool):
    ref_info = ExpRefInfo.from_ref(ref)
    if checkpoint:
        raise CheckpointExistsError(ref_info.name)
    raise ExperimentExistsError(ref_info.name)
Example #14
    def reproduce(
        cls,
        dvc_dir: Optional[str],
        rev: str,
        queue: Optional["Queue"] = None,
        rel_cwd: Optional[str] = None,
        name: Optional[str] = None,
        log_level: Optional[int] = None,
    ) -> "ExecutorResult":
        """Run dvc repro and return the result.

        Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
            experiment hash (or None on error), exp_ref is the experiment ref,
            and force is a bool specifying whether or not this experiment
            should force overwrite any existing duplicates.
        """
        from dvc.repo import Repo
        from dvc.repo.checkout import checkout as dvc_checkout
        from dvc.repo.reproduce import reproduce as dvc_reproduce

        unchanged = []

        if queue is not None:
            queue.put((rev, os.getpid()))
        if log_level is not None:
            cls._set_log_level(log_level)

        def filter_pipeline(stages):
            unchanged.extend(
                [stage for stage in stages if isinstance(stage, PipelineStage)]
            )

        exp_hash: Optional[str] = None
        exp_ref: Optional["ExpRefInfo"] = None
        repro_force: bool = False

        # Pre-set both names so the finally block below never hits an unbound
        # name if Repo() itself raises.
        dvc = None
        old_cwd = None
        try:
            dvc = Repo(dvc_dir)
            if dvc_dir is not None:
                old_cwd = os.getcwd()
                if rel_cwd:
                    os.chdir(os.path.join(dvc.root_dir, rel_cwd))
                else:
                    os.chdir(dvc.root_dir)
            else:
                old_cwd = None
            logger.debug("Running repro in '%s'", os.getcwd())

            args_path = os.path.join(
                dvc.tmp_dir, BaseExecutor.PACKED_ARGS_FILE
            )
            if os.path.exists(args_path):
                args, kwargs = BaseExecutor.unpack_repro_args(args_path)
                remove(args_path)
            else:
                args = []
                kwargs = {}

            repro_force = kwargs.get("force", False)
            logger.debug("force = %s", str(repro_force))

            # NOTE: for checkpoint experiments we handle persist outs slightly
            # differently than normal:
            #
            # - checkpoint out may not yet exist if this is the first time this
            #   experiment has been run, this is not an error condition for
            #   experiments
            # - at the start of a repro run, we need to remove the persist out
            #   and restore it to its last known (committed) state (which may
            #   be removed/does not yet exist) so that our executor workspace
            #   is not polluted with the (persistent) out from an unrelated
            #   experiment run
            dvc_checkout(dvc, force=True, quiet=True)

            checkpoint_func = partial(
                cls.checkpoint_callback, dvc.scm, name, repro_force
            )
            stages = dvc_reproduce(
                dvc,
                *args,
                on_unchanged=filter_pipeline,
                checkpoint_func=checkpoint_func,
                **kwargs,
            )

            exp_hash = cls.hash_exp(stages)
            try:
                cls.commit(
                    dvc.scm,
                    exp_hash,
                    exp_name=name,
                    force=repro_force,
                    checkpoint=any(stage.is_checkpoint for stage in stages),
                )
            except UnchangedExperimentError:
                pass
            ref = dvc.scm.get_ref(EXEC_BRANCH, follow=False)
            if ref:
                exp_ref = ExpRefInfo.from_ref(ref)
        finally:
            if dvc:
                dvc.scm.close()
            if old_cwd:
                os.chdir(old_cwd)

        # ideally we would return stages here like a normal repro() call, but
        # stages is not currently picklable and cannot be returned across
        # multiprocessing calls
        return ExecutorResult(exp_hash, exp_ref, repro_force)