def checkout_exp(self, rev, allow_missing=False):
    """Checkout an experiment to the user's workspace."""
    from git.exc import GitCommandError

    from dvc.repo.checkout import checkout as dvc_checkout

    baseline_rev = self._check_baseline(rev)
    self._scm_checkout(rev)

    tmp = tempfile.NamedTemporaryFile(delete=False).name
    self.scm.repo.head.commit.diff(
        baseline_rev, patch=True, full_index=True, binary=True, output=tmp
    )

    dirty = self.repo.scm.is_dirty()
    if dirty:
        logger.debug("Stashing workspace changes.")
        self.repo.scm.repo.git.stash("push", "--include-untracked")

    try:
        if os.path.getsize(tmp):
            logger.debug("Patching local workspace")
            self.repo.scm.repo.git.apply(tmp, reverse=True)
            need_checkout = True
        else:
            need_checkout = False
    except GitCommandError:
        raise DvcException("failed to apply experiment changes.")
    finally:
        remove(tmp)
        if dirty:
            self._unstash_workspace()

    if need_checkout:
        dvc_checkout(self.repo, allow_missing=allow_missing)
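# NOTE (added for illustration): the diff above runs FROM the experiment
# commit TO its baseline, so applying it with `--reverse` turns a
# baseline-state workspace into the experiment state. A minimal standalone
# sketch of the same trick, mirroring the diff call above via GitPython
# (the repo path and rev names are hypothetical, not from DVC):
import tempfile

from git import Repo as GitRepo

git_repo = GitRepo(".")
patch = tempfile.NamedTemporaryFile(delete=False).name
# diff from the experiment commit to the baseline commit
git_repo.commit("experiment-rev").diff(
    "baseline-rev", patch=True, full_index=True, binary=True, output=patch
)
# applying the patch in reverse recreates the experiment's changes
git_repo.git.apply(patch, reverse=True)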
def apply(repo, rev, force=True, **kwargs):
    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.scm.base import MergeConflictError, SCMError

    exps = repo.experiments

    try:
        exp_rev = repo.scm.resolve_rev(rev)
        exps.check_baseline(exp_rev)
    except (RevError, BaselineMismatchError) as exc:
        raise InvalidExpRevError(rev) from exc

    stash_rev = exp_rev in exps.stash_revs
    if not stash_rev and not exps.get_branch_by_rev(
        exp_rev, allow_multiple=True
    ):
        raise InvalidExpRevError(exp_rev)

    # Note that we don't use stash_workspace() here since we need finer
    # control over the merge behavior when we unstash everything
    if repo.scm.is_dirty(untracked_files=True):
        logger.debug("Stashing workspace")
        workspace = repo.scm.stash.push(include_untracked=True)
    else:
        workspace = None

    repo.scm.merge(exp_rev, commit=False)

    if workspace:
        try:
            repo.scm.stash.apply(workspace)
        except MergeConflictError as exc:
            # Applied experiment conflicts with user's workspace changes
            if force:
                # prefer applied experiment changes over prior stashed
                # changes
                repo.scm.checkout_index(ours=True)
            else:
                # revert applied changes and restore user's workspace
                repo.scm.reset(hard=True)
                repo.scm.stash.pop()
                raise ApplyConflictError(rev) from exc
        except SCMError as exc:
            raise ApplyConflictError(rev) from exc
        repo.scm.stash.drop()

    repo.scm.reset()

    if stash_rev:
        args_path = os.path.join(repo.tmp_dir, BaseExecutor.PACKED_ARGS_FILE)
        if os.path.exists(args_path):
            remove(args_path)

    dvc_checkout(repo, **kwargs)

    repo.scm.set_ref(EXEC_APPLY, exp_rev)
    logger.info(
        "Changes for experiment '%s' have been applied to your current "
        "workspace.",
        rev,
    )
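# A hedged usage sketch for apply() above (the experiment name is
# hypothetical; any stashed rev, experiment ref, or name resolvable by
# the repo's SCM would do):
from dvc.repo import Repo

user_repo = Repo(".")
# apply the experiment to the current workspace, preferring the
# experiment's changes over dirty workspace changes on conflict
apply(user_repo, "exp-a1b2c3", force=True)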
def apply(repo, rev, *args, **kwargs):
    from git.exc import GitCommandError

    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.repo.experiments import BaselineMismatchError

    exps = repo.experiments

    try:
        exps.check_baseline(rev)
    except BaselineMismatchError as exc:
        raise ApplyError(rev) from exc

    stash_rev = rev in exps.stash_revs
    if stash_rev:
        branch = rev
    else:
        branch = exps.get_branch_containing(rev)
        if not branch:
            raise ApplyError(rev)

    # Note that we don't use stash_workspace() here since we need finer
    # control over the merge behavior when we unstash everything
    if repo.scm.is_dirty(untracked_files=True):
        logger.debug("Stashing workspace")
        workspace = repo.scm.stash.push(include_untracked=True)
    else:
        workspace = None

    repo.scm.gitpython.repo.git.merge(branch, squash=True, no_commit=True)

    if workspace:
        try:
            repo.scm.stash.apply(workspace)
        except GitCommandError:
            # if stash apply returns merge conflicts, prefer experiment
            # changes over prior stashed changes
            repo.scm.gitpython.repo.git.checkout("--ours", "--", ".")
        repo.scm.stash.drop()

    repo.scm.gitpython.repo.git.reset()

    if stash_rev:
        args_path = os.path.join(repo.tmp_dir, BaseExecutor.PACKED_ARGS_FILE)
        if os.path.exists(args_path):
            remove(args_path)

    dvc_checkout(repo, **kwargs)
    logger.info(
        "Changes for experiment '%s' have been applied to your current "
        "workspace.",
        rev,
    )
def checkout_exp(self, rev, **kwargs):
    """Checkout an experiment to the user's workspace."""
    from git.exc import GitCommandError

    from dvc.repo.checkout import checkout as dvc_checkout

    baseline_rev = self._check_baseline(rev)
    self._scm_checkout(rev)

    branch = self._get_branch_containing(rev)
    m = self.BRANCH_RE.match(branch) if branch else None
    if m and m.group("checkpoint"):
        kwargs.update({"allow_missing": True, "quiet": True})

    tmp = tempfile.NamedTemporaryFile(delete=False).name
    self.scm.repo.head.commit.diff(
        baseline_rev, patch=True, full_index=True, binary=True, output=tmp
    )

    dirty = self.repo.scm.is_dirty(untracked_files=True)
    if dirty:
        logger.debug("Stashing workspace changes.")
        self.repo.scm.repo.git.stash("push", "--include-untracked")

    try:
        if os.path.getsize(tmp):
            logger.debug("Patching local workspace")
            self.repo.scm.repo.git.apply(tmp, reverse=True)
            need_checkout = True
        else:
            need_checkout = False
    except GitCommandError:
        raise DvcException("failed to apply experiment changes.")
    finally:
        remove(tmp)
        if dirty:
            self._unstash_workspace()

    args_file = os.path.join(self.repo.tmp_dir, self.PACKED_ARGS_FILE)
    if os.path.exists(args_file):
        remove(args_file)

    if need_checkout:
        dvc_checkout(self.repo, **kwargs)
def apply(repo: "Repo", rev: str, force: bool = True, **kwargs): from scmrepo.exceptions import SCMError as _SCMError from dvc.repo.checkout import checkout as dvc_checkout from dvc.scm import GitMergeError, RevError, resolve_rev exps = repo.experiments try: exp_rev = resolve_rev(repo.scm, rev) exps.check_baseline(exp_rev) except (RevError, BaselineMismatchError) as exc: raise InvalidExpRevError(rev) from exc stash_rev = exp_rev in exps.stash_revs if not stash_rev and not exps.get_branch_by_rev(exp_rev, allow_multiple=True): raise InvalidExpRevError(exp_rev) # NOTE: we don't use scmrepo's stash_workspace() here since we need # finer control over the merge behavior when we unstash everything with _apply_workspace(repo, rev, force): try: repo.scm.merge(exp_rev, commit=False, squash=True) except _SCMError as exc: raise GitMergeError(str(exc), scm=repo.scm) repo.scm.reset() if stash_rev: args_path = os.path.join(repo.tmp_dir, BaseExecutor.PACKED_ARGS_FILE) if os.path.exists(args_path): remove(args_path) dvc_checkout(repo, **kwargs) repo.scm.set_ref(EXEC_APPLY, exp_rev) logger.info( "Changes for experiment '%s' have been applied to your current " "workspace.", rev, )
def checkout_exp(self, rev, force=False):
    """Checkout an experiment to the user's workspace."""
    from git.exc import GitCommandError

    from dvc.repo.checkout import _checkout as dvc_checkout

    if force:
        self.repo.scm.repo.git.reset(hard=True)

    self._scm_checkout(rev)

    tmp = tempfile.NamedTemporaryFile(delete=False).name
    self.scm.repo.head.commit.diff("HEAD~1", patch=True, output=tmp)

    try:
        if os.path.getsize(tmp):
            logger.debug("Patching local workspace")
            self.repo.scm.repo.git.apply(tmp, reverse=True)
            dvc_checkout(self.repo)
    except GitCommandError:
        raise DvcException(
            "Checkout failed, experiment contains changes which "
            "conflict with your current workspace. To overwrite "
            "your workspace, use `dvc experiments checkout --force`."
        )
    finally:
        remove(tmp)
def reproduce(
    cls,
    dvc_dir: Optional[str],
    rev: str,
    queue: Optional["Queue"] = None,
    rel_cwd: Optional[str] = None,
    name: Optional[str] = None,
    log_errors: bool = True,
    log_level: Optional[int] = None,
    **kwargs,
) -> "ExecutorResult":
    """Run dvc repro and return the result.

    Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
    experiment hash (or None on error), exp_ref is the experiment ref,
    and force is a bool specifying whether or not this experiment
    should force overwrite any existing duplicates.
    """
    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.repo.reproduce import reproduce as dvc_reproduce

    auto_push = env2bool(DVC_EXP_AUTO_PUSH)
    git_remote = os.getenv(DVC_EXP_GIT_REMOTE, None)

    unchanged = []

    if queue is not None:
        queue.put((rev, os.getpid()))
    if log_errors and log_level is not None:
        cls._set_log_level(log_level)

    def filter_pipeline(stages):
        unchanged.extend(
            [stage for stage in stages if isinstance(stage, PipelineStage)]
        )

    exp_hash: Optional[str] = None
    exp_ref: Optional["ExpRefInfo"] = None
    repro_force: bool = False

    with cls._repro_dvc(
        dvc_dir,
        rel_cwd,
        log_errors,
        **kwargs,
    ) as dvc:
        if auto_push:
            cls._validate_remotes(dvc, git_remote)
        args, kwargs = cls._repro_args(dvc)
        if args:
            targets: Optional[Union[list, str]] = args[0]
        else:
            targets = kwargs.get("targets")

        repro_force = kwargs.get("force", False)
        logger.trace(  # type: ignore[attr-defined]
            "Executor repro with force = '%s'", str(repro_force)
        )

        repro_dry = kwargs.get("dry")

        # NOTE: checkpoint outs are handled as a special type of persist
        # out:
        #
        # - checkpoint out may not yet exist if this is the first time this
        #   experiment has been run, this is not an error condition for
        #   experiments
        # - if experiment was run with --reset, the checkpoint out will be
        #   removed at the start of the experiment (regardless of any
        #   dvc.lock entry for the checkpoint out)
        # - if run without --reset, the checkpoint out will be checked out
        #   using any hash present in dvc.lock (or removed if no entry
        #   exists in dvc.lock)
        checkpoint_reset: bool = kwargs.pop("reset", False)
        if not repro_dry:
            dvc_checkout(
                dvc,
                targets=targets,
                with_deps=targets is not None,
                force=True,
                quiet=True,
                allow_missing=True,
                checkpoint_reset=checkpoint_reset,
                recursive=kwargs.get("recursive", False),
            )

        checkpoint_func = partial(
            cls.checkpoint_callback,
            dvc,
            dvc.scm,
            name,
            repro_force or checkpoint_reset,
        )
        stages = dvc_reproduce(
            dvc,
            *args,
            on_unchanged=filter_pipeline,
            checkpoint_func=checkpoint_func,
            **kwargs,
        )

        exp_hash = cls.hash_exp(stages)
        if not repro_dry:
            try:
                is_checkpoint = any(stage.is_checkpoint for stage in stages)
                if is_checkpoint and checkpoint_reset:
                    # For reset checkpoint stages, we need to force
                    # overwriting existing checkpoint refs even though
                    # repro may not have actually been run with --force
                    repro_force = True
                cls.commit(
                    dvc.scm,
                    exp_hash,
                    exp_name=name,
                    force=repro_force,
                    checkpoint=is_checkpoint,
                )
                if auto_push:
                    cls._auto_push(dvc, dvc.scm, git_remote)
            except UnchangedExperimentError:
                pass
            ref = dvc.scm.get_ref(EXEC_BRANCH, follow=False)
            if ref:
                exp_ref = ExpRefInfo.from_ref(ref)
            if cls.WARN_UNTRACKED:
                untracked = dvc.scm.untracked_files()
                if untracked:
                    logger.warning(
                        "The following untracked files were present in "
                        "the experiment directory after reproduction but "
                        "will not be included in experiment commits:\n"
                        "\t%s",
                        ", ".join(untracked),
                    )

    # ideally we would return stages here like a normal repro() call, but
    # stages is not currently picklable and cannot be returned across
    # multiprocessing calls
    return ExecutorResult(exp_hash, exp_ref, repro_force)
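# ExecutorResult is consumed as a 3-tuple throughout these versions; a
# minimal sketch of the shape the code above assumes (field names taken
# from the docstring; the exact definition is not part of this excerpt):
from typing import NamedTuple, Optional


class ExecutorResult(NamedTuple):
    exp_hash: Optional[str]  # experiment hash, or None on error
    ref_info: Optional["ExpRefInfo"]  # experiment ref, if one was created
    force: bool  # whether to overwrite existing duplicate experiments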
def reproduce(
    cls,
    dvc_dir: Optional[str],
    rev: str,
    queue: Optional["Queue"] = None,
    rel_cwd: Optional[str] = None,
    name: Optional[str] = None,
    log_level: Optional[int] = None,
) -> "ExecutorResult":
    """Run dvc repro and return the result.

    Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
    experiment hash (or None on error), exp_ref is the experiment ref,
    and force is a bool specifying whether or not this experiment
    should force overwrite any existing duplicates.
    """
    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.repo.reproduce import reproduce as dvc_reproduce

    unchanged = []

    if queue is not None:
        queue.put((rev, os.getpid()))
    if log_level is not None:
        cls._set_log_level(log_level)

    def filter_pipeline(stages):
        unchanged.extend(
            [stage for stage in stages if isinstance(stage, PipelineStage)]
        )

    exp_hash: Optional[str] = None
    exp_ref: Optional["ExpRefInfo"] = None
    repro_force: bool = False

    with cls._repro_dvc(dvc_dir, rel_cwd) as dvc:
        args, kwargs = cls._repro_args(dvc)
        if args:
            targets: Optional[Union[list, str]] = args[0]
        else:
            targets = kwargs.get("targets")

        repro_force = kwargs.get("force", False)
        logger.trace(  # type: ignore[attr-defined]
            "Executor repro with force = '%s'", str(repro_force)
        )

        # NOTE: for checkpoint experiments we handle persist outs slightly
        # differently than normal:
        #
        # - checkpoint out may not yet exist if this is the first time this
        #   experiment has been run, this is not an error condition for
        #   experiments
        # - at the start of a repro run, we need to remove the persist out
        #   and restore it to its last known (committed) state (which may
        #   be removed/does not yet exist) so that our executor workspace
        #   is not polluted with the (persistent) out from an unrelated
        #   experiment run
        dvc_checkout(
            dvc,
            targets=targets,
            with_deps=targets is not None,
            force=True,
            quiet=True,
            allow_missing=True,
        )

        checkpoint_func = partial(
            cls.checkpoint_callback, dvc.scm, name, repro_force
        )
        stages = dvc_reproduce(
            dvc,
            *args,
            on_unchanged=filter_pipeline,
            checkpoint_func=checkpoint_func,
            **kwargs,
        )

        exp_hash = cls.hash_exp(stages)
        try:
            cls.commit(
                dvc.scm,
                exp_hash,
                exp_name=name,
                force=repro_force,
                checkpoint=any(stage.is_checkpoint for stage in stages),
            )
        except UnchangedExperimentError:
            pass
        ref = dvc.scm.get_ref(EXEC_BRANCH, follow=False)
        if ref:
            exp_ref = ExpRefInfo.from_ref(ref)
        if cls.WARN_UNTRACKED:
            untracked = dvc.scm.untracked_files()
            if untracked:
                logger.warning(
                    "The following untracked files were present in the "
                    "experiment directory after reproduction but will "
                    "not be included in experiment commits:\n"
                    "\t%s",
                    ", ".join(untracked),
                )

    # ideally we would return stages here like a normal repro() call, but
    # stages is not currently picklable and cannot be returned across
    # multiprocessing calls
    return ExecutorResult(exp_hash, exp_ref, repro_force)
def reproduce(
    cls,
    dvc_dir: str,
    queue: "Queue",
    rev: str,
    cwd: Optional[str] = None,
    name: Optional[str] = None,
) -> Tuple[Optional[str], bool]:
    """Run dvc repro and return the result.

    Returns tuple of (exp_hash, force) where exp_hash is the experiment
    hash (or None on error) and force is a bool specifying whether or
    not this experiment should force overwrite any existing duplicates.
    """
    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.repo.reproduce import reproduce as dvc_reproduce

    unchanged = []

    queue.put((rev, os.getpid()))

    def filter_pipeline(stages):
        unchanged.extend(
            [stage for stage in stages if isinstance(stage, PipelineStage)]
        )

    result: Optional[str] = None
    repro_force: bool = False

    # initialized up front so the finally block below is safe even if
    # Repo() raises before these are assigned
    dvc = None
    old_cwd = None
    try:
        dvc = Repo(dvc_dir)
        old_cwd = os.getcwd()
        new_cwd = cwd if cwd else dvc.root_dir
        os.chdir(new_cwd)
        logger.debug("Running repro in '%s'", new_cwd)

        args_path = os.path.join(dvc.tmp_dir, BaseExecutor.PACKED_ARGS_FILE)
        if os.path.exists(args_path):
            args, kwargs = BaseExecutor.unpack_repro_args(args_path)
            remove(args_path)
        else:
            args = []
            kwargs = {}

        repro_force = kwargs.get("force", False)

        # NOTE: for checkpoint experiments we handle persist outs slightly
        # differently than normal:
        #
        # - checkpoint out may not yet exist if this is the first time this
        #   experiment has been run, this is not an error condition for
        #   experiments
        # - at the start of a repro run, we need to remove the persist out
        #   and restore it to its last known (committed) state (which may
        #   be removed/does not yet exist) so that our executor workspace
        #   is not polluted with the (persistent) out from an unrelated
        #   experiment run
        dvc_checkout(dvc, force=True, quiet=True)

        checkpoint_func = partial(cls.checkpoint_callback, dvc.scm, name)
        stages = dvc_reproduce(
            dvc,
            *args,
            on_unchanged=filter_pipeline,
            checkpoint_func=checkpoint_func,
            **kwargs,
        )

        exp_hash = cls.hash_exp(stages)
        result = exp_hash
        exp_rev = cls.commit(dvc.scm, exp_hash, exp_name=name)
        if dvc.scm.get_ref(EXEC_CHECKPOINT):
            dvc.scm.set_ref(EXEC_CHECKPOINT, exp_rev)
    except UnchangedExperimentError:
        pass
    finally:
        if dvc:
            dvc.scm.close()
        if old_cwd:
            os.chdir(old_cwd)

    # ideally we would return stages here like a normal repro() call, but
    # stages is not currently picklable and cannot be returned across
    # multiprocessing calls
    return result, repro_force
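# The queue.put((rev, os.getpid())) handshake above lets a parent process
# learn which PID is reproducing which experiment rev. A minimal consumer
# sketch of the parent side, which is not part of this excerpt:
from multiprocessing import Queue

msg_queue: "Queue" = Queue()
# ... hand `msg_queue` to reproduce() running in a worker process ...
rev, pid = msg_queue.get()  # blocks until the executor reports in
logger.debug("experiment %s running in process %d", rev, pid)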
def reproduce(
    cls,
    info: "ExecutorInfo",
    rev: str,
    queue: Optional["Queue"] = None,
    infofile: Optional[str] = None,
    log_errors: bool = True,
    log_level: Optional[int] = None,
    **kwargs,
) -> "ExecutorResult":
    """Run dvc repro and return the result.

    Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
    experiment hash (or None on error), exp_ref is the experiment ref,
    and force is a bool specifying whether or not this experiment
    should force overwrite any existing duplicates.
    """
    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.repo.reproduce import reproduce as dvc_reproduce
    from dvc.stage import PipelineStage

    auto_push = env2bool(DVC_EXP_AUTO_PUSH)
    git_remote = os.getenv(DVC_EXP_GIT_REMOTE, None)

    unchanged = []

    if queue is not None:
        queue.put((rev, os.getpid()))
    if log_errors and log_level is not None:
        cls._set_log_level(log_level)

    def filter_pipeline(stages):
        unchanged.extend(
            [stage for stage in stages if isinstance(stage, PipelineStage)]
        )

    exp_hash: Optional[str] = None
    exp_ref: Optional["ExpRefInfo"] = None
    repro_force: bool = False

    if infofile is not None:
        info.dump_json(infofile)

    with cls._repro_dvc(
        info,
        log_errors=log_errors,
        **kwargs,
    ) as dvc:
        if auto_push:
            cls._validate_remotes(dvc, git_remote)
        args, kwargs = cls._repro_args(dvc)
        if args:
            targets: Optional[Union[list, str]] = args[0]
        else:
            targets = kwargs.get("targets")

        repro_force = kwargs.get("force", False)
        logger.trace(  # type: ignore[attr-defined]
            "Executor repro with force = '%s'", str(repro_force)
        )

        repro_dry = kwargs.get("dry")

        # NOTE: checkpoint outs are handled as a special type of persist
        # out:
        #
        # - checkpoint out may not yet exist if this is the first time this
        #   experiment has been run, this is not an error condition for
        #   experiments
        # - if experiment was run with --reset, the checkpoint out will be
        #   removed at the start of the experiment (regardless of any
        #   dvc.lock entry for the checkpoint out)
        # - if run without --reset, the checkpoint out will be checked out
        #   using any hash present in dvc.lock (or removed if no entry
        #   exists in dvc.lock)
        checkpoint_reset: bool = kwargs.pop("reset", False)
        if not repro_dry:
            dvc_checkout(
                dvc,
                targets=targets,
                with_deps=targets is not None,
                force=True,
                quiet=True,
                allow_missing=True,
                checkpoint_reset=checkpoint_reset,
                recursive=kwargs.get("recursive", False),
            )

        checkpoint_func = partial(
            cls.checkpoint_callback,
            dvc,
            dvc.scm,
            info.name,
            repro_force or checkpoint_reset,
        )
        stages = dvc_reproduce(
            dvc,
            *args,
            on_unchanged=filter_pipeline,
            checkpoint_func=checkpoint_func,
            **kwargs,
        )

        exp_hash = cls.hash_exp(stages)
        if not repro_dry:
            ref, exp_ref, repro_force = cls._repro_commit(
                dvc,
                info,
                stages,
                exp_hash,
                checkpoint_reset,
                auto_push,
                git_remote,
                repro_force,
            )
            info.result_hash = exp_hash
            info.result_ref = ref
            info.result_force = repro_force

    if infofile is not None:
        info.dump_json(infofile)

    # ideally we would return stages here like a normal repro() call, but
    # stages is not currently picklable and cannot be returned across
    # multiprocessing calls
    return ExecutorResult(exp_hash, exp_ref, repro_force)