def _repro_dvc(cls, dvc_dir: Optional[str], rel_cwd: Optional[str]):
    """Context manager: open the Repo at ``dvc_dir`` and yield it with the
    process cwd moved into the repo (or ``rel_cwd`` under it).

    Exceptions from the consumer are logged (except CheckpointKilledError,
    which is re-raised untouched) and always re-raised; the repo is closed
    and the original cwd restored on exit.
    """
    from dvc.repo import Repo

    dvc = Repo(dvc_dir)
    if cls.QUIET:
        # Propagate executor-level quiet mode to the scm layer.
        dvc.scm.quiet = cls.QUIET
    if dvc_dir is not None:
        # Remember where we were so the finally block can chdir back.
        old_cwd: Optional[str] = os.getcwd()
        if rel_cwd:
            os.chdir(os.path.join(dvc.root_dir, rel_cwd))
        else:
            os.chdir(dvc.root_dir)
    else:
        # No dvc_dir given: cwd is left alone and never restored.
        old_cwd = None
    logger.debug("Running repro in '%s'", os.getcwd())
    try:
        yield dvc
    except CheckpointKilledError:
        # Checkpoint kills are expected control flow; do not log as errors.
        raise
    except DvcException:
        logger.exception("")
        raise
    except Exception:
        logger.exception("unexpected error")
        raise
    finally:
        dvc.close()
        if old_cwd:
            os.chdir(old_cwd)
def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
    """Context manager yielding a :class:`Repo` for an external repository.

    ``rev_lock`` takes precedence over ``rev`` when choosing the revision
    to materialize. The underlying repo is always closed on exit.
    """
    from dvc.repo import Repo

    path = _external_repo(url=url, rev=rev_lock or rev, cache_dir=cache_dir)
    repo = Repo(path)
    try:
        yield repo
    finally:
        # Fix: close even when the caller raises into the generator.
        # The original called close() only after a normal yield return,
        # leaking the repo's resources on error.
        repo.close()
def test_branch_config(tmp_dir, scm): tmp_dir.scm_gen("foo", "foo", commit="init") # sanity check with pytest.raises(NotDvcRepoError): Repo().close() scm.checkout("branch", create_new=True) dvc = Repo.init() with dvc.config.edit() as conf: conf["remote"]["branch"] = {"url": "/some/path"} dvc.close() scm.add([os.path.join(".dvc", "config")]) scm.commit("init dvc") scm.checkout("master") with pytest.raises(NotDvcRepoError): Repo(rev="master").close() dvc = Repo(rev="branch") try: assert dvc.config["remote"]["branch"]["url"] == "/some/path" finally: dvc.close()
def _repro_dvc(
    cls,
    info: "ExecutorInfo",
    log_errors: bool = True,
    **kwargs,
):
    """Context manager: open the executor's Repo, chdir into its working
    directory, and yield the repo.

    Errors from the consumer are logged only when ``log_errors`` is set
    (CheckpointKilledError always passes through silently) and are always
    re-raised. The repo is closed and the original cwd restored on exit.
    """
    from dvc.repo import Repo
    from dvc.stage.monitor import CheckpointKilledError

    dvc = Repo(os.path.join(info.root_dir, info.dvc_dir))
    if cls.QUIET:
        # Propagate executor-level quiet mode to scm interactions.
        dvc.scm_context.quiet = cls.QUIET
    old_cwd = os.getcwd()
    if info.wdir:
        # NOTE: wdir is resolved against the scm root, not dvc.root_dir.
        os.chdir(os.path.join(dvc.scm.root_dir, info.wdir))
    else:
        os.chdir(dvc.root_dir)
    try:
        logger.debug("Running repro in '%s'", os.getcwd())
        yield dvc
    except CheckpointKilledError:
        # Expected control flow for checkpoint experiments; never logged.
        raise
    except DvcException:
        if log_errors:
            logger.exception("")
        raise
    except Exception:
        if log_errors:
            logger.exception("unexpected error")
        raise
    finally:
        dvc.close()
        os.chdir(old_cwd)
def _get_remote_config(url):
    """Build a config fragment describing *url*'s default remote.

    Returns an empty dict when *url* is not a DVC repo. When the repo has
    no default remote configured, a synthetic entry pointing at the repo's
    local cache directory is generated instead.
    """
    try:
        src = Repo(url)
    except NotDvcRepoError:
        return {}

    try:
        remote_name = src.config["core"].get("remote")
        if remote_name:
            # Use original remote to make sure that we are using correct url,
            # credential paths, etc if they are relative to the config
            # location.
            return {"remote": {remote_name: src.config["remote"][remote_name]}}

        # Fill the empty upstream entry with a new remote pointing to the
        # original repo's cache location.
        remote_name = "auto-generated-upstream"
        return {
            "core": {"remote": remote_name},
            "remote": {remote_name: {"url": src.cache.local.cache_dir}},
        }
    finally:
        src.close()
def external_repo(
    url, rev=None, for_write=False, cache_dir=None, cache_types=None, **kwargs
):
    """Context manager yielding a Repo over a (cached) clone of ``url``.

    Translates low-level repo errors into external-repo-specific ones.
    Write-mode clones are removed on exit; the repo is always closed.
    """
    from dvc.config import NoRemoteError
    from dvc.scm.git import Git

    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), use origin/HEAD here to get
    # the tip of the default branch
    rev = rev or "refs/remotes/origin/HEAD"

    cache_config = {
        "cache": {"dir": cache_dir or _get_cache_dir(url), "type": cache_types}
    }

    # Local source repos contribute their remote config; others start empty.
    config = _get_remote_config(url) if os.path.isdir(url) else {}
    config.update(cache_config)

    root_dir = path if for_write else os.path.realpath(path)
    repo_kwargs = dict(
        root_dir=root_dir,
        url=url,
        # Write-mode repos operate on the working tree, so no scm/rev pinning.
        scm=None if for_write else Git(root_dir),
        rev=None if for_write else rev,
        config=config,
        repo_factory=erepo_factory(url, cache_config),
        **kwargs,
    )
    # Default to traversing subrepos and tolerating uninitialized repos,
    # unless the caller explicitly overrode these.
    if "subrepos" not in repo_kwargs:
        repo_kwargs["subrepos"] = True
    if "uninitialized" not in repo_kwargs:
        repo_kwargs["uninitialized"] = True

    repo = Repo(**repo_kwargs)
    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        # Only translate errors originating from THIS repo; nested repos'
        # errors propagate unchanged.
        if exc.repo is repo:
            raise NoOutputInExternalRepoError(
                exc.output, repo.root_dir, url
            ) from exc
        raise
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            # Write-mode clones are throwaway; drop them from disk.
            _remove(path)
def _external_repo(url=None, rev=None, cache_dir=None):
    """Materialize (or reuse) a clone of ``url`` at ``rev`` and return its
    path, memoized in REPO_CACHE by ``(url, rev, cache_dir)``.

    A pristine clone is kept under the ``(url, None, None)`` key so later
    requests can copy it instead of re-cloning.
    """
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = tempfile.mkdtemp("dvc-erepo")

    # Copy and adjust existing clone
    if (url, None, None) in REPO_CACHE:
        old_path = REPO_CACHE[url, None, None]

        # This one unlike shutil.copytree() works with an existing dir
        copy_tree(old_path, new_path)
    else:
        # Create a new clone
        _clone_repo(url, new_path)

        # Save clean clone dir so that we will have access to a default
        # branch when specifying a branch in kwargs
        clean_clone_path = tempfile.mkdtemp("dvc-erepo")
        copy_tree(new_path, clean_clone_path)
        REPO_CACHE[url, None, None] = clean_clone_path

    # Adjust new clone/copy to fit rev and cache_dir

    # Checkout needs to be done first because current branch might not be
    # DVC repository
    if rev is not None:
        _git_checkout(new_path, rev)

    repo = Repo(new_path)
    try:
        # check if the URL is local and no default remote is present
        # add default remote pointing to the original repo's cache location
        if os.path.isdir(url):
            rconfig = RemoteConfig(repo.config)
            if not _default_remote_set(rconfig):
                original_repo = Repo(url)
                try:
                    rconfig.add(
                        "auto-generated-upstream",
                        original_repo.cache.local.cache_dir,
                        default=True,
                        level=Config.LEVEL_LOCAL,
                    )
                finally:
                    original_repo.close()

        if cache_dir is not None:
            cache_config = CacheConfig(repo.config)
            cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path
def run(self):
    """Run the command, but exit successfully (0) when invoked outside a
    DVC repository instead of failing."""
    from dvc.repo import Repo

    try:
        # Probe for a repo; close it right away since we only need to
        # know whether one exists here.
        Repo().close()
    except NotDvcRepoError:
        return 0
    return self._run()
def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
    """Context manager yielding a :class:`Repo` for an external repository.

    ``rev_lock`` takes precedence over ``rev``. A missing remote surfaces
    as RemoteNotSpecifiedInExternalRepoError. The repo is always closed.
    """
    from dvc.repo import Repo

    path = _external_repo(url=url, rev=rev_lock or rev, cache_dir=cache_dir)
    repo = Repo(path)
    try:
        yield repo
    except NoRemoteError as exc:
        raise RemoteNotSpecifiedInExternalRepoError(url, cause=exc)
    finally:
        # Fix: the original called close() after the try/except, so any
        # exception (including the translated NoRemoteError) skipped it
        # and leaked the repo. Closing in finally covers all paths.
        repo.close()
def _repro_dvc(
    cls,
    dvc_dir: Optional[str],
    rel_cwd: Optional[str],
    log_errors: bool,
    pidfile: Optional[str] = None,
    git_url: Optional[str] = None,
    **kwargs,
):
    """Context manager: open the Repo at ``dvc_dir``, optionally record
    executor info to ``pidfile``, chdir into the repo, and yield it.

    The pidfile is removed, the repo closed, and the original cwd restored
    on exit; consumer exceptions are logged (per ``log_errors``) and
    re-raised, except CheckpointKilledError which passes through silently.
    """
    from dvc.repo import Repo
    from dvc.utils.serialize import modify_yaml

    dvc = Repo(dvc_dir)
    if cls.QUIET:
        # Propagate executor-level quiet mode to the scm layer.
        dvc.scm.quiet = cls.QUIET
    if dvc_dir is not None:
        # Remember where we were so the finally block can chdir back.
        old_cwd: Optional[str] = os.getcwd()
        if rel_cwd:
            os.chdir(os.path.join(dvc.root_dir, rel_cwd))
        else:
            os.chdir(dvc.root_dir)
    else:
        # No dvc_dir given: cwd is left alone and never restored.
        old_cwd = None
    if pidfile is not None:
        # Advertise this executor (pid, source url, rev, location) so
        # other processes can discover the running experiment.
        info = ExecutorInfo(
            os.getpid(),
            git_url,
            dvc.scm.get_rev(),
            cls.DEFAULT_LOCATION,
        )
        with modify_yaml(pidfile) as d:
            d.update(info.to_dict())
    logger.debug("Running repro in '%s'", os.getcwd())
    try:
        yield dvc
    except CheckpointKilledError:
        # Expected control flow for checkpoint experiments; never logged.
        raise
    except DvcException:
        if log_errors:
            logger.exception("")
        raise
    except Exception:
        if log_errors:
            logger.exception("unexpected error")
        raise
    finally:
        if pidfile is not None:
            remove(pidfile)
        dvc.close()
        if old_cwd:
            os.chdir(old_cwd)
def _fix_upstream(self):
    """For a local source URL, repair the configured default remote or
    inject an auto-generated upstream pointing at the source repo."""
    # Only local paths can need remote-path fixups.
    if not os.path.isdir(self.url):
        return

    upstream = Repo(self.url)
    try:
        default = self.config["core"].get("remote")
        if not default:
            self._add_upstream(upstream)
        else:
            self._fix_local_remote(upstream, default)
    finally:
        upstream.close()
def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
    """Context manager yielding a :class:`Repo` for an external repository.

    ``rev_lock`` takes precedence over ``rev``. Repo-level errors are
    translated into external-repo-specific exceptions. The repo is always
    closed on exit.
    """
    from dvc.repo import Repo

    path = _external_repo(url=url, rev=rev_lock or rev, cache_dir=cache_dir)
    repo = Repo(path)
    try:
        yield repo
    except NoRemoteError:
        raise NoRemoteInExternalRepoError(url)
    except OutputNotFoundError as exc:
        # Only translate errors originating from THIS repo.
        if exc.repo is repo:
            raise NoOutputInExternalRepoError(exc.output, repo.root_dir, url)
        raise
    finally:
        # Fix: the original called close() after the try/except, so every
        # exception path (including the translated ones) skipped it and
        # leaked the repo. Closing in finally covers all paths.
        repo.close()
def _set_upstream(self):
    """If the URL is a local DVC repo with no default remote, add one
    pointing at the original repo's cache location."""
    # Only local repos without a configured default remote need this.
    if not os.path.isdir(self.url):
        return
    if self.config["core"].get("remote"):
        return

    # Read the source repo's local cache dir, closing it promptly.
    origin = Repo(self.url)
    try:
        cache_dir = origin.cache.local.cache_dir
    finally:
        origin.close()

    self.config["remote"]["auto-generated-upstream"] = {"url": cache_dir}
    self.config["core"]["remote"] = "auto-generated-upstream"
def _set_upstream(self):
    """If the URL is a local DVC repo with no default remote, register an
    auto-generated default remote pointing at its cache location."""
    # Only local URLs can be pointed at a filesystem cache directory.
    if not os.path.isdir(self.url):
        return

    remotes = RemoteConfig(self.config)
    if remotes.has_default():
        return

    origin = Repo(self.url)
    try:
        remotes.add(
            "auto-generated-upstream",
            origin.cache.local.cache_dir,
            default=True,
            level=Config.LEVEL_LOCAL,
        )
    finally:
        origin.close()
def _run(self):
    """Perform a three-way merge of the ancestor/our/their dvcfiles into
    ours. Returns 0 on success; the repo is always closed."""
    from dvc.dvcfile import Dvcfile
    from dvc.repo import Repo

    repo = Repo()
    try:
        # Load all three sides without verification, then merge into ours.
        base, ours, theirs = (
            Dvcfile(repo, path, verify=False)
            for path in (self.args.ancestor, self.args.our, self.args.their)
        )
        ours.merge(base, theirs)
        return 0
    finally:
        repo.close()
def _repro_dvc(
    cls,
    info: "ExecutorInfo",
    log_errors: bool = True,
    infofile: Optional[str] = None,
    **kwargs,
):
    """Context manager: open the executor's Repo, chdir into its working
    directory, persist ``info`` to ``infofile`` (before and after), and
    yield the repo.

    Consumer exceptions are logged when ``log_errors`` is set (except
    CheckpointKilledError) and always re-raised; the repo is closed and
    the original cwd restored on exit.
    """
    from dvc.repo import Repo
    from dvc.stage.monitor import CheckpointKilledError
    from dvc.utils.fs import makedirs
    from dvc.utils.serialize import modify_json

    dvc = Repo(os.path.join(info.root_dir, info.dvc_dir))
    if cls.QUIET:
        # Propagate executor-level quiet mode to scm interactions.
        dvc.scm_context.quiet = cls.QUIET
    old_cwd = os.getcwd()
    if info.wdir:
        # NOTE: wdir is resolved against the scm root, not dvc.root_dir.
        os.chdir(os.path.join(dvc.scm.root_dir, info.wdir))
    else:
        os.chdir(dvc.root_dir)

    if infofile is not None:
        # Record executor state up front so it is discoverable while
        # the repro runs.
        makedirs(os.path.dirname(infofile), exist_ok=True)
        with modify_json(infofile) as d:
            d.update(info.asdict())
    try:
        logger.debug("Running repro in '%s'", os.getcwd())
        yield dvc
    except CheckpointKilledError:
        # Expected control flow for checkpoint experiments; never logged.
        raise
    except DvcException:
        if log_errors:
            logger.exception("")
        raise
    except Exception:
        if log_errors:
            logger.exception("unexpected error")
        raise
    finally:
        if infofile is not None:
            # Re-dump info on exit; presumably `info` was mutated during
            # the repro run — TODO confirm against callers.
            with modify_json(infofile) as d:
                d.update(info.asdict())
        dvc.close()
        os.chdir(old_cwd)
def _is_git_file(repo_dir, path):
    """Return True when *path* belongs to git rather than the DVC cache.

    Absolute paths are never git files. If *repo_dir* is not a DVC repo,
    or the path is not a known DVC output, it must be git-tracked.
    """
    from dvc.repo import Repo

    if os.path.isabs(path):
        return False

    try:
        dvc_repo = Repo(repo_dir)
    except NotDvcRepoError:
        # No DVC repo at all -> everything belongs to git.
        return True

    try:
        out = dvc_repo.find_out_by_relpath(path)
    except OutputNotFoundError:
        return True
    else:
        # Uncached outputs stay under git control.
        return not out.use_cache
    finally:
        dvc_repo.close()
def _fix_upstream(self, repo):
    """When the source URL is a local directory, repair *repo*'s default
    remote or add an auto-generated upstream from the matching source
    (sub)repo. Silently does nothing when no source repo is found."""
    # Remote URLs need no local-path fixups.
    if not os.path.isdir(self.url):
        return

    # Locate the corresponding (sub)repo inside the source checkout.
    subpath = os.path.relpath(repo.root_dir, self.root_dir)
    try:
        origin = Repo(PathInfo(self.url) / subpath)
    except NotDvcRepoError:
        return

    try:
        default = repo.config["core"].get("remote")
        if not default:
            self._add_upstream(repo, origin)
        else:
            self._fix_local_remote(repo, origin, default)
    finally:
        origin.close()
def _external_repo(url=None, rev=None, cache_dir=None):
    """Materialize (or reuse) a clone of ``url`` at ``rev`` and return its
    path, memoized in REPO_CACHE by ``(url, rev, cache_dir)``.

    A pristine clone is kept under the ``(url, None, None)`` key so later
    requests can copy it instead of re-cloning.
    """
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = tempfile.mkdtemp("dvc-erepo")

    # Copy and adjust existing clone
    if (url, None, None) in REPO_CACHE:
        old_path = REPO_CACHE[url, None, None]

        # This one unlike shutil.copytree() works with an existing dir
        copy_tree(old_path, new_path)
    else:
        # Create a new clone
        _clone_repo(url, new_path)

        # Save clean clone dir so that we will have access to a default
        # branch
        clean_clone_path = tempfile.mkdtemp("dvc-erepo")
        copy_tree(new_path, clean_clone_path)
        REPO_CACHE[url, None, None] = clean_clone_path

    # Adjust new clone/copy to fit rev and cache_dir

    # Checkout needs to be done first because current branch might not be
    # DVC repository
    if rev is not None:
        _git_checkout(new_path, rev)

    repo = Repo(new_path)
    try:
        if cache_dir is not None:
            cache_config = CacheConfig(repo.config)
            cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path
def _fix_upstream(self):
    """For a local source URL, repair the configured default remote or add
    an auto-generated upstream pointing at the source repo."""
    # Remote URLs need no local-path fixups.
    if not os.path.isdir(self.url):
        return

    try:
        upstream = Repo(self.url)
    except NotDvcRepoError:
        # If ExternalRepo does not throw NotDvcRepoError and Repo does,
        # the self.url might be a bare git repo.
        # NOTE: This will fail to resolve remote with relative path,
        # same as if it was a remote DVC repo.
        return

    try:
        default = self.config["core"].get("remote")
        if not default:
            self._add_upstream(upstream)
        else:
            self._fix_local_remote(upstream, default)
    finally:
        upstream.close()
def _external_repo(url=None, rev=None, cache_dir=None):
    """Materialize (or reuse) a clone of ``url`` at ``rev`` and return its
    path, memoized in REPO_CACHE by ``(url, rev, cache_dir)``.

    For local source URLs without a default remote, an auto-generated
    upstream remote pointing at the source repo's cache is added.
    """
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = cached_clone(url, rev=rev)

    repo = Repo(new_path)
    try:
        # check if the URL is local and no default remote is present
        # add default remote pointing to the original repo's cache location
        if os.path.isdir(url):
            rconfig = RemoteConfig(repo.config)
            if not _default_remote_set(rconfig):
                original_repo = Repo(url)
                try:
                    rconfig.add(
                        "auto-generated-upstream",
                        original_repo.cache.local.cache_dir,
                        default=True,
                        level=Config.LEVEL_LOCAL,
                    )
                finally:
                    original_repo.close()

        if cache_dir is not None:
            cache_config = CacheConfig(repo.config)
            cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path