def test_path_isin_accepts_pathinfo():
    """Check that path_isin works when one argument is a PathInfo object."""
    child = os.path.join("path", "to", "folder")
    # Parent is expressed as "<child>/.." through PathInfo's "/" operator.
    parent = PathInfo(child) / ".."

    assert path_isin(child, parent)

    # The arguments are swapped on purpose for the negative case.
    # pylint: disable=arguments-out-of-order
    assert not path_isin(parent, child)
def test_path_isin_positive():
    """Check that a nested path is reported as inside each of its ancestors.

    Every ancestor is tried both with and without a trailing separator.
    """
    child = os.path.join("path", "to", "folder")
    ancestors = (
        os.path.join("path", "to", ""),
        os.path.join("path", "to"),
        os.path.join("path", ""),
        os.path.join("path"),
    )

    for parent in ancestors:
        assert path_isin(child, parent)
def test_path_isin_on_same_path():
    """Check that a path is never reported as being inside itself.

    Both spellings of the path (with and without a trailing separator) are
    tried in every child/parent combination.
    """
    path = os.path.join("path", "to", "folder")
    path_with_sep = os.path.join(path, "")
    spellings = (path, path_with_sep)

    for child in spellings:
        for parent in spellings:
            assert not path_isin(child, parent)
def collect(self, target, with_deps=False, recursive=False, graph=None):
    """Collect the stages selected by ``target``.

    Args:
        target: path of a stage file or directory; a falsy value selects
            every stage of the graph.
        with_deps: when true, return the target stage's pipeline nodes
            reachable from it in DFS post-order instead of the stage alone.
        recursive: when true and ``target`` is a directory, return every
            stage whose path lies inside that directory.
        graph: graph to use instead of ``self.graph`` (a falsy value falls
            back to ``self.graph``).

    Returns:
        A list of stages.
    """
    import networkx as nx
    from dvc.stage import Stage

    active_graph = graph or self.graph

    if not target:
        return get_stages(active_graph)

    target = os.path.abspath(target)

    if recursive and os.path.isdir(target):
        stage_by_node = nx.get_node_attributes(active_graph, "stage")
        ordered_stages = (
            stage_by_node[node]
            for node in nx.dfs_postorder_nodes(active_graph)
        )
        return [
            stage
            for stage in ordered_stages
            if path_isin(stage.path, target)
        ]

    stage = Stage.load(self, target)
    if not with_deps:
        return [stage]

    # Graph nodes are keyed by the stage path relative to the repo root.
    node = relpath(stage.path, self.root_dir)
    pipeline = get_pipeline(get_pipelines(active_graph), node)

    collected = []
    for name in nx.dfs_postorder_nodes(pipeline, node):
        collected.append(pipeline.node[name]["stage"])
    return collected
def __init__(self, stage, path, *args, **kwargs):
    """Initialise the output, rewriting in-repo paths relative to the stage.

    A path located inside the stage's repo root is converted to a path
    relative to ``stage.wdir`` before being handed to the parent class.
    """
    inside_repo = stage and path_isin(path, stage.repo.root_dir)
    if inside_repo:
        path = relpath(path, stage.wdir)

    super().__init__(stage, path, *args, **kwargs)

    uses_repo_tree = (
        self.is_in_repo and self.repo and is_working_tree(self.repo.tree)
    )
    if not uses_repo_tree:
        return

    # Reuse the repo's own tree when it is a working tree.
    self.tree = self.repo.tree
def _ignore(self):
    """Pass the local config file, tmp dir and in-repo cache dir to the SCM
    ignore list."""
    ignored = [self.config.files["local"], self.tmp_dir]

    # The cache directory is only added when it lives inside the repo root.
    if path_isin(self.cache.local.cache_dir, self.root_dir):
        ignored.append(self.cache.local.cache_dir)

    self.scm.ignore_list(ignored)
def imp_url(self, url, out=None, fname=None, erepo=None, locked=True):
    """Create, run and dump a stage that has ``url`` as its only dependency.

    Args:
        url: source to import.
        out: output path; resolved from ``url`` via ``resolve_output``.
        fname: stage file name; defaults to the resolved path.
        erepo: passed through to ``Stage.create``.
        locked: value assigned to ``stage.locked`` before dumping.

    Returns:
        The created stage, or ``None`` when ``Stage.create`` returns ``None``.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: a source that exists locally inside this repository is recorded
    # relative to the stage working directory.
    source_in_repo = os.path.exists(url) and path_isin(
        os.path.abspath(url), self.root_dir
    )
    if source_in_repo:
        url = relpath(url, wdir)

    stage = Stage.create(
        self,
        fname or path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.overwrite_with_prompt(force=True)

    self.check_modified_graph([stage])

    stage.run()
    stage.locked = locked

    dvcfile.dump(stage)
    return stage
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    desc=None,
    jobs=None,
):
    """Create and dump a stage that has ``url`` as its only dependency.

    Args:
        url: source to import.
        out: output path; resolved from ``url`` via ``resolve_output``.
        fname: stage file name; defaults to the resolved path.
        erepo: passed through to ``create_stage``; when it is not ``None``
            the url is never rewritten as a repo-relative path.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when true the stage is not run; its outputs are ignored
            instead.
        desc: optional description stored on the first output.
        jobs: forwarded to ``stage.run``.

    Returns:
        The stage, or ``None`` when ``stage.can_be_skipped`` is true.

    Raises:
        OutputDuplicationError: re-raised with the new stage removed from the
            reported stage set.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: a source that exists locally inside the user's own repository is
    # recorded relative to the stage working directory.
    source_in_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if source_in_repo:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)
    if stage.can_be_skipped:
        return None

    if desc:
        stage.outs[0].desc = desc

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the pre-existing stages, not the one being created.
        other_stages = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, other_stages)

    if not no_exec:
        stage.run(jobs=jobs)
    else:
        stage.ignore_outs()

    stage.frozen = frozen
    dvcfile.dump(stage)
    return stage
def _ignore(self):
    """Ignore the local config file, tmp dir and in-repo cache dir, one
    entry at a time, through ``self.scm_context``."""
    targets = [self.config.files["local"], self.tmp_dir]

    # The cache directory is only ignored when it lives inside the repo root.
    if path_isin(self.odb.local.cache_dir, self.root_dir):
        targets.append(self.odb.local.cache_dir)

    for target in targets:
        self.scm_context.ignore(target)
def __init__(self, stage, path, *args, **kwargs):
    """Initialise the output, rewriting in-repo paths relative to the stage.

    A path located inside the stage's repo root is converted to a path
    relative to ``stage.wdir`` before being handed to the parent class.
    """
    inside_repo = stage and path_isin(path, stage.repo.root_dir)
    if inside_repo:
        path = relpath(path, stage.wdir)

    super().__init__(stage, path, *args, **kwargs)

    uses_repo_fs = (
        self.is_in_repo
        and self.repo
        and isinstance(self.repo.fs, LocalFileSystem)
    )
    if not uses_repo_fs:
        return

    # Reuse the repo's filesystem object when it is a LocalFileSystem.
    self.fs = self.repo.fs
def __str__(self):
    """Return a display path for this output.

    Outputs outside the repo are shown as their ``def_path``.  In-repo
    outputs are shown relative to the current directory when that directory
    is inside the repo root, and relative to the repo root otherwise.
    """
    if not self.is_in_repo:
        return str(self.def_path)

    cwd = os.getcwd()
    base = cwd if path_isin(cwd, self.repo.root_dir) else self.repo.root_dir
    return relpath(self.path_info, base)
def is_in_repo(self):
    """Tell whether this output lives inside its repository.

    Returns ``False`` for ``remote://`` definitions and for absolute
    ``def_path`` values.  Otherwise the result is ``self.repo`` itself when
    it is falsy, or the outcome of checking the resolved path against the
    repo root.
    """
    is_remote = urlparse(self.def_path).scheme == "remote"
    if is_remote or os.path.isabs(self.def_path):
        return False

    return self.repo and path_isin(
        os.path.realpath(self.path_info), self.repo.root_dir
    )
def __str__(self):
    """Return a display path for this output.

    The raw ``def_path`` is used when there is no repo, for ``remote://``
    definitions and for absolute definitions.  Otherwise the path is shown
    relative to the current directory when that directory is inside the repo
    root, and relative to the repo root otherwise.
    """
    show_as_defined = (
        not self.repo
        or urlparse(self.def_path).scheme == "remote"
        or os.path.isabs(self.def_path)
    )
    if show_as_defined:
        return str(self.def_path)

    cwd = os.getcwd()
    base = cwd if path_isin(cwd, self.repo.root_dir) else self.repo.root_dir
    return relpath(self.path_info, base)
def test_windows_should_add_when_cache_on_different_drive(
    tmp_dir, dvc, temporary_windows_drive
):
    """Check that adding a file works when the cache dir is on another drive.

    The cache is pointed at ``temporary_windows_drive``, one file is added,
    and the resulting cache entry must be located under that drive, exist as
    a regular file and hold the same content as the workspace file.
    """
    dvc.config["cache"]["dir"] = temporary_windows_drive
    # Rebuild the cache object so the new directory setting is picked up.
    dvc.cache = Cache(dvc)

    (stage,) = tmp_dir.dvc_gen({"file": "file"})
    cache_path = stage.outs[0].cache_path

    assert path_isin(cache_path, temporary_windows_drive)
    assert os.path.isfile(cache_path)
    # filecmp.cmp() only returns a bool, so the result has to be asserted for
    # the content check to be able to fail; shallow=False forces a byte
    # comparison rather than trusting matching stat() signatures.
    assert filecmp.cmp("file", cache_path, shallow=False)
def _ignore(self):
    """Pass DVC's internal files to the SCM ignore list.

    The list holds the local config file, the updater file, the state, lock
    and updater-lock files, and the cache dir when it is inside the repo
    root.
    """
    from dvc.updater import Updater

    updater = Updater(self.dvc_dir)

    ignored = [
        self.config.config_local_file,
        updater.updater_file,
        *self.state.files,
        *self.lock.files,
        *updater.lock.files,
    ]

    if path_isin(self.cache.local.cache_dir, self.root_dir):
        ignored.append(self.cache.local.cache_dir)

    self.scm.ignore_list(ignored)
def _ignore(self):
    """Pass the local config file, tmp dir, experiments dir (when
    experiments are available) and in-repo cache dir to the SCM ignore
    list."""
    ignored = [self.config.files["local"], self.tmp_dir]

    if self.experiments:
        ignored.append(self.experiments.exp_dir)

    # The cache directory is only added when it lives inside the repo root.
    if path_isin(self.cache.local.cache_dir, self.root_dir):
        ignored.append(self.cache.local.cache_dir)

    self.scm.ignore_list(ignored)
def _check_stage_path(repo, path):
    """Validate that ``path`` is an existing directory within the repo.

    Args:
        repo: repository object providing ``root_dir``; must not be ``None``.
        path: path to validate; symlinks are resolved before checking.

    Raises:
        StagePathNotFoundError: the resolved path does not exist.
        StagePathNotDirectoryError: the resolved path is not a directory.
        StagePathOutsideError: the resolved path is neither the repo root
            nor located inside it.
    """
    assert repo is not None

    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise StagePathNotFoundError(path)
    if not os.path.isdir(resolved):
        raise StagePathNotDirectoryError(path)

    root = os.path.realpath(repo.root_dir)
    # The root is tested for equality first, before the containment check.
    if not (resolved == root or path_isin(resolved, root)):
        raise StagePathOutsideError(path)
def _get_gitignore(self, path):
    """Compute the ignore entry for ``path`` and the ignore file to hold it.

    Args:
        path: absolute path of the file to ignore.

    Returns:
        A ``(entry, gitignore)`` tuple: the "/"-separated entry relative to
        the directory containing ``path``, and the path of the ignore file
        (``self.GITIGNORE``) in that same directory.

    Raises:
        FileNotInRepoError: the resolved ignore file is not inside
            ``self.root_dir``.
    """
    ignore_file_dir = os.path.dirname(path)

    assert os.path.isabs(path)
    assert os.path.isabs(ignore_file_dir)

    entry = relpath(path, ignore_file_dir).replace(os.sep, "/")
    # NOTE: a leading '/' makes the entry unambiguous.
    if entry and not entry.startswith("/"):
        entry = "/" + entry

    gitignore = os.path.join(ignore_file_dir, self.GITIGNORE)

    resolved_gitignore = os.path.realpath(gitignore)
    if not path_isin(resolved_gitignore, self.root_dir):
        raise FileNotInRepoError(path)

    return entry, gitignore
def check_stage_path(repo, path, is_wdir=False):
    """Validate that ``path`` is an existing directory within the repo.

    Args:
        repo: repository object providing ``root_dir``; must not be ``None``.
        path: path to validate; symlinks are resolved before checking.
        is_wdir: when true, error messages describe the path as the
            "stage working dir" rather than a "file path".

    Raises:
        StagePathNotFoundError: the resolved path does not exist.
        StagePathNotDirectoryError: the resolved path is not a directory.
        StagePathOutsideError: the resolved path is neither the repo root
            nor located inside it.
    """
    assert repo is not None

    subject = "stage working dir" if is_wdir else "file path"

    def describe(problem):
        # Build the message in a single formatting pass.  ``path`` comes from
        # the user and may contain "{" or "}"; feeding an already
        # interpolated path through a second ``str.format`` call would raise
        # KeyError/IndexError/ValueError instead of the intended error.
        return "{} '{}' {}".format(subject, path, problem)

    real_path = os.path.realpath(path)
    if not os.path.exists(real_path):
        raise StagePathNotFoundError(describe("does not exist"))

    if not os.path.isdir(real_path):
        raise StagePathNotDirectoryError(describe("is not directory"))

    proj_dir = os.path.realpath(repo.root_dir)
    # The repo root itself is accepted; the equality test comes first so the
    # containment check is skipped for it.
    if real_path != proj_dir and not path_isin(real_path, proj_dir):
        raise StagePathOutsideError(describe("is outside of DVC repo"))
def imp_url(self, url, out=None, fname=None, erepo=None, frozen=True):
    """Create, run and dump a stage that has ``url`` as its only dependency.

    Args:
        url: source to import.
        out: output path; resolved from ``url`` via ``resolve_output``.
        fname: stage file name; defaults to the resolved path.
        erepo: passed through to ``create_stage``; when it is not ``None``
            the url is never rewritten as a repo-relative path.
        frozen: value assigned to ``stage.frozen`` before dumping.

    Returns:
        The stage, or ``None`` when ``create_stage`` returns ``None``.

    Raises:
        OutputDuplicationError: re-raised with the new stage removed from the
            reported stage set.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: a source that exists locally inside the user's own repository is
    # recorded relative to the stage working directory.
    source_in_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if source_in_repo:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove_with_prompt(force=True)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the pre-existing stages, not the one being created.
        other_stages = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, other_stages)

    stage.run()
    stage.frozen = frozen

    dvcfile.dump(stage)
    return stage
def test_external_repo(erepo):
    """Open the same external repo three times and expect a single clone.

    The default revision and the "branch" revision must expose their own
    "version" file content, and a repo opened without ``cache_dir`` must
    keep its cache inside its own root.
    """
    url = erepo.root_dir
    # The cache dir is shared so the version file can be fetched.
    cache_dir = erepo.dvc.cache.local.cache_dir

    with patch.object(Git, "clone", wraps=Git.clone) as clone_mock:
        with external_repo(url, cache_dir=cache_dir) as repo, repo.open(
            os.path.join(repo.root_dir, "version")
        ) as fobj:
            assert fobj.read() == "master"

        with external_repo(
            url, rev="branch", cache_dir=cache_dir
        ) as repo, repo.open(os.path.join(repo.root_dir, "version")) as fobj:
            assert fobj.read() == "branch"

        # Check cache_dir is unset
        with external_repo(url) as repo:
            assert path_isin(repo.cache.local.cache_dir, repo.root_dir)

        assert clone_mock.call_count == 1
def collect(self, target, with_deps=False, recursive=False, graph=None):
    """Collect the stages selected by ``target``.

    Args:
        target: path of a stage file or directory; a falsy value selects
            every stage (from ``graph`` when it is truthy, otherwise
            ``self.stages``).
        with_deps: when true, return the target stage's pipeline nodes
            reachable from it in DFS post-order instead of the stage alone.
        recursive: when true and ``target`` is a directory, return every
            stage whose path lies inside that directory.
        graph: graph to use instead of ``self.graph`` (a falsy value falls
            back to ``self.graph``).

    Returns:
        A list of stages.
    """
    import networkx as nx
    from dvc.stage import Stage

    if not target:
        if graph:
            return list(graph)
        return self.stages

    target = os.path.abspath(target)

    if recursive and os.path.isdir(target):
        found = []
        for candidate in nx.dfs_postorder_nodes(graph or self.graph):
            if path_isin(candidate.path, target):
                found.append(candidate)
        return found

    stage = Stage.load(self, target)

    # Optimization: ``self.graph`` is deliberately not touched before this
    # point, so a single target without deps never needs the graph.
    if not with_deps:
        return [stage]

    pipelines = get_pipelines(graph or self.graph)
    pipeline = get_pipeline(pipelines, stage)
    return list(nx.dfs_postorder_nodes(pipeline, stage))
def collect(self, target, with_deps=False, recursive=False, graph=None):
    """Collect the stages selected by ``target``.

    Args:
        target: path of a stage file or directory; a falsy value selects
            every node of the graph.
        with_deps: when true, return the target stage's pipeline nodes
            reachable from it in DFS post-order instead of the stage alone.
        recursive: when true and ``target`` is a directory, return every
            stage whose path lies inside that directory.
        graph: graph to use instead of ``self.graph`` (a falsy value falls
            back to ``self.graph``).

    Returns:
        A list of stages.
    """
    import networkx as nx
    from dvc.stage import Stage

    active_graph = graph or self.graph

    if not target:
        return list(active_graph)

    target = os.path.abspath(target)

    if recursive and os.path.isdir(target):
        found = []
        for candidate in nx.dfs_postorder_nodes(active_graph):
            if path_isin(candidate.path, target):
                found.append(candidate)
        return found

    stage = Stage.load(self, target)
    if not with_deps:
        return [stage]

    pipelines = get_pipelines(active_graph)
    pipeline = get_pipeline(pipelines, stage)
    return list(nx.dfs_postorder_nodes(pipeline, stage))
def test_external_repo(erepo_dir):
    """Open the same external repo three times and expect a single clone.

    A "file" is generated with different content on "branch" and on the
    default branch; each revision must expose its own content, and a repo
    opened without ``cache_dir`` must keep its cache inside its own root.
    """
    with erepo_dir.chdir():
        with erepo_dir.branch("branch", new=True):
            erepo_dir.dvc_gen("file", "branch", commit="create file on branch")
        erepo_dir.dvc_gen("file", "master", commit="create file on master")

    url = fspath(erepo_dir)
    # The cache dir is shared so the version file can be fetched.
    cache_dir = erepo_dir.dvc.cache.local.cache_dir

    with patch.object(Git, "clone", wraps=Git.clone) as clone_mock:
        with external_repo(url, cache_dir=cache_dir) as repo, repo.open(
            os.path.join(repo.root_dir, "file")
        ) as fobj:
            assert fobj.read() == "master"

        with external_repo(
            url, rev="branch", cache_dir=cache_dir
        ) as repo, repo.open(os.path.join(repo.root_dir, "file")) as fobj:
            assert fobj.read() == "branch"

        # Check cache_dir is unset
        with external_repo(url) as repo:
            assert path_isin(repo.cache.local.cache_dir, repo.root_dir)

        assert clone_mock.call_count == 1
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    remote=None,
    to_remote=False,
    desc=None,
    jobs=None,
):
    """Create and dump a stage that has ``url`` as its only dependency.

    Args:
        url: source to import.
        out: output path; resolved from ``url`` via ``resolve_output``.
        fname: stage file name; defaults to the resolved path.
        erepo: passed through to ``create_stage``; when it is not ``None``
            the url is never rewritten as a repo-relative path.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when true the stage is not run; its outputs are ignored
            instead.
        remote: remote name handed to ``self.cloud.get_remote_odb``; only
            valid together with ``to_remote``.
        to_remote: when true the first output is transferred to the remote
            object database instead of running the stage.
        desc: optional description stored on the first output.
        jobs: forwarded to ``stage.run`` or to the output transfer.

    Returns:
        The created stage.

    Raises:
        InvalidArgumentError: ``to_remote`` combined with ``no_exec``, or
            ``remote`` given without ``to_remote``.
        OutputDuplicationError: re-raised with the new stage removed from the
            reported stage set.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(
        self, out, always_local=to_remote and not out
    )

    if to_remote and no_exec:
        raise InvalidArgumentError(
            "--no-exec can't be combined with --to-remote")

    if remote and not to_remote:
        raise InvalidArgumentError(
            "--remote can't be used without --to-remote")

    # NOTE: a source that exists locally inside the user's own repository is
    # recorded relative to the stage working directory.
    source_in_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if source_in_repo:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)

    if desc:
        stage.outs[0].desc = desc

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        self.index.add(stage).check_graph()
    except OutputDuplicationError as exc:
        # Report only the pre-existing stages, not the one being created.
        other_stages = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, other_stages)

    if no_exec:
        stage.ignore_outs()
    elif to_remote:
        remote_odb = self.cloud.get_remote_odb(remote, "import-url")
        stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs)
        stage.save_deps()
        stage.md5 = stage.compute_md5()
    else:
        stage.run(jobs=jobs)

    stage.frozen = frozen
    dvcfile.dump(stage)
    return stage
def __init__(self, stage, path, *args, **kwargs):
    """Initialise the output, rewriting in-repo paths relative to the stage.

    A path located inside the stage's repo root is converted to a path
    relative to ``stage.wdir`` before being handed to the parent class.
    """
    inside_repo = stage and path_isin(path, stage.repo.root_dir)
    if inside_repo:
        path = relpath(path, stage.wdir)

    super().__init__(stage, path, *args, **kwargs)
def _collect_inside(self, path, graph):
    """Return the stages of ``graph`` whose path lies inside ``path``.

    Stages are visited, and therefore returned, in DFS post-order.
    """
    import networkx as nx

    inside = []
    for candidate in nx.dfs_postorder_nodes(graph):
        if path_isin(candidate.path, path):
            inside.append(candidate)
    return inside
def test_path_isin_on_common_substring_path():
    """Check that sharing a name prefix does not count as containment.

    "folder1" starts with the same characters as "folder" but is a sibling,
    not a descendant.
    """
    base = os.path.join("path", "to")
    longer_name = os.path.join(base, "folder1")
    shorter_name = os.path.join(base, "folder")

    assert not path_isin(longer_name, shorter_name)
def test_path_isin_accepts_pathinfo():
    """Check that path_isin works when one argument is a PathInfo object."""
    inner = os.path.join("path", "to", "folder")
    # The outer path is expressed as "<inner>/.." through PathInfo's "/"
    # operator.
    outer = PathInfo(inner) / ".."

    assert path_isin(inner, outer)
    assert not path_isin(outer, inner)
def test_path_isin_with_absolute_path():
    """Check containment when both paths are absolute."""
    outer = os.path.abspath("path")
    inner = os.path.join(outer, "to", "folder")

    assert path_isin(inner, outer)