def test_pattern_trie_fs(tmp_dir, dvc):
    """Check the merged patterns the ignore trie returns per directory.

    Only ``top/first``, ``top/first/middle/second`` and ``other`` receive an
    ignore file. The expectations are:

    * ``top`` resolves to the default patterns anchored at the repo root;
    * ``other`` resolves to the defaults merged with its own file;
    * ``top/first`` and ``top/first/middle`` resolve to the same patterns;
    * ``.../second`` and ``.../second/bottom`` resolve to the same patterns.
    """
    ignore_name = DvcIgnore.DVCIGNORE_FILE
    second_layout = {ignore_name: "d\ne\nf", "bottom": {}}
    first_layout = {
        ignore_name: "a\nb\nc",
        "middle": {"second": second_layout},
    }
    tmp_dir.gen(
        {
            "top": {"first": first_layout},
            "other": {ignore_name: "1\n2\n3"},
        }
    )
    dvc._reset()
    dvcignore = dvc.dvcignore

    top_dir = tmp_dir / "top"
    other_dir = tmp_dir / "other"
    first_dir = top_dir / "first"
    middle_dir = first_dir / "middle"
    second_dir = middle_dir / "second"
    bottom_dir = second_dir / "bottom"

    # Lookups run in this exact order: top, other, then down the nested chain.
    lookup_order = (
        top_dir,
        other_dir,
        first_dir,
        middle_dir,
        second_dir,
        bottom_dir,
    )
    (
        found_top,
        found_other,
        found_first,
        found_middle,
        found_second,
        found_bottom,
    ) = [
        dvcignore._get_trie_pattern(os.fspath(directory))
        for directory in lookup_order
    ]

    # Expected values are built by merging each ignore file onto its parent's
    # already-merged patterns.
    expected_base = (
        _to_pattern_info_list([".hg/", ".git/", ".git", ".dvc/"]),
        os.fspath(tmp_dir),
    )
    expected_first = merge_patterns(
        *expected_base,
        _to_pattern_info_list(["a", "b", "c"]),
        os.fspath(first_dir),
    )
    expected_second = merge_patterns(
        *expected_first,
        _to_pattern_info_list(["d", "e", "f"]),
        os.fspath(second_dir),
    )
    expected_other = merge_patterns(
        *expected_base,
        _to_pattern_info_list(["1", "2", "3"]),
        os.fspath(other_dir),
    )

    assert DvcIgnorePatterns(*expected_base) == found_top
    assert DvcIgnorePatterns(*expected_other) == found_other
    assert DvcIgnorePatterns(*expected_first) == found_first == found_middle
    assert DvcIgnorePatterns(*expected_second) == found_second == found_bottom
def test_pattern_trie_tree(tmp_dir, dvc):
    """Check the merged patterns ``dvc.tree.dvcignore`` returns per directory.

    Only ``top/first``, ``top/first/middle/second`` and ``other`` receive an
    ignore file. The expectations are:

    * ``top`` resolves to the default patterns anchored at the repo root;
    * ``other`` resolves to the defaults merged with its own file;
    * ``top/first`` and ``top/first/middle`` resolve to the same patterns;
    * ``.../second`` and ``.../second/bottom`` resolve to the same patterns.
    """
    ignore_name = DvcIgnore.DVCIGNORE_FILE
    second_layout = {ignore_name: "d\ne\nf", "bottom": {}}
    first_layout = {
        ignore_name: "a\nb\nc",
        "middle": {"second": second_layout},
    }
    tmp_dir.gen(
        {
            "top": {"first": first_layout},
            "other": {ignore_name: "1\n2\n3"},
        }
    )

    # Drop any ``dvcignore`` entry stored on the tree instance so the next
    # attribute access does not reuse a value from before the files existed.
    dvc.tree.__dict__.pop("dvcignore", None)
    dvcignore = dvc.tree.dvcignore

    top_dir = tmp_dir / "top"
    other_dir = tmp_dir / "other"
    first_dir = top_dir / "first"
    middle_dir = first_dir / "middle"
    second_dir = middle_dir / "second"
    bottom_dir = second_dir / "bottom"

    # Lookups run in this exact order: top, other, then down the nested chain.
    lookup_order = (
        top_dir,
        other_dir,
        first_dir,
        middle_dir,
        second_dir,
        bottom_dir,
    )
    (
        found_top,
        found_other,
        found_first,
        found_middle,
        found_second,
        found_bottom,
    ) = [
        dvcignore._get_trie_pattern(os.fspath(directory))
        for directory in lookup_order
    ]

    expected_base = ([".hg/", ".git/", ".dvc/"], os.fspath(tmp_dir))
    expected_first = merge_patterns(
        *expected_base, ["a", "b", "c"], os.fspath(first_dir)
    )
    expected_second = merge_patterns(
        *expected_first, ["d", "e", "f"], os.fspath(second_dir)
    )
    expected_other = merge_patterns(
        *expected_base, ["1", "2", "3"], os.fspath(other_dir)
    )

    assert DvcIgnorePatterns(*expected_base) == found_top
    assert DvcIgnorePatterns(*expected_other) == found_other
    assert DvcIgnorePatterns(*expected_first) == found_first == found_middle
    assert DvcIgnorePatterns(*expected_second) == found_second == found_bottom
def _update_sub_repo(self, path):
    """Record ``path`` as an ignored sub-repository of its parent directory.

    Does nothing when ``path`` is the root directory itself or when it has
    no ``Repo.DVC_DIR`` entry on disk. Otherwise the directory name is added
    to ``self._ignored_subrepos`` under its parent, and a ``/<name>/``
    pattern is stored in the trie at the parent, merged with whatever
    pattern the trie already resolves for that parent.

    Args:
        path: directory to inspect.
    """
    from dvc.repo import Repo

    if path == self.root_dir or not os.path.exists(
        os.path.join(path, Repo.DVC_DIR)
    ):
        return

    parent, name = os.path.split(path)

    # Build a new set rather than mutating the stored one in place.
    already_ignored = self._ignored_subrepos.get(parent, set())
    self._ignored_subrepos[parent] = already_ignored | {name}

    sub_repo_pattern = DvcIgnorePatterns(
        [PatternInfo(f"/{name}/", f"in sub_repo:{name}")], parent
    )
    inherited = self.ignores_trie_tree.longest_prefix(parent).value

    if inherited:
        resolved = DvcIgnorePatterns(
            *merge_patterns(
                inherited.pattern_list,
                inherited.dirname,
                sub_repo_pattern.pattern_list,
                sub_repo_pattern.dirname,
            )
        )
    else:
        resolved = sub_repo_pattern
    self.ignores_trie_tree[parent] = resolved
def _update(self, dirname):
    """Store the ignore patterns that apply to ``dirname`` in the trie.

    The sub-repo check for ``dirname`` runs first. Then, if the patterns
    inherited from the longest matching trie prefix do not match the ignore
    file itself and that file exists, its patterns are loaded and merged
    with the inherited ones. Otherwise the inherited patterns, when truthy,
    are stored under ``dirname`` as they are.

    Args:
        dirname: directory whose trie entry is being computed.
    """
    self._update_sub_repo(dirname)

    inherited = self.ignores_trie_tree.longest_prefix(dirname).value
    ignore_name = DvcIgnore.DVCIGNORE_FILE
    # NOTE(review): the third argument is presumably an "is directory" flag;
    # confirm against DvcIgnorePatterns.matches.
    file_is_ignored = inherited.matches(dirname, ignore_name, False)
    ignore_file_path = os.path.join(dirname, ignore_name)

    reads_own_file = not file_is_ignored and self.tree.exists(
        ignore_file_path, use_dvcignore=False
    )
    if not reads_own_file:
        if inherited:
            self.ignores_trie_tree[dirname] = inherited
        return

    own = DvcIgnorePatterns.from_files(ignore_file_path, self.tree)
    if not inherited:
        self.ignores_trie_tree[dirname] = own
        return

    merged = merge_patterns(
        inherited.pattern_list,
        inherited.dirname,
        own.pattern_list,
        own.dirname,
    )
    self.ignores_trie_tree[dirname] = DvcIgnorePatterns(*merged)
def _update(self, dirname):
    """Store the ignore patterns for ``dirname`` and scan it for sub-repos.

    If the patterns inherited from the longest matching trie prefix do not
    match the ignore file itself and that file exists, its patterns are
    loaded and merged with the inherited ones. Otherwise the inherited
    patterns, when truthy, are stored under ``dirname`` as they are.
    Afterwards the first entry yielded by walking ``dirname`` is handed to
    ``_update_sub_repo``.

    Args:
        dirname: directory whose trie entry is being computed.
    """
    inherited = self.ignores_trie_tree.longest_prefix(dirname).value
    ignore_name = DvcIgnore.DVCIGNORE_FILE
    # NOTE(review): the third argument is presumably an "is directory" flag;
    # confirm against DvcIgnorePatterns.matches.
    file_is_ignored = inherited.matches(dirname, ignore_name, False)
    ignore_file_path = os.path.join(dirname, ignore_name)

    if not file_is_ignored and self.tree.exists(
        ignore_file_path, use_dvcignore=False
    ):
        own = DvcIgnorePatterns.from_files(ignore_file_path, self.tree)
        self.ignores_trie_tree[dirname] = (
            DvcIgnorePatterns(
                *merge_patterns(
                    inherited.pattern_list,
                    inherited.dirname,
                    own.pattern_list,
                    own.dirname,
                )
            )
            if inherited
            else own
        )
    elif inherited:
        self.ignores_trie_tree[dirname] = inherited

    # NOTE: tree doesn't have `listdir()`, so only the first item produced by
    # `walk` is consumed to get the immediate sub-directories.
    first_level = next(
        iter(self.tree.walk(dirname, use_dvcignore=False)), None
    )
    if first_level is not None:
        root, dirs, _ = first_level
        self._update_sub_repo(root, dirs)
def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):
    """An ignore file inside an ignored directory must not be collected.

    The directory ``dname`` is listed in the ignore file that sits next to
    it, and a second ignore file is written inside ``dname``. Both the
    parent directory and ``dname`` are expected to resolve to the default
    patterns merged with the parent's ignore file only.
    """
    tmp_dir.gen({"dir": {"subdir": {}}})

    target_dir = tmp_dir / dname
    ignored_name = os.path.basename(dname)

    # Ignore file next to the target directory, listing the directory name.
    top_ignore_file = target_dir.with_name(DvcIgnore.DVCIGNORE_FILE)
    top_ignore_file.write_text(ignored_name)
    dvc._reset()

    # Ignore file inside the target directory, written after the reset.
    ignore_file = target_dir / DvcIgnore.DVCIGNORE_FILE
    ignore_file.write_text("foo")

    dvcignore = dvc.dvcignore
    top_ignore_path = os.path.dirname(os.fspath(top_ignore_file))
    sub_dir_path = os.path.dirname(os.fspath(ignore_file))

    expected = DvcIgnorePatterns(
        *merge_patterns(
            _to_pattern_info_list([".hg/", ".git/", ".git", ".dvc/"]),
            os.fspath(tmp_dir),
            _to_pattern_info_list([ignored_name]),
            top_ignore_path,
        )
    )

    found_top = dvcignore._get_trie_pattern(top_ignore_path)
    assert expected == found_top
    # The sub-directory is only queried once the parent check has passed.
    assert found_top == dvcignore._get_trie_pattern(sub_dir_path)
def _update_sub_repo(self, path, ignore_trie: Trie):
    """Add a pattern to ``ignore_trie`` that ignores the sub-repo at ``path``.

    Does nothing when ``path`` is the root directory itself or when the
    filesystem reports no ``Repo.DVC_DIR`` entry inside it. Otherwise a
    ``/<name>/`` pattern is stored under the key of the parent directory,
    merged with whatever pattern the trie already resolves for that key.

    Args:
        path: directory to inspect.
        ignore_trie: trie that receives the resulting pattern.
    """
    from dvc.repo import Repo

    if path == self.root_dir:
        return
    if not self.fs.exists(self.fs.path.join(path, Repo.DVC_DIR)):
        return

    parent, name = self.fs.path.split(path)
    key = self._get_key(parent)

    sub_repo_pattern = DvcIgnorePatterns(
        [PatternInfo(f"/{name}/", f"in sub_repo:{name}")],
        parent,
        self.fs.sep,
    )
    inherited = ignore_trie.longest_prefix(key).value

    if not inherited:
        ignore_trie[key] = sub_repo_pattern
        return

    plist, prefix = merge_patterns(
        self.fs.path.flavour,
        inherited.pattern_list,
        inherited.dirname,
        sub_repo_pattern.pattern_list,
        sub_repo_pattern.dirname,
    )
    ignore_trie[key] = DvcIgnorePatterns(plist, prefix, self.fs.sep)
def __setitem__(self, root, ignore_pattern):
    """Merge ``ignore_pattern`` into the entry for ``root`` and store it.

    The pattern currently returned by ``self[root]`` is combined with
    ``ignore_pattern`` through ``merge_patterns``, and the result is written
    to ``self.trie`` under ``root``.

    Args:
        root: trie key being assigned.
        ignore_pattern: pattern object to merge on top of the current one.
    """
    current = self[root]
    merge_result = merge_patterns(
        current.dirname,
        current.pattern_list,
        ignore_pattern.dirname,
        ignore_pattern.pattern_list,
    )
    # merge_patterns is unpacked here as (dirname, pattern list).
    common_dirname, merged_pattern = merge_result
    self.trie[root] = DvcIgnorePatterns(merged_pattern, common_dirname)
def _update_sub_repo(self, root, dirs):
    """Add an ignore pattern at ``root`` for every sub-directory that is a repo.

    Each name in ``dirs`` for which ``self._is_dvc_repo(root, name)`` is
    truthy contributes a ``/<name>/`` pattern. It is merged with the pattern
    the trie currently resolves for ``root``, or stored on its own when that
    pattern is falsy.

    Args:
        root: directory that contains ``dirs``.
        dirs: names of the immediate sub-directories of ``root``.
    """
    for d in dirs:
        if not self._is_dvc_repo(root, d):
            continue

        sub_repo_patterns = ["/{}/".format(d)]
        # Looked up again on every pass because earlier iterations may have
        # already replaced the entry stored at ``root``.
        inherited = self.ignores_trie_tree.longest_prefix(root).value
        if not inherited:
            self.ignores_trie_tree[root] = DvcIgnorePatterns(
                sub_repo_patterns, root
            )
            continue

        merged = merge_patterns(
            inherited.pattern_list,
            inherited.dirname,
            sub_repo_patterns,
            root,
        )
        self.ignores_trie_tree[root] = DvcIgnorePatterns(*merged)
def _update_sub_repo(self, root, dirs):
    """Record and ignore every sub-directory of ``root`` that is a repo.

    Each name in ``dirs`` for which ``self._is_dvc_repo(root, name)`` is
    truthy is added to ``self._ignored_subrepos[root]`` and contributes a
    ``/<name>/`` pattern. That pattern is merged with the one the trie
    currently resolves for ``root``, or stored on its own when the resolved
    pattern is falsy.

    Args:
        root: directory that contains ``dirs``.
        dirs: names of the immediate sub-directories of ``root``.
    """
    for d in dirs:
        if not self._is_dvc_repo(root, d):
            continue

        # Build a new set rather than mutating the stored one in place.
        already_ignored = self._ignored_subrepos.get(root, set())
        self._ignored_subrepos[root] = already_ignored | {d}

        sub_repo_pattern = DvcIgnorePatterns([f"/{d}/"], root)
        # Looked up again on every pass because earlier iterations may have
        # already replaced the entry stored at ``root``.
        inherited = self.ignores_trie_tree.longest_prefix(root).value
        self.ignores_trie_tree[root] = (
            DvcIgnorePatterns(
                *merge_patterns(
                    inherited.pattern_list,
                    inherited.dirname,
                    sub_repo_pattern.pattern_list,
                    sub_repo_pattern.dirname,
                )
            )
            if inherited
            else sub_repo_pattern
        )
def _update(self, dirname):
    """Load the ignore file in ``dirname``, if any, into the trie.

    When the file exists its patterns are merged with the pattern returned
    by ``self._get_trie_pattern(dirname)``; if that pattern is falsy the new
    patterns are stored on their own. Nothing is written when the directory
    has no ignore file.

    Args:
        dirname: directory to look for an ignore file in.
    """
    ignore_file_path = os.path.join(dirname, DvcIgnore.DVCIGNORE_FILE)
    if not self.tree.exists(ignore_file_path, use_dvcignore=False):
        return

    own = DvcIgnorePatterns.from_files(ignore_file_path, self.tree)
    inherited = self._get_trie_pattern(dirname)
    if not inherited:
        self.ignores_trie_tree[dirname] = own
        return

    merged = merge_patterns(
        inherited.pattern_list,
        inherited.dirname,
        own.pattern_list,
        own.dirname,
    )
    self.ignores_trie_tree[dirname] = DvcIgnorePatterns(*merged)
def _update_trie(self, dirname: str, trie: PathStringTrie) -> None:
    """Store the ignore patterns that apply to ``dirname`` in ``trie``.

    If the patterns inherited from the longest matching prefix do not match
    the ignore file itself and that file exists, its patterns are loaded
    (labelled with the file's path relative to ``self.root_dir``) and merged
    with the inherited ones. Otherwise the inherited patterns, when truthy,
    are stored under ``dirname`` as they are.

    Args:
        dirname: directory whose trie entry is being computed.
        trie: trie that is read from and written to.
    """
    inherited = trie.longest_prefix(dirname).value
    ignore_name = DvcIgnore.DVCIGNORE_FILE
    # NOTE(review): the third argument is presumably an "is directory" flag;
    # confirm against DvcIgnorePatterns.matches.
    file_is_ignored = inherited.matches(dirname, ignore_name, False)
    path = os.path.join(dirname, ignore_name)

    reads_own_file = not file_is_ignored and self.fs.exists(path)
    if not reads_own_file:
        if inherited:
            trie[dirname] = inherited
        return

    name = os.path.relpath(path, self.root_dir)
    own = DvcIgnorePatterns.from_file(path, self.fs, name)
    if not inherited:
        trie[dirname] = own
        return

    merged = merge_patterns(
        inherited.pattern_list,
        inherited.dirname,
        own.pattern_list,
        own.dirname,
    )
    trie[dirname] = DvcIgnorePatterns(*merged)
def _update_trie(self, dirname: str, trie: Trie) -> None:
    """Store the ignore patterns that apply to ``dirname`` in ``trie``.

    Entries are addressed by ``self._get_key(dirname)``. If the patterns
    inherited from the longest matching prefix do not match the ignore file
    itself and that file exists, its patterns are loaded (labelled with the
    file's path relative to ``self.root_dir``) and merged with the inherited
    ones. Otherwise the inherited patterns, when truthy, are stored as they
    are.

    Args:
        dirname: directory whose trie entry is being computed.
        trie: trie that is read from and written to.
    """
    key = self._get_key(dirname)
    inherited = trie.longest_prefix(key).value
    ignore_name = DvcIgnore.DVCIGNORE_FILE
    # NOTE(review): the third argument is presumably an "is directory" flag;
    # confirm against DvcIgnorePatterns.matches.
    file_is_ignored = inherited.matches(dirname, ignore_name, False)
    path = self.fs.path.join(dirname, ignore_name)

    reads_own_file = not file_is_ignored and self.fs.exists(path)
    if not reads_own_file:
        if inherited:
            trie[key] = inherited
        return

    name = self.fs.path.relpath(path, self.root_dir)
    own = DvcIgnorePatterns.from_file(path, self.fs, name)
    if not inherited:
        trie[key] = own
        return

    plist, prefix = merge_patterns(
        self.fs.path.flavour,
        inherited.pattern_list,
        inherited.dirname,
        own.pattern_list,
        own.dirname,
    )
    trie[key] = DvcIgnorePatterns(plist, prefix, self.fs.sep)
def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):
    """An ignore file inside an ignored directory must not be collected.

    The directory ``dname`` is listed in the ignore file that sits next to
    it, and a second ignore file is written inside ``dname``. Both the
    parent directory and ``dname`` are expected to resolve to the default
    patterns merged with the parent's ignore file only.
    """
    tmp_dir.gen({"dir": {"subdir": {}}})

    target_dir = tmp_dir / dname
    ignored_name = os.path.basename(dname)

    # Ignore file next to the target directory, listing the directory name.
    top_ignore_file = target_dir.with_name(DvcIgnore.DVCIGNORE_FILE)
    top_ignore_file.write_text(ignored_name)

    # Drop any ``dvcignore`` entry stored on the tree instance so the next
    # attribute access does not reuse an earlier value.
    dvc.tree.__dict__.pop("dvcignore", None)

    # Ignore file inside the target directory, written after the reset.
    ignore_file = target_dir / DvcIgnore.DVCIGNORE_FILE
    ignore_file.write_text("foo")

    dvcignore = dvc.tree.dvcignore
    top_ignore_path = os.path.dirname(os.fspath(top_ignore_file))
    sub_dir_path = os.path.dirname(os.fspath(ignore_file))

    expected = DvcIgnorePatterns(
        *merge_patterns(
            [".hg/", ".git/", ".dvc/"],
            os.fspath(tmp_dir),
            [ignored_name],
            top_ignore_path,
        )
    )

    found_top = dvcignore._get_trie_pattern(top_ignore_path)
    assert expected == found_top
    # The sub-directory is only queried once the parent check has passed.
    assert found_top == dvcignore._get_trie_pattern(sub_dir_path)