def test_matches_simple__negative(self):
    """``matches_simple`` on the negated rule "!a" hits "a" and "z/a" but not "b"."""
    negated_rule = gitignore_parser("!a")[0]
    for hit in ("a", "z/a"):
        self.assertTrue(negated_rule.matches_simple(CPath(hit)))
    self.assertFalse(negated_rule.matches_simple(CPath("b")))
def test_matches(self):
    """``matches`` on the negated rule "!a" is falsy for every probed path."""
    negated_rule = gitignore_parser("!a")[0]
    for candidate in ("a", "z/a", "b"):
        self.assertFalse(negated_rule.matches(CPath(candidate)))
def test_listdir(self):
    """The backend's ``listdir`` agrees with ``os.listdir`` for the test directory."""
    test_dir = self.this_test_dir_name
    expected_entries = os.listdir(join_base(test_dir))
    actual_entries = self.real_fs.listdir(CPath(test_dir))
    self.assertEqual(expected_entries, actual_entries)

    # list the root dir
    root_entries = self.real_fs.listdir(CPath(""))
    self.assertGreater(len(root_entries), 0)
def test_ignore__char_class(self):
    """The negated character class ``[!0-8]`` ignores "a/9z" but not "a/8z" or "a/09z"."""

    def is_ignored(candidate):
        # A fresh matcher is built for every probe, as in the original assertions.
        return FsMatcherGitignore("a/[!0-8]z")._ignore(CPath(candidate))

    self.assertFalse(is_ignored("a/8z"))
    self.assertTrue(is_ignored("a/9z"))
    self.assertFalse(is_ignored("a/09z"))
def test_to_dict(self):
    """``to_dict`` reports names, type and a forward-slash path for a mixed-separator input."""
    expected = {
        'names': ('firstdir', 'seconddir', 'thirdfile'),
        'type': 'FILE',
        'path': 'firstdir/seconddir/thirdfile'
    }
    # The input mixes "/" with a run of backslashes.
    actual = CPath(r"firstdir/seconddir\\\thirdfile").to_dict()
    self.assertEqual(expected, actual)
def test_names(self):
    """``CPath.names`` lists the path components for a relative and an absolute path."""
    expected_names = ('firstdir', 'seconddir', 'thirdfile')

    path_1 = "firstdir/seconddir/thirdfile"
    cpath_1 = CPath(path_1)
    self.assertEqual(expected_names, cpath_1.names)

    # The second case previously repeated path_1 verbatim, so it covered
    # nothing new. The leading-'' check below only makes sense for an
    # absolute path: the root must not show up as an empty first name.
    path_2 = "/firstdir/seconddir/thirdfile"
    cpath_2 = CPath(path_2)
    self.assertEqual(expected_names, cpath_2.names)
    self.assertNotEqual(('', 'firstdir', 'seconddir', 'thirdfile'),
                        cpath_2.names)
def test_exists__path_type_independence(self):
    """``exists`` is true for the same location whether given as CPath, CFile or CDir."""
    file_location = f"{self.this_test_dir_name}/subdir/afile"
    dir_location = f"{self.this_test_dir_name}/subdir"
    candidates = (
        CPath(file_location),
        CFile(file_location, 1, 1),
        CPath(dir_location),
        CDir(dir_location),
    )
    for candidate in candidates:
        self.assertTrue(self.real_fs.exists(candidate))
def test_matches_simple__parent_dir_exclusion(self):
    """The pattern "logs" matches that name and every path below it."""
    logs_rule = gitignore_parser("logs")[0]
    matched_paths = (
        "logs",  # the file will be excluded
        "logs/",  # so is the directory
        "logs/important.log",  # and any file inside that directory
        "logs/important.log/",  # and again any directory inside that directory
    )
    for matched in matched_paths:
        self.assertTrue(logs_rule.matches_simple(CPath(matched)))
def test_path(self):
    """``CPath.path`` keeps leading and trailing slashes of the input."""
    relative = CPath("firstdir/seconddir/thirdfile")
    self.assertEqual('firstdir/seconddir/thirdfile', relative.path)

    absolute = CPath("/firstdir/seconddir/thirdfile")
    self.assertEqual('/firstdir/seconddir/thirdfile', absolute.path)
    # The leading slash must not be dropped.
    self.assertNotEqual('firstdir/seconddir/thirdfile', absolute.path)

    absolute_dir = CPath("/firstdir/seconddir/thirdfile/")
    self.assertEqual('/firstdir/seconddir/thirdfile/', absolute_dir.path)
def test_matches__root_relative(self):
    """Check ``matches`` for a root-relative rule and for a rule without a slash."""
    anchored_rule = gitignore_parser("a/*/z")[0]
    self.assertTrue(anchored_rule.is_root_relative)

    # It matches a path that is root relative ...
    self.assertTrue(anchored_rule.matches(CPath("a/b/z")))
    # ... but it doesn't match the same names at an inner level.
    self.assertFalse(anchored_rule.matches(CPath("1/a/b/z")))

    # A rule that is not root relative.
    floating_rule = gitignore_parser("a*z")[0]
    self.assertTrue(floating_rule.matches(CPath("ayz")))
def test_matches_simple__parent_dir_exclusion___dir_only(self):
    """The directory-only pattern "logs/" skips a file named "logs" but matches the rest."""
    dir_only_rule = gitignore_parser("logs/")[0]

    def hit(candidate):
        return dir_only_rule.matches_simple(CPath(candidate))

    # Notice here that it will not match:
    # a dir-only pattern does not match a file.
    self.assertFalse(hit("logs"))
    # But it will match dirs as usual.
    self.assertTrue(hit("logs/"))
    # Subdirs match as before.
    self.assertTrue(hit("logs/important.log/"))
    # And so does anything inside that dir pattern.
    self.assertTrue(hit("logs/important.log"))
def test_ignore__double_asterisk(self):
    """``**`` spans zero or more directory levels in ignore patterns."""

    def is_ignored(pattern, candidate):
        # A new matcher per probe, mirroring the original one-liner assertions.
        return FsMatcherGitignore(pattern)._ignore(CPath(candidate))

    ignored_cases = (
        ("m/**", "m/n/o9z"),
        ("a/**/z", "a/b/c/d/z"),
        ("a/**/z", "a/z"),
        ("a/**/z", "a/b/z/c/z"),
    )
    for pattern, candidate in ignored_cases:
        self.assertTrue(is_ignored(pattern, candidate))

    self.assertFalse(is_ignored("a/**/z/q/z", "a/b/z/c/z"))
def test_ignore__negation_directory(self):
    """A negation cannot re-include a file whose parent directory is ignored.

    Rules:
        logs/
        !logs/important.log

    Both of these stay ignored (in spite of the ``!`` rule):
        logs/debug.log
        logs/important.log

    For performance reasons git does not re-include a file when a parent
    directory of that file is ignored.
    """
    rules = "logs/\n!logs/important.log"
    matcher = FsMatcherGitignore(rules)
    for ignored in ("logs/debug.log", "logs/important.log"):
        self.assertTrue(matcher._ignore(CPath(ignored)))
def test_matches__directories_only(self):
    """Check the ``directories_only`` flag and matching for rules with and without a trailing slash."""
    dir_rule = gitignore_parser("z/?u*ns/")[0]
    # This is a directories-only rule ...
    self.assertTrue(dir_rule.directories_only)
    # ... and it matches a directory path.
    self.assertTrue(dir_rule.matches(CPath("z/humans/")))

    any_rule = gitignore_parser("z/?uman")[0]
    # This is NOT a directories-only rule ...
    self.assertFalse(any_rule.directories_only)
    # ... and it matches both the file form and the directory form of the path.
    for candidate in ("z/human", "z/human/"):
        self.assertTrue(any_rule.matches(CPath(candidate)))
def test_add(self):
    """Adding a deep directory makes the intermediate directory retrievable via ``get``."""
    tree = CDirTree("a/b/c")
    deepest = tree.add(CDir('a/b/c/d/e'))
    self.assertEqual(deepest.path, 'a/b/c/d/e/')

    # The intermediate directory "d" can be fetched in its directory spelling ...
    with_slash = tree.get(CPath("a/b/c/d/"))
    self.assertEqual(('a', 'b', 'c', 'd'), with_slash.names)
    # ... without the trailing slash, and as an explicit CDir,
    self.assertIsNotNone(tree.get(CPath("a/b/c/d")))
    self.assertIsNotNone(tree.get(CDir("a/b/c/d")))
    # but asking for it as a CFile yields nothing.
    self.assertIsNone(tree.get(CFile("a/b/c/d", 1, 1)))
def test_ignore__negation_re_ignore(self):
    """A later rule can re-ignore what an earlier negation re-included.

    Rules:
        *.log
        !important/*.log
        trace.*

    Ignored:
        debug.log
        important/trace.log
    Not ignored:
        important/debug.log
    """
    matcher = FsMatcherGitignore("*.log\n!important/*.log\ntrace.*")
    for ignored in ("debug.log", "important/trace.log"):
        self.assertTrue(matcher._ignore(CPath(ignored)))
    self.assertFalse(matcher._ignore(CPath("important/debug.log")))
def setUp(self) -> None:
    """Set up fixture names, the expected SHA-1 string and a real-fs backend based at the data dir."""
    data_dir_name = "real_fs_test_data"
    hash_file_name = f"{data_dir_name}/test_file_for_hash"

    self.this_test_dir_name = data_dir_name
    self.file_1_name = hash_file_name
    self.file_1_sha1_hash = "A053DC84FE753C3E9187B97923F7A57BB7F44299"
    self.file_1_cpath = CPath(hash_file_name)

    backend = RealMetaFileSystemBackend()
    self.real_fs = backend.set_base_path(get_data_dir())
def test_get(self):
    """``get`` finds intermediate directories and honours ``path_type_aware``."""
    tree = CDirTree("a/b/c")
    deepest = tree.add(CDir('a/b/c/d/e/f'))
    self.assertEqual(('a', 'b', 'c', 'd', 'e', 'f'), deepest.names)

    found_dir = tree.get(CPath('a/b/c/d/'))
    self.assertEqual('a/b/c/d/', found_dir.path)

    # path type aware/unaware
    plain_lookup = tree.get(CPath('a/b/c/d'))
    self.assertIsNotNone(plain_lookup)

    unaware_lookup = tree.get(CFile('a/b/c/d', 1, 1), path_type_aware=False)
    self.assertIsNotNone(unaware_lookup)

    aware_lookup = tree.get(CFile('a/b/c/d', 1, 1))
    self.assertIsNone(aware_lookup)
def test_diff(self):
    """``diff`` sorts differences between two trees into modified, new and deleted.

    Tree 1 holds ``dir1/a.txt``, ``dir2/``, ``dir3/p/q/r`` and
    ``dir3/p/x.txt``. Tree 2 has a changed ``dir1/a.txt`` and lacks
    ``dir2/``; everything else is the same.
    """
    root_tree1 = CDirTree()
    root_tree1.add(CFile("dir1/a.txt", 1, 2))
    root_tree1.add(CDir("dir2/"))
    root_tree1.add(CDir("dir3/p/q/r"))
    root_tree1.add(CFile("dir3/p/x.txt", 1, 5))

    root_tree2 = CDirTree()
    root_tree2.add(CFile("dir1/a.txt", 11, 55))
    root_tree2.add(CDir("dir3/p/q/r"))
    root_tree2.add(CFile("dir3/p/x.txt", 1, 5))

    diff = root_tree1.diff(root_tree2)

    self.assertIsNotNone(diff.modified.get(CPath("dir1/a.txt")))
    # assertTrue(value, 1) treated 1 as the failure message and passed for
    # any non-zero count; assertEqual actually checks the count.
    self.assertEqual(1, len(diff.modified.get_children_cpaths()))

    # NOTE(review): presumably is_empty is a property; if it is a method this
    # assertion is always true and needs a call — confirm against CDirTree.
    self.assertTrue(diff.new.is_empty)

    self.assertIsNotNone(diff.deleted.get(CPath("dir2/")))
    self.assertEqual(1, len(diff.deleted.get_children_cpaths()))
def test_path_to_names(self):
    """``to_path_comps_info`` yields the same names for several separator spellings."""
    expected_names = ('firstdir', 'seconddir', 'thirdfile')

    # Leading backslash: names are extracted and the drive is reported as "/".
    rooted_info = CPath.to_path_comps_info("\\firstdir/seconddir/thirdfile")
    self.assertEqual(expected_names, rooted_info.names)
    # TODO: should this be changed to tuple from list?
    self.assertEqual(rooted_info.drive, '/')

    other_spellings = (
        r"firstdir\seconddir/thirdfile",  # mixed separators
        r"firstdir\\\seconddir\thirdfile/",  # repeated backslashes, trailing slash
        "/firstdir/seconddir/thirdfile/",  # leading and trailing slash
    )
    for spelling in other_spellings:
        self.assertEqual(expected_names,
                         CPath.to_path_comps_info(spelling).names)
def test_ignore__negation(self):
    """A ``!`` rule re-includes files matched by an earlier rule.

    Rules:
        *.log
        !important.log

    Ignored:
        debug.log
        trace.log
    Not ignored:
        important.log
        logs/important.log
    """
    matcher = FsMatcherGitignore("*.log\n!important.log")

    # will match
    for ignored in ("debug.log", "trace.log"):
        self.assertTrue(matcher._ignore(CPath(ignored)))

    # will not match
    for kept in ("important.log", "logs/important.log"):
        self.assertFalse(matcher._ignore(CPath(kept)))
def __list(self, parent: CDirTree, do_hash=False):
    """Recursively fill ``parent`` with the entries the file system lists under it.

    The work is done in three passes over the directory's entries:

    1. every entry name is turned into a ``CFile`` (with mtime and size) or
       a ``CDir``;
    2. on a real file system, files whose name is known to
       ``FsMatcherMapper`` are read and registered as matchers for
       ``parent``;
    3. entries the matcher root group wants included are added to
       ``parent``; directories are descended into, and files are hashed
       first when ``do_hash`` is true.

    :param parent: tree node whose direct children are listed.
    :param do_hash: when true, included files are stored as ``CFileHashed``.
    """
    assert isinstance(parent, CDirTree)
    entry_names: List[str] = self.__fs.listdir(parent)
    base_names: Tuple[str, ...] = parent.names

    # Pass 1: build typed paths. Hashing is deliberately left for pass 3,
    # because a matcher found in pass 2 may still exclude the file.
    children = []
    for entry_name in entry_names:
        full_names = (*base_names, entry_name)
        probe = CPath(full_names)
        if not self.__fs.is_file(probe):
            children.append(CDir(full_names))
            continue
        mtime = self.__fs.getmtime(probe)
        size = self.__fs.getsize(probe)
        children.append(CFile(full_names, mtime, size))

    # Pass 2: look for fs matcher config files among the children.
    for child in children:
        if not (self.__fs.is_real_fs() and child.is_file()):
            continue
        if not FsMatcherMapper.exists(child.name):
            continue
        # TODO: read content of fs matcher files (e.g. gitignore) and then
        #  match further relative to that.
        with self.__fs.open(child, "r", encoding="utf-8") as f:
            content = f.read()
        matcher_factory = FsMatcherMapper.get(child.name)
        fs_matcher: AbcFsMatcher = matcher_factory(content)
        # The root group decides where includers and excluders go and
        # forwards the matcher to the appropriate matcher group.
        self.__fs_matcher_root_group.add(fs_matcher, parent)

    # Pass 3: add (and optionally hash) everything that is not ignored.
    for child in children:
        if not self.__fs_matcher_root_group.should_include(child):
            continue
        if child.is_file() and do_hash:
            hash_value = self.__fs.gethash(child)
            child = CFileHashed(child.names, child.mtime, child.size,
                                hash_value)
        if not child.is_dir():
            parent.add(child)
            continue
        subtree = parent.add(child)
        self.__list(subtree, do_hash=do_hash)
def getmtime(self, cpath: CPath):
    """Return the ``mtime`` stored in the tree for ``cpath``.

    :raises CFSException: if the tree has no entry for ``cpath``, or if
        either ``cpath`` or the stored entry is a directory.
    """
    stored = self.__cdir_tree.get(cpath)
    if stored is None:
        raise CFSException(
            f'File System Error (occurred during getmtime on cpath: {cpath}):\n'
            f'X Path does not exist')

    refers_to_dir = stored.is_dir() or cpath.is_dir()
    if not refers_to_dir:
        return stored.mtime

    raise CFSException(
        f'File System Error (occurred during getmtime on cpath: {cpath}):\n'
        f'X gmtime on dir')
def getsize(self, cpath: CPath):
    """Return the size of the file at ``cpath`` as reported by ``os.path.getsize``.

    :param cpath: path to measure; it is resolved through ``self._full_path``.
    :raises CFSException: if ``cpath.is_file()`` is false, or if the
        underlying OS call fails (the ``OSError`` is chained as the cause).
    """
    if not cpath.is_file():
        raise CFSException(
            f'File System Error (occurred during getsize on cpath: {cpath}):\n'
            f'get size can only be used on Files'
        )
    try:
        return os.path.getsize(self._full_path(cpath))
    except OSError as e:  # IOError is an alias of OSError on Python 3
        # The message used to say "gethash" here (copy-paste slip).
        raise CFSException(
            f'File System Error (occurred during getsize on cpath: {cpath}):\n'
            f'{str(e)}'
        ) from e
def getsize(self, cpath: CPath):
    """Return the ``size`` stored in the tree for ``cpath``.

    :raises CFSException: if the tree has no entry for ``cpath``, or if
        either ``cpath`` or the stored entry is a directory.
    """
    inside_cpath = self.__cdir_tree.get(cpath)
    if inside_cpath is None:
        # The message used to say "gethash" here (copy-paste slip).
        raise CFSException(
            f'File System Error (occurred during getsize on cpath: {cpath}):\n'
            f'X Path not found')
    if cpath.is_dir() or inside_cpath.is_dir():
        # Give a reason, mirroring the wording getmtime uses for directories.
        raise CFSException(
            f'File System Error (occurred during getsize on cpath: {cpath}):\n'
            f'X getsize on dir')
    return inside_cpath.size
def is_ancestor(candidate_ancestor: CPath,
                candidate_descendant: CPath) -> bool:
    """Tell whether ``candidate_ancestor`` is a strict name-prefix of ``candidate_descendant``.

    :param candidate_ancestor: must satisfy ``is_dir()``.
    :param candidate_descendant: the path checked against the ancestor.
    :returns: ``True`` when the ancestor has fewer names than the descendant
        and each of them equals the descendant's name at the same position.
    :raises CFSException: if ``candidate_ancestor.is_dir()`` is false.
    """
    if not candidate_ancestor.is_dir():
        raise CFSException(
            f"Candidate ancestor/parent must pass is_dir: {candidate_ancestor}"
        )  # TODO: create more specific exception

    # A path is never its own ancestor, nor the ancestor of a shorter path.
    if candidate_ancestor.names_count >= candidate_descendant.names_count:
        return False

    prefix_names = candidate_ancestor.names
    full_names = candidate_descendant.names
    return all(
        prefix_name == full_names[position]
        for position, prefix_name in enumerate(prefix_names)
    )
def gethash(self, cpath: CPath):
    """Return the hash stored in the tree for the file at ``cpath``.

    :param cpath: path used to look the entry up in the tree.
    :returns: the ``hash`` attribute of the stored ``CFileHashed`` entry.
    :raises CFSException: if the tree has no entry for ``cpath``, if either
        ``cpath`` or the stored entry is not a file, or if the stored entry
        is not a ``CFileHashed``.
    """
    inside_cpath = self.__cdir_tree.get(cpath)
    if inside_cpath is None:
        raise CFSException(
            f'File System Error (occurred during gethash on cpath: {cpath}):\n'
            f'X Path not found')
    if not cpath.is_file() or not inside_cpath.is_file():
        raise CFSException(
            f'File System Error (occurred during gethash on cpath: {cpath}):\n'
            f'get hash can only be used on Files')
    if not isinstance(inside_cpath, CFileHashed):
        raise CFSException(
            f'File System Error (occurred during gethash on cpath: {cpath}):\n'
            f'X file was not hashed')
    # Return the hash of the *stored* entry. The query object passed in is
    # only a lookup key and is not required to carry a hash itself.
    return inside_cpath.hash
def gethash(self, cpath: CPath):
    """Hash the file at ``cpath`` with ``config.HASHER`` and return its hex digest.

    The file is read in 65536-byte chunks so it is never loaded whole.

    :raises CFSException: if ``cpath.is_file()`` is false, or if an
        ``OSError``/``IOError`` occurs while hashing.
    """
    if not cpath.is_file():
        raise CFSException(
            f'File System Error (occurred during gethash on cpath: {cpath}):\n'
            f'get hash can only be used on Files'
        )
    try:
        chunk_size = 65536
        digest = config.HASHER()
        with open(self._full_path(cpath), 'rb') as stream:
            # iter() with a sentinel stops on the first empty read (EOF).
            for chunk in iter(lambda: stream.read(chunk_size), b''):
                digest.update(chunk)
        return digest.hexdigest()
    except (OSError, IOError) as e:
        raise CFSException(
            f'File System Error (occurred during gethash on cpath: {cpath}):\n'
            f'{str(e)}'
        )
def _get(
        self, path: Union[CPath, str, List[str], Tuple[str, ...]]
) -> Union[CFile, 'CDirTree', None]:
    """Look up a descendant of this tree.

    :param path: a ``CPath``, or a list/tuple that is converted with
        ``CPath.to_cpath_info``. For a ``CPath`` on a non-root tree, the
        path must be strictly longer than this tree's names and start with
        them; only the remaining names are searched.
        NOTE(review): the annotation also admits ``str``, but a ``str``
        fails the isinstance assertion below — confirm which is intended.
    :returns: the child file if one has the final name, otherwise the child
        tree (a tree, not a cdir) with that name, otherwise ``None``.
        ``None`` is also returned when the path does not lie under this
        tree or an intermediate directory is missing.
    """
    if isinstance(path, CPath):
        cpath: CPath = path
        if self.is_root():
            names = cpath.names
        else:
            own_names = self.names
            prefix_names = cpath.names[:self.names_count]
            lies_below = (cpath.names_count > self.names_count
                          and own_names == prefix_names)
            if not lies_below:
                return None
            names = cpath.names[self.names_count:]
    else:
        assert isinstance(path, (list, tuple))
        names = CPath.to_cpath_info(path).names
    assert len(names) > 0, "Programmer's Error"

    # Walk down through every intermediate directory name.
    target_tree = self
    for name in names[:-1]:
        target_tree = target_tree._get_child_tree(name)
        if target_tree is None:
            return None

    # A file with the final name wins over a tree with that name; each
    # lookup is done once instead of once to test and once to fetch.
    leaf_name = names[-1]
    child_file = target_tree._get_child_file(leaf_name)
    if child_file is not None:
        return child_file
    return target_tree._get_child_tree(leaf_name)  # the tree, or None
def add(self, cpath: CPath) -> 'CDirTree':
    """
    Add ``cpath`` below this tree, creating any missing intermediate
    directories on the way (any level of descendants).

    :param cpath: a relative ``CPath`` (not a ``CDirTree``) that has more
        names than this tree. On a non-root tree its leading names must
        equal this tree's names.
    :returns: whatever ``_add_child`` returns for ``cpath`` on the deepest
        intermediate tree (the "last sub tree").
    """
    # Preconditions are enforced with assertions (programmer errors).
    assert isinstance(cpath, CPath)
    assert not isinstance(cpath, CDirTree)
    assert cpath.is_rel, "Cannot add absolute path to a tree"
    if cpath.is_dir():
        cdir: CDir = cpath
        assert not cdir.is_root(), f"Programmer's Error - cannot add root dir to a tree or sub tree: {cdir.to_dict()}"
    assert cpath.names_count > self.names_count
    if not self.is_root():
        assert cpath.names[:self.names_count] == self.names, f"cpath.names {cpath.names} cpath.names[:self.names_count] {cpath.names[:self.names_count]}, self.names {self.names}"
    # assert cpath.name not in self._child_map, "Cannot add a child twice" TODO: think later whether this old check will be added in the new add check

    # Split the names into the part this tree already represents (left)
    # and the part that still has to be walked/created (right).
    comp_left_names = tuple(cpath.names[:self.names_count])
    comp_right_names = tuple(cpath.names[self.names_count:])
    assert comp_left_names == self.names, f"Root didn't match: {comp_left_names} <-> {self.names}"
    self_own_names = comp_left_names
    target_tree = self  # target tree is the tree where the cpath will be added as child
    _inc_right_names = []  # right names accumulated one by one, to test existence and to build each intermediate CDir
    # All right-hand names except the last are intermediate directories.
    for dir_comp_name in comp_right_names[:-1]:
        _inc_right_names.append(dir_comp_name)
        new_target = target_tree._get_child_tree(dir_comp_name)
        if new_target is None:
            # Intermediate directory is missing: create it from the full name list so far.
            new_target = target_tree._add_child(
                CDir([*self_own_names, *_inc_right_names]))
        # This must run on every iteration (loop level, not inside the
        # `if` above): the walk has to descend into existing subtrees too.
        # The original author notes that indenting this line one level
        # further caused duplicate-dir bugs in visit.
        target_tree = new_target
    last_tree = target_tree._add_child(cpath)
    return last_tree