def get_leading_directory(self, depth=None, consider=None, exclude=None):
    """Find the single directory under which the archive content resides

    Parameters
    ----------
    depth: int or None, optional
      Upper bound on the number of leading directory levels to report.
      None means no limit
    consider : list of str, optional
      Regular expressions selecting the paths to take into account
      (checked before `exclude`).  Matched against the entire relative
      path of a file as listed for the archive
    exclude: list of str, optional
      Regular expressions for paths to disregard.  Matched against the
      entire relative path of a file as listed for the archive

    Returns
    -------
    str or None:
      The leading directory shared by all considered files.  None if
      they do not share one, or if no file was considered at all
    """
    common = None
    # only files get listed, so the last component is always a file name
    for path in self.get_extracted_files():
        if consider and not any_re_search(consider, path):
            continue
        if exclude and any_re_search(exclude, path):
            continue

        dirs = path.split(opsep)[:-1]  # drop the file name itself
        if common is None:
            # the first considered file seeds the candidate, capped by depth
            common = dirs if depth is None else dirs[:depth]
        elif dirs[:len(common)] != common:
            # shrink the candidate to the part shared with this file
            n_shared = 0
            for known, current in zip(common, dirs):
                if known != current:
                    break
                n_shared += 1
            common = common[:n_shared]
        if not common:
            # nothing shared is left, further files cannot change that
            return None

    if common is None:
        return None
    return opj(*common)
def test_any_re_search():
    """Exercise any_re_search with a single regex and with lists of them"""
    # (regex(es), value to search in, assertion the outcome must satisfy)
    cases = (
        # a single regex given as a plain string
        ('a', 'a', assert_true),
        ('a', 'bab', assert_true),
        ('^a', 'bab', assert_false),
        # a list of regexes
        (['b', '.ab'], 'bab', assert_true),
        (['^b', 'bab'], 'ab', assert_false),
    )
    for regexes, value, check in cases:
        check(any_re_search(regexes, value))