def lines_in_files_containing_expression(expression, root_dir, ignored_regex_objects):
    """
    Gather the matching lines of every file found under root_dir.

    Walks root_dir and its subdirectories (as listed by
    file_helper.directories_in_dir_recursive) and asks
    lines_in_file_containing_expression for the matching lines of each file.

    :param expression: regex string pattern to search for
        e.g. "^[a-zA-Z]+_TESTResult.*"
    :param root_dir: directory to start search
    :param ignored_regex_objects: regular expression objects compiled from
        patterns; handed unchanged to the file_helper directory and file listings
    :return: list of tuples, one for each file whose per-file search result
        was not None. Each tuple contains file name and list of lines
        e.g. ('test_result01.txt', ['line 1 a_TESTResult.txt'])
    """
    matches = []
    for search_dir in file_helper.directories_in_dir_recursive(root_dir, ignored_regex_objects):
        # print to show user a simple progress indicator
        print("Searching " + search_dir)
        for name in file_helper.files_in_dir(search_dir, ignored_regex_objects):
            found_lines = lines_in_file_containing_expression(expression, search_dir, name)
            if found_lines is None:
                # per-file search reported nothing for this file
                continue
            matches.append((name, found_lines))
    return matches
def directories_number_of_files_containing_expression(root_dir, ignored_regex_objects, expression):
    """
    Count, per directory, the files for which search_file finds expression.

    Searches root_dir and subdirectories, as listed by
    file_helper.directories_in_dir_recursive.

    :param root_dir: directory to start search
    :param ignored_regex_objects: regular expression objects compiled from
        patterns; handed unchanged to the file_helper directory and file listings
    :param expression: expression passed to search_file for every file
    :return: dictionary with key directory name and value number of files
        that contain expression
    """
    counts = {}
    for search_dir in file_helper.directories_in_dir_recursive(root_dir, ignored_regex_objects):
        # print to show user a simple progress indicator
        print("Searching " + search_dir)

        # a file counts when search_file returns anything other than None
        match_count = sum(
            1
            for name in file_helper.files_in_dir(search_dir, ignored_regex_objects)
            if search_file(expression, search_dir, name) is not None
        )
        counts[search_dir] = match_count

        noun = 'file' if match_count == 1 else 'files'
        print(" found " + str(match_count) + " " + noun)
    return counts
def lines_in_files_containing_expression(expression, root_dir, ignored_regex_objects):
    """
    Recursively search root_dir and return the matching lines, file by file.

    :param expression: regex string pattern to search for
        e.g. "^[a-zA-Z]+_TESTResult.*"
    :param root_dir: directory to start search
    :param ignored_regex_objects: regular expression objects compiled from
        patterns; forwarded to file_helper when listing directories and files
    :return: list of tuples. Each tuple contains file name and list of lines
        e.g. ('test_result01.txt', ['line 1 a_TESTResult.txt']). Files for
        which lines_in_file_containing_expression returns None are left out.
    """
    collected = []
    all_dirs = file_helper.directories_in_dir_recursive(root_dir, ignored_regex_objects)
    for current_dir in all_dirs:
        # print to show user a simple progress indicator
        print("Searching " + current_dir)
        per_file = (
            (fname, lines_in_file_containing_expression(expression, current_dir, fname))
            for fname in file_helper.files_in_dir(current_dir, ignored_regex_objects)
        )
        # keep only the files whose search produced a result
        collected.extend(pair for pair in per_file if pair[1] is not None)
    return collected
def test_files_in_dir_level_1(self):
    """
    files_in_dir on searcher_data/search_dir/level_1 (relative to the current
    working directory), using expression_helper's ignored filename patterns,
    should return exactly 'a.txt' and 'c.txt alias'.
    """
    ignore_list = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)
    level_1_dir = pathlib.Path('.') / 'searcher_data' / 'search_dir' / 'level_1'

    found = file_helper.files_in_dir(level_1_dir, ignore_list)

    # Element order is irrelevant, so compare as sets rather than lists
    self.assertEqual({'a.txt', 'c.txt alias'}, set(found))
def test_files_in_dir_ignore_ython(self):
    """
    files_in_dir on searcher_data/search_dir with an extra 'ython' ignore
    pattern: the only file name expected back is
    'httpwww.beepscore.comhubcape'.
    """
    patterns = [r'\A\.$', r'\A\.\.$', r'\A\.DS_Store$', r'ython']
    ignore_list = expression_helper.regex_objects_from_patterns(patterns)
    search_dir = pathlib.Path('.') / 'searcher_data' / 'search_dir'

    found = file_helper.files_in_dir(search_dir, ignore_list)

    # Element order is irrelevant, so compare as sets rather than lists
    self.assertEqual({'httpwww.beepscore.comhubcape'}, set(found))
def test_files_in_dir(self):
    """
    files_in_dir on searcher_data/search_dir, ignoring only '.', '..' and
    '.DS_Store', should return the four expected file names.
    """
    patterns = [r'\A\.$', r'\A\.\.$', r'\A\.DS_Store$']
    ignore_list = expression_helper.regex_objects_from_patterns(patterns)
    search_dir = pathlib.Path('.') / 'searcher_data' / 'search_dir'

    found = file_helper.files_in_dir(search_dir, ignore_list)

    # Element order is irrelevant, so compare as sets rather than lists
    wanted = {
        'httppython.org',
        'httpsen.wikipedia.orgwikiPython_%28programming_language%29',
        'httpswww.google.com#q=python',
        'httpwww.beepscore.comhubcape',
    }
    self.assertEqual(wanted, set(found))
def directories_number_of_files_containing_expression(root_dir, ignored_regex_objects, expression):
    """
    Report how many files in each directory contain expression.

    Searches root_dir and subdirectories; every file listed by
    file_helper.files_in_dir is checked with search_file.

    :param root_dir: directory to start search
    :param ignored_regex_objects: regular expression objects compiled from
        patterns; forwarded to file_helper when listing directories and files
    :param expression: expression passed to search_file for every file
    :return: dictionary with key directory name and value number of files
        that contain expression
    """
    all_dirs = file_helper.directories_in_dir_recursive(root_dir, ignored_regex_objects)

    tally = {}
    for current_dir in all_dirs:
        # print to show user a simple progress indicator
        print("Searching " + current_dir)

        candidates = file_helper.files_in_dir(current_dir, ignored_regex_objects)
        # search_file returning None means the file does not count
        hits = [
            fname for fname in candidates
            if search_file(expression, current_dir, fname) is not None
        ]
        hit_count = len(hits)
        tally[current_dir] = hit_count

        if hit_count != 1:
            label = 'files'
        else:
            label = 'file'
        print(" found " + str(hit_count) + " " + label)

    return tally