def get_files_in_dir(path: Path, root: Path, gitignore: PathSpec) -> Iterator[Path]:
    """Yield every regular file found recursively under `path`.

    Entries for which `gitignore.match_file` returns a truthy value are
    skipped. Entries that cannot be resolved (`OSError`) and symbolic links
    that resolve to a location outside of `root` are skipped as well.

    :param path: directory whose entries are walked.
    :param root: absolute directory that every resolved entry must live under.
    :param gitignore: object whose `match_file` decides which entries to skip.
    :returns: an iterator over the files that were not skipped.
    :raises ValueError: if an entry that is not a symbolic link resolves to a
        location outside of `root`.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for child in path.iterdir():
        # First ignore files matching .gitignore
        if gitignore.match_file(child.as_posix()):
            continue

        # Then make sure the entry can be resolved and lives under `root`.
        # Only the validation matters here; the relative path is not used.
        try:
            child.resolve().relative_to(root)
        except OSError:
            continue
        except ValueError:
            if child.is_symlink():
                continue
            raise

        if child.is_dir():
            yield from get_files_in_dir(child, root, gitignore)
        elif child.is_file():
            yield child
def get_snakefiles_in_dir(
    path: Path, include: Pattern[str], exclude: Pattern[str], gitignore: PathSpec
) -> Iterator[Path]:
    """Walk `path` recursively and yield the files selected by the filters.

    An entry is dropped when `gitignore` matches its posix path, or when the
    `exclude` regex finds a non-empty match in its resolved posix path.
    Directories that survive are descended into; files that survive are
    yielded only if the `include` regex matches their file name.

    Adapted from
    https://github.com/psf/black/blob/ce14fa8b497bae2b50ec48b3bd7022573a59cdb1/black.py#L3519-L3573
    """
    for child in path.iterdir():
        # .gitignore has the highest priority.
        if gitignore.match_file(child.as_posix()):
            logging.debug(f"Ignoring: {child} matches .gitignore file content")
            continue

        # The `exclude` option is tested against the fully resolved path.
        resolved_posix = child.resolve().as_posix()
        exclusion = exclude.search(resolved_posix)
        if exclusion is not None and exclusion.group(0):
            logging.debug(f"Excluded: {child} matched the --exclude regular expression")
            continue

        if child.is_dir():
            yield from get_snakefiles_in_dir(child, include, exclude, gitignore)
            continue

        if not child.is_file():
            continue

        # Only the bare file name is tested against `include`.
        if include.search(child.name):
            logging.debug(
                f"Included: {child} matched the --include regular expression"
            )
            yield child
        else:
            logging.debug(
                f"Ignoring: {child} did not match the --include regular expression"
            )
def gen_python_files(
    paths: Iterable[Path],
    root: Path,
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Yield the `.py` files reachable from `paths`.

    Each path is first passed through `normalize_path(path, root)`; paths for
    which that returns None, or whose normalized form is matched by
    `gitignore`, are skipped. Directories are walked recursively and files
    are yielded when their string form ends with ".py".
    """
    for candidate in paths:
        relative = normalize_path(candidate, root)

        # Skip entries that could not be normalized as well as the ones
        # listed in .gitignore.
        if relative is None or gitignore.match_file(relative):
            continue

        if candidate.is_dir():
            yield from gen_python_files(candidate.iterdir(), root, gitignore)
            continue

        if candidate.is_file() and str(candidate).endswith(".py"):
            yield candidate
def post_order_lexicographic(top: str, ignore_pathspec: pathspec.PathSpec = None):
    """
    iterates a file system in the order necessary to generate composite tree hashes,
    bypassing ignored paths.

    :param top: the directory being iterated
    :param ignore_pathspec: the pathspec of ignore patterns to match file exclusions against.
        When it is None (or otherwise falsy) no path is ignored.
    :return: yields ``(directory, children)`` tuples in folder chunks, in the order necessary
        for composite directory hashes: every sub directory is yielded before its parent.
        ``children`` is the name-sorted list of ``(name, is_directory)`` tuples of the
        entries of ``directory`` that were not ignored.
    """
    # list of tuples. each tuple contains the child name and whether the child is a directory.
    children = []
    for name in sorted(os.listdir(top)):
        file_path = os.path.join(top, name)
        if ignore_pathspec and ignore_pathspec.match_file(file_path):
            # every ignored path is skipped; only the ascmhl folder is skipped silently.
            if os.path.basename(os.path.normpath(file_path)) != ascmhl_folder_name:
                logger.verbose(f'ignoring filepath {file_path}')
            continue
        children.append((name, os.path.isdir(file_path)))

    # if directory, yield children recursively in post order until exhausted.
    for name, is_dir in children:
        if not is_dir:
            continue
        path = os.path.join(top, name)
        # symbolic links to directories are listed as children but never descended into.
        if not os.path.islink(path):
            yield from post_order_lexicographic(path, ignore_pathspec)

    # now that all children have been traversed, yield the top (current) directory and all of
    # its sorted children.
    yield top, children
def iter_files(
    paths: Iterable[Path],
    include: Pattern[str],
    exclude: Pattern[str],
    gitignore: PathSpec,
) -> Generator[Path, None, None]:
    """
    Walk `paths` recursively and yield the files selected by the filters.

    Every path is made relative to `ROOT_PATH`. Paths matched by `gitignore`
    are skipped, then paths for which `exclude` finds a non-empty match, and
    finally files are yielded only when `include` matches. Both regexes are
    tested against the relative posix path prefixed with "/" (and suffixed
    with "/" for directories).

    Highly influenced by Black (https://github.com/psf/black).
    """
    for entry in paths:
        normalized = entry.relative_to(ROOT_PATH).as_posix()
        if gitignore.match_file(normalized):
            continue

        # Build the form the regular expressions are matched against.
        normalized = f"/{normalized}"
        candidate = normalized + "/" if entry.is_dir() else normalized

        excluded = exclude.search(candidate)
        if excluded is not None and excluded.group(0):
            continue

        if entry.is_dir():
            yield from iter_files(entry.iterdir(), include, exclude, gitignore)
        elif entry.is_file() and include.search(candidate) is not None:
            yield entry
def gen_template_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Walk `path` recursively and yield the files selected by the filters.

    A file is yielded when it is not matched by `gitignore`, its path is not
    excluded by the `exclude` regex and is included by the `include` regex.
    Both regexes are tested against the resolved path relative to `root`,
    prefixed with "/" (and suffixed with "/" for directories).

    Entries that cannot be resolved and symbolic links pointing outside of
    the `root` directory are ignored. Every ignored entry, except files that
    merely fail the `include` regex, is announced through
    `report.path_ignored`.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"

    for child in path.iterdir():
        # .gitignore has the highest priority.
        if gitignore.match_file(child.as_posix()):
            report.path_ignored(child, "matches the .gitignore file content")
            continue

        # Locate the entry relative to `root`; failing to do so means the
        # entry is unreadable or lives outside of `root`.
        try:
            relative = child.resolve().relative_to(root)
        except OSError as e:
            report.path_ignored(child, f"cannot be read because {e}")
            continue
        except ValueError:
            if not child.is_symlink():
                raise
            report.path_ignored(
                child, f"is a symbolic link that points outside {root}"
            )
            continue

        candidate = "/" + relative.as_posix()
        if child.is_dir():
            candidate += "/"

        excluded = exclude.search(candidate)
        if excluded and excluded.group(0):
            report.path_ignored(child, "matches the --exclude regular expression")
            continue

        if child.is_dir():
            yield from gen_template_files_in_dir(
                child, root, include, exclude, report, gitignore
            )
        elif child.is_file() and include.search(candidate):
            yield child
def yield_sources(
    path: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    gitignore: PathSpec,
    reporter: Report,
) -> Generator[Path, None, None]:
    """Yields `.py` paths to handle.

    Walk through path sub-directories/files recursively. When `path` is a
    file it is yielded if it ends with `PY_EXTENSION`; the other filters are
    not consulted in that case. Otherwise the files of a directory are
    yielded before its sub-directories are walked.

    :param path: A path to start searching from.
    :param include: regex pattern to be included.
    :param exclude: regex pattern to be excluded.
    :param gitignore: gitignore PathSpec object.
    :param reporter: a `report.Report` object; its `ignored_path` is called
        for every entry rejected by `exclude`, `gitignore` or `include`.
    :returns: generator of `.py` files paths.
    """
    if path.is_file():
        if str(path).endswith(PY_EXTENSION):
            yield path
        return

    dirs: List[str] = []
    files: List[str] = []

    is_included, is_excluded = regexu.is_included, regexu.is_excluded

    # Scan inside a context manager so the directory handle is released even
    # if filtering or reporting raises before the listing is exhausted.
    with os.scandir(path) as entries:
        for entry in entries:
            # Skip symlinks.
            if entry.is_symlink():
                continue

            # Everything that is not a regular file gets a trailing slash so
            # that the patterns can tell it apart from a file.
            name = entry.name if entry.is_file() else f"{entry.name}/"
            entry_path = Path(os.path.join(path, name))

            # Compute exclusions.
            if is_excluded(name, exclude):
                reporter.ignored_path(entry_path, EXCLUDE)
                continue

            # Compute `.gitignore`.
            if gitignore.match_file(name):
                reporter.ignored_path(entry_path, GITIGNORE)
                continue

            # Directories.
            if entry.is_dir():
                dirs.append(name)
                continue

            # Files.
            if is_included(name, include):
                files.append(name)
            else:
                reporter.ignored_path(entry_path, INCLUDE)

    for name in files:
        yield Path(os.path.join(path, name))

    for dirname in dirs:
        dir_path = Path(os.path.join(path, dirname))
        yield from yield_sources(dir_path, include, exclude, gitignore, reporter)
def yield_sources(
    path: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    extend_exclude: Pattern[str],
    gitignore: PathSpec,
    reporter: Report,
) -> Generator[Path, None, None]:
    """Yields `.py` paths to handle.

    Walk through path sub-directories/files recursively. When `path` is not a
    directory it is filtered as if it were the only entry of a directory.
    The accepted files of a directory are yielded before its sub-directories
    are walked.

    :param path: A path to start searching from.
    :param include: regex pattern to be included.
    :param exclude: regex pattern to be excluded.
    :param extend_exclude: regex pattern to be excluded in addition to `exclude`.
    :param gitignore: gitignore PathSpec object, or None to disable gitignore
        handling altogether.
    :param reporter: a `report.Report` object; its `ignored_path` is called
        for every entry rejected by one of the filters.
    :returns: generator of `.py` files paths.
    """
    dirs: Set[Path] = set()
    files: Set[Path] = set()

    is_included, is_excluded = regexu.is_included, regexu.is_excluded

    if path.is_dir():
        # Collect the listing inside a context manager so the directory
        # handle is always released.
        with os.scandir(path) as scanner:
            entry_paths = [Path(entry) for entry in scanner]
    else:
        entry_paths = [path]

    for entry_path in entry_paths:
        # Skip symlinks.
        if entry_path.is_symlink():
            continue

        # Compute exclusions.
        if is_excluded(entry_path, exclude):
            reporter.ignored_path(entry_path, EXCLUDE)
            continue

        # Compute extended exclusions.
        if is_excluded(entry_path, extend_exclude):
            reporter.ignored_path(entry_path, EXCLUDE)
            continue

        # Compute `.gitignore`. A None gitignore means the feature is
        # disabled, so there is nothing to match against.
        if gitignore is not None and gitignore.match_file(entry_path):
            reporter.ignored_path(entry_path, GITIGNORE)
            continue

        # Directories.
        if entry_path.is_dir():
            dirs.add(entry_path)
            continue

        # Files.
        if is_included(entry_path, include):
            files.add(entry_path)
        else:
            reporter.ignored_path(entry_path, INCLUDE)

    yield from files

    for dir_ in dirs:
        # If gitignore is None, gitignore usage is disabled, while a Falsey
        # gitignore is when the directory doesn't have a .gitignore file.
        yield from yield_sources(
            dir_,
            include,
            exclude,
            extend_exclude,
            gitignore + regexu.get_gitignore(dir_) if gitignore is not None else None,
            reporter,
        )
def get_snakefiles_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Generate all files under `path` whose paths are not excluded by the
    `exclude` regex, but are included by the `include` regex.

    Entries matched by `gitignore` are skipped first. The `exclude` regex is
    tested against the resolved path relative to `root`, prefixed with "/"
    (and suffixed with "/" for directories); the `include` regex is tested
    against the bare file name.

    Entries that cannot be read and symbolic links pointing outside of the
    `root` directory are ignored. Every decision is logged through `logging`.

    :raises ValueError: if an entry that is not a symbolic link resolves to a
        location outside of `root`.

    Adapted from
    https://github.com/psf/black/blob/ce14fa8b497bae2b50ec48b3bd7022573a59cdb1/black.py#L3519-L3573
    """
    root = root.resolve()
    for child in path.iterdir():
        # First ignore files matching .gitignore
        if gitignore.match_file(child.as_posix()):
            logging.debug(f"Ignoring: {child} matches .gitignore file content")
            continue

        # Then ignore with `exclude` option.
        try:
            normalized_path = "/" + child.resolve().relative_to(root).as_posix()
        except OSError as err:
            logging.debug(f"Ignoring: {child} cannot be read because {err}.")
            continue
        except ValueError:
            if child.is_symlink():
                logging.debug(
                    f"Ignoring: {child} is a symbolic link that points outside {root}"
                )
                continue
            logging.error(f"{child} caused error")
            # Re-raise the original exception so its message and traceback
            # are preserved instead of being wrapped in a new ValueError.
            raise

        if child.is_dir():
            normalized_path += "/"

        exclude_match = exclude.search(normalized_path)
        if exclude_match and exclude_match.group(0):
            logging.debug(f"Excluded: {child} matched the --exclude regular expression")
            continue

        if child.is_dir():
            yield from get_snakefiles_in_dir(child, root, include, exclude, gitignore)
        elif child.is_file():
            if include.search(child.name):
                logging.debug(
                    f"Included: {child} matched the --include regular expression"
                )
                yield child
            else:
                logging.debug(
                    f"Ignoring: {child} did not match the --include regular expression"
                )