Exemplo n.º 1
0
def get_files_in_dir(path: Path, root: Path, gitignore: PathSpec) -> List[Path]:
    """Recursively yield every regular file under `path` not matched by `gitignore`.

    NOTE: despite the `List[Path]` annotation this function is a generator;
    results are produced lazily while the tree is walked.

    An entry is skipped when:

    * `gitignore.match_file` matches its POSIX path,
    * resolving it raises `OSError` (e.g. it cannot be read), or
    * it is a symbolic link whose target lies outside `root`.

    :param path: directory whose entries are walked.
    :param root: absolute directory that every resolved entry must live under.
    :param gitignore: object exposing `match_file(str)`, used to skip entries.
    :raises ValueError: if a non-symlink entry resolves outside of `root`.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for child in path.iterdir():
        # First ignore files matching .gitignore
        if gitignore.match_file(child.as_posix()):
            continue

        # The resolved path is not needed for anything else here; resolving is
        # done purely to detect unreadable entries and links escaping `root`.
        try:
            child.resolve().relative_to(root)
        except OSError:
            continue
        except ValueError:
            if child.is_symlink():
                continue

            raise

        if child.is_dir():
            yield from get_files_in_dir(child, root, gitignore)

        elif child.is_file():
            yield child
Exemplo n.º 2
0
def get_snakefiles_in_dir(
    path: Path, include: Pattern[str], exclude: Pattern[str], gitignore: PathSpec
) -> Iterator[Path]:
    """Walk `path` recursively and yield the files selected by the filters.

    An entry is dropped when `gitignore` matches its POSIX path, or when the
    `exclude` regex finds a non-empty match in its resolved absolute path.
    Surviving directories are descended into; surviving files are yielded only
    if the `include` regex matches their bare file name. Every decision is
    logged at debug level.
    Adapted from
    https://github.com/psf/black/blob/ce14fa8b497bae2b50ec48b3bd7022573a59cdb1/black.py#L3519-L3573
    """
    for entry in path.iterdir():
        # .gitignore rules are checked before any other filter.
        if gitignore.match_file(entry.as_posix()):
            logging.debug(f"Ignoring: {entry} matches .gitignore file content")
            continue

        # The exclude regex is applied to the resolved, absolute POSIX path.
        resolved = str(entry.resolve().as_posix())
        excluded = exclude.search(resolved)
        if excluded is not None and excluded.group(0):
            logging.debug(f"Excluded: {entry} matched the --exclude regular expression")
            continue

        if entry.is_dir():
            yield from get_snakefiles_in_dir(entry, include, exclude, gitignore)
            continue

        if not entry.is_file():
            continue

        # Only the file name (not the full path) is tested against `include`.
        if include.search(entry.name) is None:
            logging.debug(
                f"Ignoring: {entry} did not match the --include regular expression"
            )
            continue

        logging.debug(f"Included: {entry} matched the --include regular expression")
        yield entry
Exemplo n.º 3
0
def gen_python_files(
    paths: Iterable[Path],
    root: Path,
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Yield the `.py` files reachable from `paths`, descending into directories.

    Each candidate is first normalized against `root` via `normalize_path`;
    candidates for which that returns `None`, or whose normalized path is
    matched by `gitignore`, are skipped (together with everything below them).
    """
    for candidate in paths:
        relative = normalize_path(candidate, root)

        # Skip entries that could not be normalized or that .gitignore matches.
        if relative is None or gitignore.match_file(relative):
            continue

        if candidate.is_dir():
            yield from gen_python_files(candidate.iterdir(), root, gitignore)
            continue

        if candidate.is_file() and str(candidate).endswith(".py"):
            yield candidate
Exemplo n.º 4
0
def post_order_lexicographic(top: str, ignore_pathspec: pathspec.PathSpec = None):
    """
    walks the directory tree below `top` in post order, visiting names in sorted order and leaving out
    every path matched by the ignore pathspec.

    :param top: the directory being iterated
    :param ignore_pathspec: the pathspec of ignore patterns to match file exclusions against
    :return: yields `(directory, children)` tuples, sub-directories before their parent, where `children`
        is the sorted list of `(name, is_directory)` tuples of the non-ignored entries of that directory
    """
    # collect (name, is_directory) for each immediate child that is not ignored, in sorted order.
    children = []
    for name in sorted(os.listdir(top)):
        file_path = os.path.join(top, name)
        ignored = ignore_pathspec and ignore_pathspec.match_file(file_path)
        if not ignored:
            children.append((name, isdir(join(top, name))))
            continue
        # ignored entries are reported, except for the ascmhl folder itself.
        if os.path.basename(os.path.normpath(file_path)) != ascmhl_folder_name:
            logger.verbose(f'ignoring filepath {file_path}')

    # descend into every real (non-symlinked) sub-directory before reporting the current one.
    for name, is_dir in children:
        if not is_dir:
            continue
        subdir = join(top, name)
        if os.path.islink(subdir):
            continue
        yield from post_order_lexicographic(subdir, ignore_pathspec)

    # all descendants have been yielded; now yield the current directory with its sorted children.
    yield top, children
Exemplo n.º 5
0
def iter_files(
    paths: Iterable[Path],
    include: Pattern[str],
    exclude: Pattern[str],
    gitignore: PathSpec,
) -> Generator[Path, None, None]:
    """
    Yield the files under `paths` that pass the gitignore, exclude and include filters.

    Each path is made relative to `ROOT_PATH`. The relative POSIX form is
    checked against `gitignore`; the same form prefixed with "/" (and suffixed
    with "/" for directories) is what the `exclude` and `include` regexes see.
    Directories that survive are walked recursively.

    Highly influenced by Black (https://github.com/psf/black).
    """
    for entry in paths:
        relative = entry.relative_to(ROOT_PATH).as_posix()
        if gitignore.match_file(relative):
            continue

        # Directories get a trailing slash so patterns can target them explicitly.
        is_directory = entry.is_dir()
        candidate = f"/{relative}/" if is_directory else f"/{relative}"

        # An exclude pattern only counts when it matches a non-empty span.
        excluded = exclude.search(candidate)
        if excluded is not None and excluded.group(0):
            continue

        if is_directory:
            yield from iter_files(entry.iterdir(), include, exclude, gitignore)
        elif entry.is_file() and include.search(candidate) is not None:
            yield entry
Exemplo n.º 6
0
def gen_template_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Recursively yield the files below `path` that pass all filters.

    A file is yielded when `gitignore` does not match it, the `exclude` regex
    has no non-empty match in its root-relative path, and the `include` regex
    matches that same path. Paths handed to the regexes start with "/" and,
    for directories, end with "/".
    Unreadable entries and symbolic links resolving outside of `root` are
    skipped. Every skipped entry, except files that merely fail `include`,
    is announced through `report.path_ignored`.
    A non-symlink entry resolving outside of `root` raises `ValueError`.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"

    for entry in path.iterdir():
        # .gitignore rules come first.
        if gitignore.match_file(entry.as_posix()):
            report.path_ignored(entry, "matches the .gitignore file content")
            continue

        # Build the root-relative path used by the include/exclude regexes.
        try:
            relative = entry.resolve().relative_to(root)
        except OSError as e:
            report.path_ignored(entry, f"cannot be read because {e}")
            continue
        except ValueError:
            # Only symlinks are allowed to point outside of `root`.
            if not entry.is_symlink():
                raise
            report.path_ignored(
                entry, f"is a symbolic link that points outside {root}"
            )
            continue

        is_directory = entry.is_dir()
        candidate = "/" + relative.as_posix()
        if is_directory:
            candidate += "/"

        # An exclude pattern only counts when it matches a non-empty span.
        excluded = exclude.search(candidate)
        if excluded is not None and excluded.group(0):
            report.path_ignored(entry, "matches the --exclude regular expression")
            continue

        if is_directory:
            yield from gen_template_files_in_dir(
                entry, root, include, exclude, report, gitignore
            )
        elif entry.is_file() and include.search(candidate):
            yield entry
Exemplo n.º 7
0
Arquivo: pathu.py Projeto: chvmq/pycln
def yield_sources(
    path: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    gitignore: PathSpec,
    reporter: Report,
) -> Generator:
    """Yields `.py` paths to handle. Walks through path sub-directories/files
    recursively.

    If `path` is a file it is yielded only when its string form ends with
    `PY_EXTENSION`. Otherwise the directory is scanned: symlinks are skipped,
    and the exclude, `.gitignore` and include filters are applied to each
    entry's bare name (with a trailing "/" appended for non-file entries).
    Matching files of a directory are yielded before its sub-directories are
    walked. Skipped entries are reported via `reporter.ignored_path`.

    :param path: A path to start searching from.
    :param include: regex pattern to be included.
    :param exclude: regex pattern to be excluded.
    :param gitignore: gitignore PathSpec object.
    :param reporter: a `report.Report` object.
    :returns: generator of `.py` files paths.
    """
    if path.is_file():
        if str(path).endswith(PY_EXTENSION):
            yield path
        return

    dirs: List[str] = []
    files: List[str] = []

    is_included, is_excluded = regexu.is_included, regexu.is_excluded

    # Use the scandir iterator as a context manager so its underlying handle
    # is released even if a filter or the reporter raises mid-scan.
    with os.scandir(path) as scandir:
        for entry in scandir:

            # Skip symlinks.
            if entry.is_symlink():
                continue

            # Non-file entries carry a trailing slash so patterns can tell
            # them apart from files.
            name = entry.name if entry.is_file() else f"{entry.name}/"
            entry_path = Path(os.path.join(path, name))

            # Compute exclusions.
            if is_excluded(name, exclude):
                reporter.ignored_path(entry_path, EXCLUDE)
                continue

            # Compute `.gitignore`.
            if gitignore.match_file(name):
                reporter.ignored_path(entry_path, GITIGNORE)
                continue

            # Directories.
            if entry.is_dir():
                dirs.append(name)
                continue

            # Files.
            if is_included(name, include):
                files.append(name)
            else:
                reporter.ignored_path(entry_path, INCLUDE)

    for name in files:
        yield Path(os.path.join(path, name))

    for dirname in dirs:

        dir_path = Path(os.path.join(path, dirname))

        yield from yield_sources(dir_path, include, exclude, gitignore,
                                 reporter)
Exemplo n.º 8
0
def yield_sources(
    path: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    extend_exclude: Pattern[str],
    gitignore: PathSpec,
    reporter: Report,
) -> Generator[Path, None, None]:
    """Yields `.py` paths to handle. Walks through path sub-directories/files
    recursively.

    When `path` is a directory its entries are scanned; otherwise `path`
    itself is the only candidate. Symlinks are skipped. Each remaining
    candidate is tested against `exclude`, `extend_exclude`, `gitignore` and
    finally `include`; skipped candidates are reported via
    `reporter.ignored_path`. Files of a directory are yielded before its
    sub-directories are walked. Candidates are collected in sets, so the
    order within one directory is unspecified.

    :param path: A path to start searching from.
    :param include: regex pattern to be included.
    :param exclude: regex pattern to be excluded.
    :param extend_exclude: regex pattern to be excluded in addition to `exclude`.
    :param gitignore: gitignore PathSpec object, or `None` to disable
        `.gitignore` handling altogether.
    :param reporter: a `report.Report` object.
    :returns: generator of `.py` files paths.
    """

    dirs: Set[Path] = set()
    files: Set[Path] = set()

    is_included, is_excluded = regexu.is_included, regexu.is_excluded

    if path.is_dir():
        # Snapshot the directory inside a `with` block so the scandir handle
        # is closed promptly, even if a filter or the reporter raises later.
        with os.scandir(path) as scan:
            entries = [Path(entry) for entry in scan]
    else:
        entries = [path]

    for entry_path in entries:

        # Skip symlinks.
        if entry_path.is_symlink():
            continue

        # Compute exclusions.
        if is_excluded(entry_path, exclude):
            reporter.ignored_path(entry_path, EXCLUDE)
            continue

        # Compute extended exclusions.
        if is_excluded(entry_path, extend_exclude):
            reporter.ignored_path(entry_path, EXCLUDE)
            continue

        # Compute `.gitignore`. A `None` gitignore means the feature is
        # disabled, so there is nothing to match against.
        if gitignore is not None and gitignore.match_file(entry_path):
            reporter.ignored_path(entry_path, GITIGNORE)
            continue

        # Directories.
        if entry_path.is_dir():
            dirs.add(entry_path)
            continue

        # Files.
        if is_included(entry_path, include):
            files.add(entry_path)
        else:
            reporter.ignored_path(entry_path, INCLUDE)

    yield from files

    for dir_ in dirs:
        # If gitignore is None, gitignore usage is disabled, while a Falsey
        # gitignore is when the directory doesn't have a .gitignore file.
        yield from yield_sources(
            dir_,
            include,
            exclude,
            extend_exclude,
            gitignore + regexu.get_gitignore(dir_) if gitignore is not None else None,
            reporter,
        )
Exemplo n.º 9
0
def get_snakefiles_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Generate all files under `path` whose paths are not excluded by the
    `exclude` regex, but are included by the `include` regex.

    Entries matched by `gitignore` are ignored first. The `exclude` regex is
    applied to the entry's resolved path relative to `root`, prefixed with "/"
    and, for directories, suffixed with "/". The `include` regex is applied to
    the bare file name only. Unreadable entries and symbolic links pointing
    outside of the `root` directory are ignored. Every decision is logged at
    debug level.

    Raises `ValueError` when a non-symlink entry resolves outside of `root`.

    Adapted from
    https://github.com/psf/black/blob/ce14fa8b497bae2b50ec48b3bd7022573a59cdb1/black.py#L3519-L3573
    """
    root = root.resolve()

    for child in path.iterdir():
        # First ignore files matching .gitignore
        if gitignore.match_file(child.as_posix()):
            logging.debug(f"Ignoring: {child} matches .gitignore file content")
            continue

        # Then ignore with `exclude` option.
        try:
            normalized_path = "/" + child.resolve().relative_to(
                root).as_posix()
        except OSError as err:
            logging.debug(f"Ignoring: {child} cannot be read because {err}.")
            continue
        except ValueError:
            if child.is_symlink():
                logging.debug(
                    f"Ignoring: {child} is a symbolic link that points outside {root}"
                )
                continue
            logging.error(f"{child} caused error")
            # Re-raise the original exception instead of a copy so the
            # traceback still points at the failing `relative_to` call.
            raise

        if child.is_dir():
            normalized_path += "/"

        # An exclude pattern only counts when it matches a non-empty span.
        exclude_match = exclude.search(normalized_path)
        if exclude_match and exclude_match.group(0):
            logging.debug(
                f"Excluded: {child} matched the --exclude regular expression")
            continue

        if child.is_dir():
            yield from get_snakefiles_in_dir(child, root, include, exclude,
                                             gitignore)

        elif child.is_file():
            include_match = include.search(child.name)
            if include_match:
                logging.debug(
                    f"Included: {child} matched the --include regular expression"
                )
                yield child
            else:
                logging.debug(
                    f"Ignoring: {child} did not match the --include regular expression"
                )