Example #1
    def __init__(self):
        Task.__init__(self)

        # There are 5 header groups:
        # 0. Related headers
        # 1. C system headers (includes standard library headers)
        # 2. C++ system headers (includes standard library headers)
        # 3. Other library headers
        # 4. Project headers
        #
        # See comments below for how headers are classified.

        # Header type 0: Related headers
        # Base name of include matches base name of current file

        # Header type 1: C standard library headers
        self.c_std = [
            "assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", "float.h",
            "inttypes.h", "iso646.h", "limits.h", "locale.h", "math.h",
            "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", "stdatomic.h",
            "stdbool.h", "stddef.h", "stdint.h", "stdio.h", "stdlib.h",
            "stdnoreturn.h", "string.h", "tgmath.h", "threads.h", "time.h",
            "uchar.h", "wchar.h", "wctype.h"
        ]

        # Header type 1: C system headers
        self.c_sys_regex = regex.compile(r"<[a-z][A-Za-z0-9/_-]*\.h>")

        # Header type 2: C++ standard library headers
        self.cpp_std = [
            "cstdlib", "csignal", "csetjmp", "cstdarg", "typeinfo", "typeindex",
            "type_traits", "bitset", "functional", "utility", "ctime", "chrono",
            "cstddef", "initializer_list", "tuple", "new", "memory",
            "scoped_allocator", "climits", "cfloat", "cstdint", "cinttypes",
            "limits", "exception", "stdexcept", "cassert", "system_error",
            "cerrno", "cctype", "cwctype", "cstring", "cwchar", "cuchar",
            "string", "array", "vector", "deque", "list", "forward_list", "set",
            "map", "unordered_set", "unordered_map", "stack", "queue",
            "algorithm", "iterator", "cmath", "complex", "valarray", "random",
            "numeric", "ratio", "cfenv", "iosfwd", "ios", "istream", "ostream",
            "iostream", "fstream", "sstream", "strstream", "iomanip",
            "streambuf", "cstdio", "locale", "clocale", "codecvt", "regex",
            "atomic", "thread", "mutex", "shared_mutex", "future",
            "condition_variable", "ciso646", "ccomplex", "ctgmath", "cstdalign",
            "cstdbool", "any", "filesystem", "optional", "string_view",
            "variant"
        ]

        # Header type 3: Other library headers
        # They use angle brackets (open_bracket group is angle bracket)
        #
        # Header type 4: Project headers
        # They use double quotes (all other headers)
        self.header_regex = regex.compile(r"(?P<comment>//\s*)?"
                                          r"\#include\s*"
                                          r"(?P<header>"
                                          r"(?P<open_bracket><|\")"
                                          r"(?P<name>[^>\"]*)"
                                          r"(?P<close_bracket>>|\"))"
                                          r"(?P<postfix>.*)$")
Example #2
    def __init__(self, current_year):
        """Constructor for LicenseUpdate task.

        Keyword arguments:
        current_year -- year string
        """
        Task.__init__(self)

        self.__current_year = current_year
Example #3
    def __init__(self, clang_version):
        """Constructor for ClangFormat task.

        Keyword arguments:
        clang_version -- version number of clang-format appended to executable
                         name
        """
        Task.__init__(self)

        if clang_version == "":
            self.exec_name = "clang-format"
        else:
            self.exec_name = "clang-format-" + clang_version
Example #4
    def __try_regex(self, lines, license_template):
        """Try finding license with regex of license template.

        Keyword arguments:
        lines -- lines of file
        license_template -- list of license template lines

        Returns:
        Tuple of whether license was found, year, and file contents after license.
        """
        linesep = Task.get_linesep(lines)

        # Convert the license template to a regex
        license_rgxstr = "^" + linesep.join(license_template)
        license_rgxstr = license_rgxstr.replace("*", r"\*").replace(
            ".", r"\.").replace("(", r"\(").replace(")", r"\)").replace(
                "{year}", r"(?P<year>[0-9]+)(-[0-9]+)?").replace(
                    "{padding}", "[ ]*")
        license_rgx = regex.compile(license_rgxstr, regex.M)

        # Compare license
        year = self.__current_year
        match = license_rgx.search(lines)
        if match:
            try:
                year = match.group("year")
            except IndexError:
                pass

            # License found; return the file contents that follow it
            return (True, year, linesep + lines[match.end():].lstrip())
        else:
            return (False, year, lines)
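
For reference, here is a standalone sketch of the template-to-regex conversion above (the license template line and file contents are hypothetical):

import re

linesep = "\n"
license_template = ["// Copyright (c) {year} Example Co.{padding}"]

license_rgxstr = "^" + linesep.join(license_template)
license_rgxstr = license_rgxstr.replace("*", r"\*").replace(
    ".", r"\.").replace("(", r"\(").replace(")", r"\)").replace(
        "{year}", r"(?P<year>[0-9]+)(-[0-9]+)?").replace(
            "{padding}", "[ ]*")

lines = "// Copyright (c) 2011-2018 Example Co.\n\nint main() {}\n"
match = re.compile(license_rgxstr, re.M).search(lines)
print(match.group("year"))  # '2011' (first year of the range)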
Example #5
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)
        format_succeeded = True

        # Tokenize file as brace opens, brace closes, and "using" declarations.
        # "using" declarations are scoped, so content inside any bracket pair is
        # considered outside the global namespace.
        token_regex = re.compile(r"\{|\}|using .*;")

        brace_count = 0
        for match in token_regex.finditer(lines):
            token = match.group()

            if token == "{":
                brace_count += 1
            elif token == "}":
                brace_count -= 1
            elif token.startswith("using"):
                if brace_count == 0:
                    linenum = lines.count(linesep, 0, match.start()) + 1
                    if "NOLINT" not in lines.splitlines()[linenum - 1]:
                        format_succeeded = False
                        print(name + ": " + str(linenum) + ": '" + token + \
                              "' in global namespace")

        return (lines, False, format_succeeded)
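
A self-contained sketch of this brace-counting check (the C++ source string is hypothetical); only the declaration outside all braces is reported:

import re

token_regex = re.compile(r"\{|\}|using .*;")
lines = 'using namespace std;\nnamespace a {\nusing std::cout;\n}\n'

brace_count = 0
for match in token_regex.finditer(lines):
    token = match.group()
    if token == "{":
        brace_count += 1
    elif token == "}":
        brace_count -= 1
    elif brace_count == 0:
        print("global namespace:", token)  # only 'using namespace std;'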
Example #6
    def make_include_guard(self, config_file, name):
        """Returns properly formatted include guard based on repository root and
        file name.

        Keyword arguments:
        config_file -- Config object
        name -- file name string
        """
        repo_root_name_override = config_file.group("repoRootNameOverride")

        repo_root = Task.get_repo_root()
        guard_root = os.path.relpath(name, repo_root)
        if not repo_root_name_override:
            guard_path = os.path.basename(repo_root) + os.sep
        else:
            guard_path = repo_root_name_override[0] + os.sep
        include_roots = config_file.group("includeGuardRoots")

        if include_roots:
            prefix = ""
            for include_root in include_roots:
                if guard_root.startswith(
                        include_root) and len(include_root) > len(prefix):
                    prefix = include_root
            guard_path += guard_root[len(prefix):]
            return (regex.sub("[^a-zA-Z0-9]", "_", guard_path).upper() +
                    "_").lstrip("_")

        # No include guard roots matched, so append full name
        guard_path += guard_root
        return regex.sub("[^a-zA-Z0-9]", "_", guard_path).upper() + "_"
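
A rough standalone sketch of the guard construction (repository name, relative path, and include guard roots are hypothetical): the longest matching root is stripped, and the result is uppercased with non-alphanumerics replaced by underscores.

import re

def make_guard(repo_name, rel_path, include_roots):
    # Strip the longest include guard root that prefixes the path
    prefix = ""
    for include_root in include_roots:
        if rel_path.startswith(include_root) and len(include_root) > len(prefix):
            prefix = include_root
    guard_path = repo_name + "/" + rel_path[len(prefix):]
    return (re.sub("[^a-zA-Z0-9]", "_", guard_path).upper() + "_").lstrip("_")

print(make_guard("styleguide", "wpiformat/Test.h", ["wpiformat/"]))
# STYLEGUIDE_TEST_H_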
Example #7
    def run_pipeline(self, config_file, name, lines):
        output = lines.rstrip() + Task.get_linesep(lines)

        if output != lines:
            return (output, True, True)
        else:
            return (lines, False, True)
Example #8
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)

        license_template = Config.read_file(
            os.path.dirname(os.path.abspath(name)), ".styleguide-license")

        # Get year when file was most recently modified in Git history
        #
        # Committer date is used instead of author date (the one shown by "git
        # log") because the year the file was last modified in the history
        # should be used. Author dates can be older than this or even out of
        # order in the log.
        cmd = ["git", "log", "-n", "1", "--format=%ci", "--", name]
        last_year = subprocess.run(cmd,
                                   stdout=subprocess.PIPE).stdout.decode()[:4]

        # If file hasn't been committed yet, use current calendar year as end of
        # copyright year range
        if last_year == "":
            last_year = str(date.today().year)

        success, first_year, appendix = self.__try_regex(
            lines, last_year, license_template)
        if not success:
            success, first_year, appendix = self.__try_string_search(
                lines, last_year, license_template)

        output = ""

        # Determine copyright range and trailing padding
        if first_year != last_year:
            year_range = first_year + "-" + last_year
        else:
            year_range = first_year

        for line in license_template:
            # Insert copyright year range
            line = line.replace("{year}", year_range)

            # Insert padding which expands to the 80th column. If there is more
            # than one padding token, the line may contain fewer than 80
            # characters due to rounding during the padding width calculation.
            PADDING_TOKEN = "{padding}"
            padding_count = line.count(PADDING_TOKEN)
            if padding_count:
                padding = 80 - len(line) + len(PADDING_TOKEN) * padding_count
                padding_width = int(padding / padding_count)
                line = line.replace(PADDING_TOKEN, " " * padding_width)

            output += line + linesep

        # Copy rest of original file into new one
        output += appendix

        return (output, lines != output, True)
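
The padding arithmetic is easier to see in isolation (hypothetical template line):

PADDING_TOKEN = "{padding}"
line = "// Copyright (c) 2018 Example Co.{padding}"
padding_count = line.count(PADDING_TOKEN)  # 1
# 80 minus the non-token characters, spread across the padding tokens
padding = 80 - len(line) + len(PADDING_TOKEN) * padding_count
padding_width = int(padding / padding_count)
line = line.replace(PADDING_TOKEN, " " * padding_width)
print(len(line))  # 80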
Example #9
    def run_pipeline(self, config_file, name, lines):
        self.override_regexes = []

        # Compile include sorting override regexes
        for group in [
                "includeRelated", "includeCSys", "includeCppSys",
                "includeOtherLibs", "includeProject"
        ]:
            regex_str = config_file.regex(group)
            self.override_regexes.append(regex.compile(regex_str))

        self.linesep = Task.get_linesep(lines)

        file_name = os.path.basename(name)

        lines_list = lines.splitlines()

        # Write lines from beginning of file to headers
        i = 0
        while i < len(lines_list) and ("#ifdef" not in lines_list[i] and
                                       "#include" not in lines_list[i]):
            i += 1
        output_list = lines_list[0:i]

        suboutput, inc_present, idx, valid_headers = self.header_sort(
            config_file, lines_list, file_name, i, len(lines_list), 0)
        i = idx

        # If header failed to classify, return failure
        if not valid_headers:
            return (lines, False, False)

        if suboutput:
            output_list.extend(suboutput)

        # Remove extra empty lines from end of includes
        while len(output_list) > 0 and output_list[-1].rstrip() == "":
            del output_list[-1]  # Remove last newline

        # Remove possible extra newline from #endif
        if len(output_list) > 0:
            output_list[-1] = output_list[-1].rstrip()
            output_list.append("")

        # Write rest of file
        output_list.extend(lines_list[i:])

        output = self.linesep.join(output_list).rstrip() + self.linesep
        if output != lines:
            return (output, True, True)
        else:
            return (lines, False, True)
Example #10
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)

        file_changed = False
        output = ""

        for line in lines.splitlines():
            processed_line = line.rstrip()
            if not file_changed and len(line) != len(processed_line):
                file_changed = True
            output += processed_line + linesep

        return (output, file_changed, True)
Example #11
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)
        lines_list = lines.split(linesep)
        output_list = lines_list

        state = State.FINDING_IFNDEF
        ifndef_regex = re.compile(r"#ifndef \w+", re.ASCII)
        define_regex = re.compile(r"#define \w+", re.ASCII)

        if_preproc_count = 0
        for i in range(len(lines_list)):
            if state == State.FINDING_IFNDEF:
                if i + 1 < len(lines_list) and \
                    lines_list[i].lstrip().startswith("#ifndef ") and \
                    lines_list[i + 1].lstrip().startswith("#define "):
                    state = State.FINDING_ENDIF

                    guard = self.make_include_guard(config_file, name)
                    output_list[i] = ifndef_regex.sub("#ifndef " + guard,
                                                      lines_list[i])
                    output_list[i + 1] = define_regex.sub(
                        "#define " + guard, lines_list[i + 1])
                    if_preproc_count += 1
                elif lines_list[i].lstrip().startswith("#pragma once"):
                    state = State.DONE
            elif state == State.FINDING_ENDIF:
                if "#if" in lines_list[i]:
                    if_preproc_count += 1
                elif "#endif" in lines_list[i]:
                    if_preproc_count -= 1

                if if_preproc_count == 0:
                    state = State.DONE
                    output_list[i] = "#endif  // " + guard
                else:
                    output_list[i] = lines_list[i]
            else:
                output_list[i] = lines_list[i]

        # if include guard not found
        if state == State.FINDING_IFNDEF:
            print("Error: " + name +
                  ": doesn't contain include guard or '#pragma once'")
            return (lines, False, False)

        output = linesep.join(output_list).rstrip() + linesep

        if output != lines:
            return (output, True, True)
        else:
            return (lines, False, True)
Example #12
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)

        # Find instances of "using namespace std;" or subnamespaces of "std",
        # but not std::literals or std::chrono_literals.
        regex = re.compile(
            r"using\s+namespace\s+std(;|::(?!(chrono_)?literals|placeholders))")

        for match in regex.finditer(lines):
            linenum = lines.count(linesep, 0, match.start()) + 1
            print(
                "Warning: " + name + ": " + str(linenum) +
                ": avoid \"using namespace std;\" in production software. While it is used in introductory C++, it pollutes the global namespace with standard library symbols."
            )

        return (lines, False, True)
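
The negative lookahead is what exempts the literals and placeholders namespaces; a quick check with hypothetical inputs:

import re

rgx = re.compile(
    r"using\s+namespace\s+std(;|::(?!(chrono_)?literals|placeholders))")

print(bool(rgx.search("using namespace std;")))                   # True
print(bool(rgx.search("using namespace std::chrono;")))           # True
print(bool(rgx.search("using namespace std::literals;")))         # False
print(bool(rgx.search("using namespace std::chrono_literals;")))  # False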
Example #13
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)
        file_changed = False

        output = ""
        pos = 0

        # Tokenize comment boundaries, line separators, and class declarations
        # followed by two or more line separators
        token_str = r"/\*|\*/|//|" + linesep + r"|class\s[\w\d\s]*{" + \
            linesep + r"(?P<extra>(" + linesep + r")+)"
        token_regex = regex.compile(token_str)

        in_multicomment = False
        in_comment = False

        for match in token_regex.finditer(lines):
            token = match.group()

            if token == "/*":
                in_multicomment = True
            elif token == "*/":
                in_multicomment = False
                in_comment = False
            elif token == "//":
                in_comment = True
            elif token == linesep:
                in_comment = False
            elif not in_multicomment and not in_comment:
                # Otherwise, the token is a class

                # Removes extra line separators
                output += lines[pos:match.span("extra")[0]]
                pos = match.span()[1]

                file_changed = True

        # Write rest of file if it wasn't all processed
        if pos < len(lines):
            output += lines[pos:]

        if file_changed:
            return (output, file_changed, True)
        else:
            return (lines, file_changed, True)
Example #14
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)
        output = ""

        brace_prefix = "(?P<prefix>(extern|namespace)\s+[\w\"]*)"
        brace_postfix = "\s*/(/|\*)[^\r\n]*"

        brace_regex = re.compile(
            "(" + brace_prefix + r"\s*)?{|"  # "{" with optional prefix
            r"\}(" + brace_postfix + ")?")  # "}" with optional comment postfix

        name_stack = []
        brace_count = 0
        extract_location = 0
        for match in brace_regex.finditer(lines):
            token = match.group()

            if match.group("prefix"):
                brace_count += 1
                name_stack.append(
                    (brace_count, match.group("prefix").rstrip()))
            elif "{" in token:
                brace_count += 1
            elif token.startswith("}"):
                output += lines[extract_location:match.start()]
                if len(name_stack) > 0 and name_stack[len(name_stack) -
                                                      1][0] == brace_count:
                    output += "}  // " + name_stack.pop()[1]
                else:
                    output += lines[match.start():match.end()]
                extract_location = match.end()
                brace_count -= 1

        # If input has unprocessed lines, write them to output
        if extract_location < len(lines):
            output += lines[extract_location:]

        if output != lines:
            return (output, True, True)
        else:
            return (lines, False, True)
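
A condensed, runnable sketch of the same matching loop on a hypothetical input; the closing brace whose depth matches a recorded prefix gains a trailing comment:

import re

brace_regex = re.compile(
    r"((?P<prefix>(extern|namespace)\s+[\w\"]*)\s*)?\{|\}")
lines = 'namespace frc {\nvoid Func() {}\n}\n'

name_stack, brace_count, output, pos = [], 0, "", 0
for match in brace_regex.finditer(lines):
    token = match.group()
    if match.group("prefix"):
        brace_count += 1
        name_stack.append((brace_count, match.group("prefix").rstrip()))
    elif "{" in token:
        brace_count += 1
    elif token.startswith("}"):
        output += lines[pos:match.start()]
        if name_stack and name_stack[-1][0] == brace_count:
            output += "}  // " + name_stack.pop()[1]
        else:
            output += token
        pos = match.end()
        brace_count -= 1
print(output + lines[pos:])  # the last "}" becomes "}  // namespace frc"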
Example #15
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)

        license_template = Config.read_file(
            os.path.dirname(os.path.abspath(name)), ".styleguide-license")

        success, year, appendix = self.__try_regex(lines, license_template)
        if not success:
            success, year, appendix = self.__try_string_search(
                lines, license_template)

        output = ""

        # Determine copyright range and trailing padding
        if year != self.__current_year:
            year = year + "-" + self.__current_year

        for line in license_template:
            # Insert copyright year range
            line = line.replace("{year}", year)

            # Insert padding which expands to the 80th column. If there is more
            # than one padding token, the line may contain fewer than 80
            # characters due to rounding during the padding width calculation.
            PADDING_TOKEN = "{padding}"
            padding_count = line.count(PADDING_TOKEN)
            if padding_count:
                padding = 80 - len(line) + len(PADDING_TOKEN) * padding_count
                padding_width = int(padding / padding_count)
                line = line.replace(PADDING_TOKEN, " " * padding_width)

            output += line + linesep

        # Copy rest of original file into new one
        output += appendix

        return (output, lines != output, True)
Example #16
    def make_include_guard(self, config_file, name):
        """Returns properly formatted include guard based on repository root and
        file name.

        Keyword arguments:
        config_file -- Config object
        name -- file name string
        """
        repo_root = Task.get_repo_root()

        name = os.path.relpath(name, repo_root)
        guard_path = os.path.basename(repo_root) + "/"
        include_roots = config_file.group("includeGuardRoots")

        if include_roots:
            for include_root in include_roots:
                if name.startswith(include_root):
                    guard_path += name[len(include_root):]
                    return re.sub("[^a-zA-Z0-9]", "_",
                                  guard_path).upper() + "_"

        # No include guard roots matched, so append full name
        guard_path += name
        return re.sub("[^a-zA-Z0-9]", "_", guard_path).upper() + "_"
Example #17
    def __init__(self):
        Task.__init__(self)

        self.headers = []

        # assert is a macro, so it's omitted to avoid prefixing with std::
        self.headers.append(Header("assert"))

        self.headers.append(
            Header(
                "ctype", {
                    "isalum", "isalpha", "isblank", "iscntrl", "isdigit",
                    "isgraph", "islower", "isprint", "ispunct", "isspace",
                    "isupper", "isxdigit", "tolower", "toupper"
                }))
        self.headers.append(Header("errno"))
        self.headers.append(Header("float"))
        self.headers.append(Header("limits"))
        self.headers.append(
            Header(
                "math", {
                    "cos", "acos", "cosh", "acosh", "sin", "asin", "asinh",
                    "tan", "atan", "atan2", "atanh", "exp", "frexp", "ldexp",
                    "log", "log10", "ilogb", "log1p", "log2", "logb", "modf",
                    "exp2", "expm1", "scalbl", "scalbln", "pow", "sqrt", "cbrt",
                    "hypot", "erf", "erfc", "tgamma", "lgamma", "ceil", "floor",
                    "fmod", "trunc", "round", "lround", "llround", "rint",
                    "lrint", "llrint", "nearbyint", "remainder", "remquo",
                    "copysign", "nan", "nextafter", "nexttoward", "fdim",
                    "fmax", "fmin", "fma", "fpclassify", "abs", "fabs",
                    "signbit", "isfinite", "isinf", "isnan", "isnormal",
                    "isgreater", "isgreaterequal", "isless", "islessequal",
                    "islessgreater", "isunordered"
                }))
        self.headers.append(
            Header("setjmp", {"longjmp", "setjmp"}, ["jmp_buf"]))
        self.headers.append(
            Header("signal", {"signal", "raise"}, ["sig_atomic_t"], False))
        self.headers.append(Header("stdarg", {"va_list"}))
        self.headers.append(
            Header("stddef", type_regexes=["(ptrdiff|max_align|nullptr)_t"]))

        # size_t isn't actually defined in stdint, but it fits best here for
        # removing the std:: prefix
        self.headers.append(
            Header(
                "stdint",
                type_regexes=[
                    "((u?int((_fast|_least)?(8|16|32|64)|max|ptr)|size)_t)"
                ],
                add_prefix=False))

        self.headers.append(
            Header(
                "stdio", {
                    "remove", "rename", "rewind", "tmpfile", "tmpnam", "fclose",
                    "fflush", "fopen", "freopen", "fgetc", "fgets", "fputc",
                    "fputs", "fread", "fwrite", "fgetpos", "fseek", "fsetpos",
                    "ftell", "feof", "ferror", "setbuf", "setvbuf", "fprintf",
                    "snprintf", "sprintf", "vfprintf", "vprintf", "vsnprintf",
                    "vsprintf", "printf", "fscanf", "sscanf", "vfscanf",
                    "vscanf", "vsscanf", "scanf", "getchar", "gets", "putc",
                    "putchar", "puts", "getc", "ungetc", "clearerr", "perror"
                }, ["FILE", "fpos_t"]))
        self.headers.append(
            Header(
                "stdlib", {
                    "atof", "atoi", "atol", "atoll", "strtof", "strtol",
                    "strtod", "strtold", "strtoll", "strtoul", "strtoull",
                    "rand", "srand", "free", "calloc", "malloc", "realloc",
                    "abort", "at_quick_exit", "quick_exit", "atexit", "exit",
                    "getenv", "system", "_Exit", "bsearch", "qsort", "llabs",
                    "labs", "abs", "lldiv", "ldiv", "div", "mblen", "btowc",
                    "wctomb", "wcstombs", "mbstowcs"
                }, ["(l|ll)?div_t"]))
        self.headers.append(
            Header(
                "string", {
                    "memcpy", "memcmp", "memchr", "memmove", "memset", "strcpy",
                    "strncpy", "strcat", "strncat", "strcmp", "strncmp",
                    "strcoll", "strchr", "strrchr", "strstr", "strxfrm",
                    "strcspn", "strrspn", "strpbrk", "strtok", "strerror",
                    "strlen"
                }))
        self.headers.append(
            Header(
                "time", {
                    "clock", "asctime", "ctime", "difftime", "gmtime",
                    "localtime", "mktime", "strftime", "time"
                }, ["(clock|time)_t"]))
Example #18
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)

        license_template = Config.read_file(
            os.path.dirname(os.path.abspath(name)), ".styleguide-license")

        # Strip newlines at top of file
        stripped_lines = lines.lstrip().split(linesep)

        # If a comment at the beginning of the file is considered a license, it
        # is replaced with an updated license. Otherwise, a license header is
        # inserted before it.
        first_comment_is_license = False
        license_end = 0

        # Regex for tokenizing on comment boundaries
        token_regex = re.compile(r"^/\*|\*/|^//")

        in_multiline_comment = False
        for line in stripped_lines:
            # If part of comment contains "Copyright (c)", comment is license.
            if "Copyright (c)" in line:
                first_comment_is_license = True

            line_has_comment = False
            for match in token_regex.finditer(line):
                # If any comment token was matched, the line has a comment
                line_has_comment = True

                token = match.group()

                if token == "/*":
                    in_multiline_comment = True
                elif token == "*/":
                    in_multiline_comment = False
            if not in_multiline_comment and not line_has_comment:
                break
            else:
                license_end += 1

        # If comment at beginning of file is non-empty license, update it
        if first_comment_is_license and license_end > 0:
            file_parts = \
                [linesep.join(stripped_lines[0:license_end]),
                 linesep + linesep.join(stripped_lines[license_end:]).lstrip()]
        else:
            file_parts = ["", linesep + lines.lstrip()]

        # Default year when none is found is current one
        year = self.__current_year

        year_regex = re.compile(r"Copyright \(c\).*\s(20..)")
        modify_copyright = False
        for line in file_parts[0].split(linesep):
            match = year_regex.search(line)
            # If license contains copyright pattern, extract the first year
            if match:
                year = match.group(1)
                modify_copyright = True
                break

        output = ""

        # Determine copyright range and trailing padding
        if modify_copyright and year != self.__current_year:
            year = year + "-" + self.__current_year

        for line in license_template:
            # Insert copyright year range
            line = line.replace("{year}", year)

            # Insert padding which expands to the 80th column. If there is more
            # than one padding token, the line may contain fewer than 80
            # characters due to rounding during the padding width calculation.
            PADDING_TOKEN = "{padding}"
            padding_count = line.count(PADDING_TOKEN)
            if padding_count:
                padding = 80 - len(line) + len(PADDING_TOKEN) * padding_count
                padding_width = int(padding / padding_count)
                line = line.replace(PADDING_TOKEN, " " * padding_width)

            output += line + linesep

        # Copy rest of original file into new one
        if len(file_parts) > 1:
            output += file_parts[1]

        return (output, lines != output, True)
Example #19
def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description=
        "Runs all formatting tasks on the code base. This should be invoked from a directory within the project."
    )
    parser.add_argument(
        "-v",
        dest="verbose1",
        action="store_true",
        help="verbosity level 1 (prints names of processed files)")
    parser.add_argument(
        "-vv",
        dest="verbose2",
        action="store_true",
        help=
        "verbosity level 2 (prints names of processed files and tasks run on them)"
    )
    parser.add_argument(
        "-j",
        dest="jobs",
        type=int,
        default=mp.cpu_count(),
        help="number of jobs to run (default is number of cores)")
    parser.add_argument(
        "-y",
        dest="year",
        type=int,
        default=date.today().year,
        help=
        "year to use when updating license headers (default is current year)")
    parser.add_argument(
        "-clang",
        dest="clang_version",
        type=str,
        default="",
        help=
        "version suffix for clang-format (invokes \"clang-format-CLANG_VERSION\" or \"clang-format\" if no suffix provided)"
    )
    parser.add_argument(
        "-f",
        dest="file",
        type=str,
        default="",
        nargs="+",
        help=
        "file or directory names (can be path relative to python invocation directory or absolute path)"
    )
    args = parser.parse_args()

    # All discovered files are relative to Git repo root directory, so find the
    # root.
    root_path = Task.get_repo_root()
    if root_path == "":
        print("Error: not invoked within a Git repository", file=sys.stderr)
        sys.exit(1)

    # If no files explicitly specified
    if not args.file:
        # Delete temporary files from previous incomplete run
        files = [
            os.path.join(dp, f)
            for dp, dn, fn in os.walk(root_path)
            for f in fn
            if f.endswith(".tmp")
        ]
        for f in files:
            os.remove(f)

        # Recursively create list of files in given directory
        files = [
            os.path.join(dp, f) for dp, dn, fn in os.walk(root_path) for f in fn
        ]

        if not files:
            print("Error: no files found to format", file=sys.stderr)
            sys.exit(1)
    else:
        files = []
        for name in args.file:
            # If a directory was specified, recursively expand it
            if os.path.isdir(name):
                files.extend([
                    os.path.join(dp, f)
                    for dp, dn, fn in os.walk(name)
                    for f in fn
                ])
            else:
                files.append(name)

    # Convert relative paths of files to absolute paths
    files = [os.path.abspath(name) for name in files]

    # Don't run tasks on Git metadata
    files = [name for name in files if os.sep + ".git" + os.sep not in name]

    # Don't check for changes in or run tasks on ignored files
    files = filter_ignored_files(files)

    # Create list of all changed files
    changed_file_list = []

    output_list = subprocess.run(
        ["git", "diff", "--name-only", "master"],
        stdout=subprocess.PIPE).stdout.split()
    for line in output_list:
        changed_file_list.append(root_path + os.sep +
                                 line.strip().decode("ascii"))

    # Don't run tasks on modifiable or generated files
    work = []
    for name in files:
        config_file = Config(os.path.dirname(name), ".styleguide")

        if config_file.is_modifiable_file(name):
            continue
        if config_file.is_generated_file(name):
            # Emit warning if a generated file was edited
            if name in changed_file_list:
                print("Warning: generated file '" + name + "' modified")
            continue

        work.append(name)
    files = work

    # If there are no files left, do nothing
    if len(files) == 0:
        sys.exit(0)

    # Prepare file batches for batch tasks
    chunksize = math.ceil(len(files) / args.jobs)
    file_batches = [
        files[i:i + chunksize] for i in range(0, len(files), chunksize)
    ]

    # IncludeOrder is run after Stdlib so any C std headers changed to C++ or
    # vice versa are sorted properly. ClangFormat is run after the other tasks
    # so it can clean up their formatting.
    task_pipeline = [
        BraceComment(),
        CIdentList(),
        IncludeGuard(),
        LicenseUpdate(str(args.year)),
        JavaClass(),
        Newline(),
        Stdlib(),
        IncludeOrder(),
        UsingDeclaration(),
        UsingNamespaceStd(),
        Whitespace()
    ]
    run_pipeline(task_pipeline, args, files)

    task_pipeline = [ClangFormat(args.clang_version)]
    run_batch(task_pipeline, args, file_batches)

    # These tasks fix clang-format formatting
    task_pipeline = [Jni()]
    run_pipeline(task_pipeline, args, files)

    # Lint is run last since previous tasks can affect its output.
    task_pipeline = [PyFormat(), Lint()]
    run_batch(task_pipeline, args, file_batches)
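
For instance, the batching math near the top of this function splits the file list into one chunk per job (hypothetical file names):

import math

files = ["a.cpp", "b.cpp", "c.cpp", "d.cpp", "e.cpp"]
jobs = 2
chunksize = math.ceil(len(files) / jobs)  # 3
file_batches = [
    files[i:i + chunksize] for i in range(0, len(files), chunksize)
]
print(file_batches)  # [['a.cpp', 'b.cpp', 'c.cpp'], ['d.cpp', 'e.cpp']]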
Example #20
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)
        output = ""

        brace_prefix = "(?P<prefix>(extern|namespace)\s+[\w\"]*)"
        brace_postfix = "[ \t]*/(/|\*)[^\r\n]*"

        brace_regex = regex.compile(
            r"/\*|\*/|//|\\\\|\\\"|\"|\\'|'|" + linesep + "|" + \
            "(" + brace_prefix + "\s*)?{|"  # "{" with optional prefix
            "}(" + brace_postfix + ")?")  # "}" with optional comment postfix

        name_stack = []
        brace_count = 0
        extract_location = 0
        in_multicomment = False
        in_singlecomment = False
        in_string = False
        in_char = False
        for match in brace_regex.finditer(lines):
            token = match.group()

            if token == "/*":
                if not in_singlecomment and not in_string and not in_char:
                    in_multicomment = True
            elif token == "*/":
                if not in_singlecomment and not in_string and not in_char:
                    in_multicomment = False
            elif token == "//":
                if not in_multicomment and not in_string and not in_char:
                    in_singlecomment = True
            elif in_singlecomment and linesep in token:
                # Ignore token if it's in a singleline comment. Only check it
                # for newlines to end the comment.
                in_singlecomment = False
            elif in_multicomment or in_singlecomment:
                # Tokens processed after this branch are ignored if they are in
                # comments
                continue
            elif token == "\\\"":
                continue
            elif token == "\"":
                if not in_char:
                    in_string = not in_string
            elif token == "\\'":
                continue
            elif token == "'":
                if not in_string:
                    in_char = not in_char
            elif in_string or in_char:
                # Tokens processed after this branch are ignored if they are in
                # double or single quotes
                continue
            elif match.group("prefix"):
                brace_count += 1
                name_stack.append(
                    (brace_count, match.group("prefix").rstrip()))
            elif "{" in token:
                brace_count += 1
            elif token.startswith("}"):
                output += lines[extract_location:match.start()]
                if len(name_stack) > 0 and name_stack[len(name_stack) -
                                                      1][0] == brace_count:
                    output += "}  // " + name_stack.pop()[1]
                else:
                    output += lines[match.start():match.end()]
                extract_location = match.end()
                brace_count -= 1

        # If input has unprocessed lines, write them to output
        if extract_location < len(lines):
            output += lines[extract_location:]

        if output != lines:
            return (output, True, True)
        else:
            return (lines, False, True)
Example #21
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)
        file_changed = False

        output = ""
        pos = 0

        # C files use C linkage by default
        is_c = config_file.is_c_file(name)

        # Tokenize as extern "C" or extern "C++" with optional {, open brace,
        # close brace, or () followed by { to disambiguate function calls.
        # extern is first to try matching a brace to it before classifying the
        # brace as generic.
        #
        # Valid function prototypes and definitions have a return type, spaces,
        # a function name, optional spaces, then empty parentheses. They are
        # followed by ; or {.
        #
        # "def\\s+\w+" matches preprocessor directives "#ifdef" and "#ifndef" so
        # their contents aren't used as a return type.
        preproc_str = "#else|#endif|"
        comment_str = "/\*|\*/|//|" + linesep + "|"
        string_str = r"\\\\|\\\"|\"|"
        char_str = r"\\'|'|"
        extern_str = "(?P<ext_decl>extern \"C(\+\+)?\")\s+(?P<ext_brace>\{)?|"
        braces_str = "\{|\}|;|def\s+\w+|\w+\s+\w+\s*(?P<paren>\(\))"
        postfix_str = "(?=\s*(;|\{))"
        token_regex = regex.compile(preproc_str + comment_str + string_str +
                                    char_str + extern_str + braces_str +
                                    postfix_str)

        EXTRA_POP_OFFSET = 2

        # If a stack value is at least the pop offset, the value needs to be
        # restored in addition to an extra stack pop being performed. The pop
        # offset is removed before assigning to is_c.
        #
        # is_c + pop offset == 2: C++ lang restore that needs extra brace pop
        # is_c + pop offset == 3: C lang restore that needs extra brace pop
        extern_brace_indices = [is_c]

        in_preproc_else = False
        in_multicomment = False
        in_singlecomment = False
        in_string = False
        in_char = False
        for match in token_regex.finditer(lines):
            token = match.group()

            # Skip #else to #endif in case they have braces in them. This
            # assumes preprocessor directives are only used for conditional
            # compilation for different platforms and have the same amount of
            # braces in both branches. Nested preprocessor directives are also
            # not handled.
            if token == "#else":
                in_preproc_else = True
            elif token == "#endif":
                in_preproc_else = False
            if in_preproc_else:
                continue

            if token == "/*":
                if not in_singlecomment and not in_string and not in_char:
                    in_multicomment = True
            elif token == "*/":
                if not in_singlecomment and not in_string and not in_char:
                    in_multicomment = False
            elif token == "//":
                if not in_multicomment and not in_string and not in_char:
                    in_singlecomment = True
            elif in_singlecomment and linesep in token:
                # Ignore token if it's in a singleline comment. Only check it
                # for newlines to end the comment.
                in_singlecomment = False
            elif in_multicomment or in_singlecomment:
                # Tokens processed after this branch are ignored if they are in
                # comments
                continue
            elif token == "\\\"":
                continue
            elif token == "\"":
                if not in_char:
                    in_string = not in_string
            elif token == "\\'":
                continue
            elif token == "'":
                if not in_string:
                    in_char = not in_char
            elif in_string or in_char:
                # Tokens processed after this branch are ignored if they are in
                # double or single quotes
                continue
            elif token == "{":
                extern_brace_indices.append(is_c)
            elif token == "}":
                is_c = extern_brace_indices.pop()

                if len(extern_brace_indices) == 0:
                    self.__print_failure(name)
                    return (lines, False, False)

                # If the next stack frame is from an extern without braces, pop
                # it.
                if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
                    is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
                    extern_brace_indices.pop()
            elif token == ";":
                if len(extern_brace_indices) == 0:
                    self.__print_failure(name)
                    return (lines, False, False)

                # If the next stack frame is from an extern without braces, pop
                # it.
                if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
                    is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
                    extern_brace_indices.pop()
            elif token.startswith("extern"):
                # Back up language setting first
                if match.group("ext_brace"):
                    extern_brace_indices.append(is_c)
                else:
                    # Handling an extern without braces changing the language
                    # type is done by treating it as a pseudo-brace that gets
                    # popped as well when the next "}" or ";" is encountered.
                    # The "extra pop" offset is used as a flag on the top stack
                    # value that is checked whenever a pop is performed.
                    extern_brace_indices.append(is_c + EXTRA_POP_OFFSET)

                # Change language based on extern declaration
                if match.group("ext_decl") == "extern \"C\"":
                    is_c = True
                else:
                    is_c = False
            elif match.group(
                    "paren") and "return " not in match.group() and is_c:
                # Replaces () with (void)
                output += lines[pos:match.span("paren")[0]] + "(void)"
                pos = match.span("paren")[0] + len("()")

                file_changed = True

        # Write rest of file if it wasn't all processed
        if pos < len(lines):
            output += lines[pos:]

        # Invariant: extern_brace_indices has one entry
        success = len(extern_brace_indices) == 1
        if not success:
            self.__print_failure(name)

        if file_changed:
            return (output, file_changed, success)
        else:
            return (lines, file_changed, success)
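
A minimal trace of the pseudo-brace mechanism for an extern without braces (hypothetical starting state: a C++ file):

EXTRA_POP_OFFSET = 2

is_c = False                           # C++ file
extern_brace_indices = [is_c]
# 'extern "C" int Func();' has no brace, so a flagged frame is pushed
extern_brace_indices.append(is_c + EXTRA_POP_OFFSET)  # False + 2 == 2
is_c = True                            # declaration now uses C linkage
# The ';' ends the declaration: the flagged frame restores the language
if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
    is_c = bool(extern_brace_indices[-1] - EXTRA_POP_OFFSET)
    extern_brace_indices.pop()
print(is_c, extern_brace_indices)  # False [False] (back to C++)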
Example #22
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)
        format_succeeded = True

        # Tokenize file as brace opens, brace closes, and "using" declarations.
        # "using" declarations are scoped, so content inside any bracket pair is
        # considered outside the global namespace.
        token_regex = regex.compile(r"/\*|\*/|//|\\\\|\\\"|\"|\\'|'|" +
                                    linesep + r"|\{|\}|using\s[^;]*;")

        brace_count = 0
        in_multicomment = False
        in_singlecomment = False
        in_string = False
        in_char = False
        for match in token_regex.finditer(lines):
            token = match.group()

            if token == "/*":
                if not in_singlecomment and not in_string and not in_char:
                    in_multicomment = True
            elif token == "*/":
                if not in_singlecomment and not in_string and not in_char:
                    in_multicomment = False
            elif token == "//":
                if not in_multicomment and not in_string and not in_char:
                    in_singlecomment = True
            elif in_singlecomment and linesep in token:
                # Ignore token if it's in a singleline comment. Only check it
                # for newlines to end the comment.
                in_singlecomment = False
            elif in_multicomment or in_singlecomment:
                # Tokens processed after this branch are ignored if they are in
                # comments
                continue
            elif token == "\\\"":
                continue
            elif token == "\"":
                if not in_char:
                    in_string = not in_string
            elif token == "\\'":
                continue
            elif token == "'":
                if not in_string:
                    in_char = not in_char
            elif in_string or in_char:
                # Tokens processed after this branch are ignored if they are in
                # double or single quotes
                continue
            elif token == "{":
                brace_count += 1
            elif token == "}":
                brace_count -= 1
            elif token.startswith("using"):
                if brace_count == 0:
                    linenum = lines.count(linesep, 0, match.start()) + 1
                    if "NOLINT" not in lines.splitlines()[linenum - 1]:
                        format_succeeded = False
                        print(name + ": " + str(linenum) + ": '" + token + \
                              "' in global namespace")

        return (lines, False, format_succeeded)
Example #23
def test_includeguard():
    test = TaskTest(IncludeGuard())

    repo_root = os.path.basename(Task.get_repo_root()).upper()

    # Fix incorrect include guard
    test.add_input("./Test.h",
        "#ifndef WRONG_H" + os.linesep + \
        "#define WRONG_C" + os.linesep + \
        os.linesep + \
        "#endif" + os.linesep)
    test.add_output(
        "#ifndef " + repo_root + "_TEST_H_" + os.linesep + \
        "#define " + repo_root + "_TEST_H_" + os.linesep + \
        os.linesep + \
        "#endif  // " + repo_root + "_TEST_H_" + os.linesep, True, True)

    # Ensure nested preprocessor statements are handled properly for incorrect
    # include guard
    test.add_input("./Test.h",
        "#ifndef WRONG_H" + os.linesep + \
        "#define WRONG_C" + os.linesep + \
        os.linesep + \
        "#if SOMETHING" + os.linesep + \
        "// do something" + os.linesep + \
        "#endif" + os.linesep + \
        "#endif" + os.linesep)
    test.add_output(
        "#ifndef " + repo_root + "_TEST_H_" + os.linesep + \
        "#define " + repo_root + "_TEST_H_" + os.linesep + \
        os.linesep + \
        "#if SOMETHING" + os.linesep + \
        "// do something" + os.linesep + \
        "#endif" + os.linesep + \
        "#endif  // " + repo_root + "_TEST_H_" + os.linesep, True, True)

    # Don't touch correct include guard
    test.add_input("./Test.h",
        "#ifndef " + repo_root + "_TEST_H_" + os.linesep + \
        "#define " + repo_root + "_TEST_H_" + os.linesep + \
        os.linesep + \
        "#endif  // " + repo_root + "_TEST_H_" + os.linesep)
    test.add_latest_input_as_output(True)

    # Fail on missing include guard
    test.add_input("./Test.h", "// Empty file" + os.linesep)
    test.add_latest_input_as_output(False)

    # Verify pragma once counts as include guard
    test.add_input("./Test.h", "#pragma once" + os.linesep)
    test.add_latest_input_as_output(True)

    # Ensure include guard roots are processed correctly
    test.add_input("./Test.h",
        "#ifndef " + repo_root + "_WPIFORMAT_TEST_H_" + os.linesep + \
        "#define " + repo_root + "_WPIFORMAT_TEST_H_" + os.linesep + \
        os.linesep + \
        "#endif  // " + repo_root + "_WPIFORMAT_TEST_H_" + os.linesep)
    test.add_output(
        "#ifndef " + repo_root + "_TEST_H_" + os.linesep + \
        "#define " + repo_root + "_TEST_H_" + os.linesep + \
        os.linesep + \
        "#endif  // " + repo_root + "_TEST_H_" + os.linesep, True, True)

    # Ensure leading underscores are removed (this occurs if the user doesn't
    # include a trailing "/" in the include guard root)
    test.add_input("./Test/Test.h",
        "#ifndef " + repo_root + "_WPIFORMAT_TEST_TEST_H_" + os.linesep + \
        "#define " + repo_root + "_WPIFORMAT_TEST_TEST_H_" + os.linesep + \
        os.linesep + \
        "#endif  // " + repo_root + "_WPIFORMAT_TEST_TEST_H_" + os.linesep)
    test.add_output(
        "#ifndef " + repo_root + "_TEST_H_" + os.linesep + \
        "#define " + repo_root + "_TEST_H_" + os.linesep + \
        os.linesep + \
        "#endif  // " + repo_root + "_TEST_H_" + os.linesep, True, True)

    test.run(OutputType.FILE)
Example #24
    def __try_string_search(self, lines, license_template):
        """Try finding license with string search.

        Keyword arguments:
        lines -- lines of file
        license_template -- list of license template lines

        Returns:
        Tuple of whether license was found, year, and file contents after license.
        """
        linesep = Task.get_linesep(lines)

        # Strip newlines at top of file
        stripped_lines = lines.lstrip().split(linesep)

        # If a comment at the beginning of the file is considered a license, it
        # is replaced with an updated license. Otherwise, a license header is
        # inserted before it.
        first_comment_is_license = False
        license_end = 0

        # Regex for tokenizing on comment boundaries
        token_regex = regex.compile(r"/\*|\*/|^//")

        in_multiline_comment = False
        for line in stripped_lines:
            # If part of comment contains "Copyright (c)", comment is
            # license.
            if "Copyright (c)" in line:
                first_comment_is_license = True

            line_has_comment = False
            for match in token_regex.finditer(line):
                # If any comment token was matched, the line has a comment
                line_has_comment = True

                token = match.group()

                if token == "/*":
                    in_multiline_comment = True
                elif token == "*/":
                    in_multiline_comment = False
            if not in_multiline_comment and not line_has_comment:
                break
            else:
                license_end += 1

        # If comment at beginning of file is non-empty license, update it
        year = self.__current_year
        if first_comment_is_license and license_end > 0:
            license_part = linesep.join(stripped_lines[0:license_end])
            appendix_part = \
                linesep + linesep.join(stripped_lines[license_end:]).lstrip()

            year_regex = regex.compile(r"Copyright \(c\)(?>.*?\s(20..))")
            for line in license_part.split(linesep):
                match = year_regex.search(line)
                # If license contains copyright pattern, extract the first year
                if match:
                    year = match.group(1)
                    break

            return (True, year, appendix_part)
        else:
            return (False, year, linesep + lines.lstrip())
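
Note the atomic group (?>...) requires the third-party regex module (the stdlib re module only gained atomic groups in Python 3.11); the lazy quantifier inside it picks out the first year of the range:

import regex

year_regex = regex.compile(r"Copyright \(c\)(?>.*?\s(20..))")
m = year_regex.search("// Copyright (c) 2011-2018 Example Co.")
print(m.group(1))  # '2011'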
Example #25
File: jni.py Project: Daltz333/styleguide
    def run_pipeline(self, config_file, name, lines):
        linesep = Task.get_linesep(lines)

        regex_str_sig = r"(/\*(?>(.|\n)*?\*/)\s+)?" + \
            r"JNIEXPORT\s+(?P<ret>\w+)\s+JNICALL\s+" + \
            r"(?P<func>Java_\w+)\s*\(\s*" + \
            r"(?P<env_type>JNIEnv\s*\*\s*)" + \
            r"(?P<env_name>\w+)?,\s*jclass\s*(?P<jclass_name>\w*)?"
        regex_sig = regex.compile(regex_str_sig)

        regex_str_func = r"Java_(?P<class>\w+)_(?P<method>[^_]+)$"
        regex_func = regex.compile(regex_str_func)

        # Matches a comma followed by the type and an optional variable name,
        # or the closing parenthesis followed by "{" or ";"
        regex_str_arg = (r", \s* (?P<arg>(?P<arg_type>[\w\*]+)(\s+ \w+)?)|\)\s*"
                         r"(?P<trailing>{|;)")
        regex_arg = regex.compile(regex_str_arg, regex.VERBOSE)

        output = ""
        pos = 0
        for match_sig in regex_sig.finditer(lines):
            comment = ""
            signature = ""

            if match_sig.start() > 0:
                output += lines[pos:match_sig.start()]

            # Add JNI-specific args
            jni_args = "  ("
            if match_sig.group("env_type"):
                jni_args += match_sig.group("env_type")
            if match_sig.group("env_name"):
                jni_args += match_sig.group("env_name")
            jni_args += ", jclass"
            if match_sig.group("jclass_name"):
                jni_args += " " + match_sig.group("jclass_name")

            # Write JNI function comment. Splitting at "__" removes overload
            # annotation from method comment
            match = regex_func.search(match_sig.group("func").split("__")[0])
            comment += "/*" + linesep + \
                " * Class:     " + match.group("class") + linesep + \
                " * Method:    " + match.group("method") + linesep + \
                " * Signature: ("

            signature += "JNIEXPORT " + match_sig.group("ret") + " JNICALL" + \
                linesep + match_sig.group("func") + linesep + jni_args

            # Add other args
            line_length = len(jni_args)
            for match_arg in regex_arg.finditer(lines[match_sig.end():]):
                if ")" in match_arg.group():
                    break
                # If args would go past 80 characters
                elif line_length + len(", ") + len(
                        match_arg.group("arg")) + len(")") > 80:
                    # Put current arg on next line and set line_length to
                    # reflect that
                    signature += "," + linesep + "   " + match_arg.group("arg")
                    line_length = len("   " + match_arg.group("arg"))
                else:
                    signature += ", " + match_arg.group("arg")
                    line_length += len(", ") + len(match_arg.group("arg"))
                comment += self.map_jni_type(match_arg.group("arg_type"))
            comment += ")" + self.map_jni_type(match_sig.group("ret")) + linesep + \
                " */" + linesep

            # Output correct trailing character for declaration vs definition
            if match_arg.group("trailing") == "{":
                signature += ")" + linesep + "{"
            else:
                signature += ");"

            output += comment + signature

            pos = match_sig.end() + match_arg.end()

        # Write rest of file
        if pos < len(lines):
            output += lines[pos:]

        if output == "" or output == lines:
            return (lines, False, True)
        else:
            return (output, True, True)
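
A quick sketch of how regex_func decomposes a JNI symbol (the function name is hypothetical; splitting at "__" first drops any overload annotation):

import re

regex_func = re.compile(r"Java_(?P<class>\w+)_(?P<method>[^_]+)$")
func = "Java_edu_wpi_Example_getValue__I".split("__")[0]
match = regex_func.search(func)
print(match.group("class"))   # 'edu_wpi_Example'
print(match.group("method"))  # 'getValue'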