def __try_regex(self, lines, license_template):
    """Try finding license with regex of license template.

    Keyword arguments:
    lines -- contents of the file as a single string
    license_template -- license template as a sequence of lines, which may
                        contain "{year}" and "{padding}" placeholders

    Returns:
    Tuple of whether license was found, year, and file contents after
    license.
    """
    linesep = Task.get_linesep(lines)

    # Regex fragments substituted for the template placeholders
    placeholder_patterns = {
        "{year}": r"(?P<year>[0-9]+)(-[0-9]+)?",
        "{padding}": "[ ]*",
    }
    placeholder_rgx = regex.compile(r"(\{year\}|\{padding\})")

    # Convert the license template to a regex. Each template line is split
    # around its placeholders; the literal pieces are escaped in full so that
    # any regex metacharacter in the license text ("+", "?", "[", "|", ...)
    # is matched literally instead of being interpreted as regex syntax.
    line_patterns = []
    for template_line in license_template:
        pieces = placeholder_rgx.split(template_line)
        line_patterns.append("".join(
            placeholder_patterns[piece]
            if piece in placeholder_patterns else regex.escape(piece)
            for piece in pieces))
    license_rgx = regex.compile("^" + linesep.join(line_patterns), regex.M)

    # Compare license
    year = self.__current_year
    match = license_rgx.search(lines)
    if not match:
        return (False, year, lines)

    try:
        year = match.group("year")
    except IndexError:
        # The template has no "{year}" placeholder, so keep the default year
        pass

    # The license was found; return what follows it with leading whitespace
    # collapsed to a single line separator
    return (True, year, linesep + lines[match.end():].lstrip())
def run_pipeline(self, config_file, name, lines):
    """Report "using" declarations that appear outside any pair of braces.

    Keyword arguments:
    config_file -- unused by this function
    name -- file name, used as the prefix of printed diagnostics
    lines -- contents of the file as a single string

    Returns:
    Tuple of the unmodified file contents, False (the contents are never
    changed here), and whether no offending declaration was found.
    """
    linesep = Task.get_linesep(lines)

    # Split once, on the same separator used for counting line numbers below,
    # so the line looked up for "NOLINT" is always the line that was counted.
    # str.splitlines() also breaks on form feeds and other boundaries, which
    # can shift the index relative to the count.
    file_lines = lines.split(linesep)

    format_succeeded = True

    # Tokenize file as brace opens, brace closes, and "using" declarations.
    # "using" declarations are scoped, so content inside any bracket pair is
    # considered outside the global namespace.
    token_regex = re.compile(r"\{|\}|using .*;")

    brace_count = 0
    for match in token_regex.finditer(lines):
        token = match.group()

        if token == "{":
            brace_count += 1
        elif token == "}":
            brace_count -= 1
        elif token.startswith("using") and brace_count == 0:
            linenum = lines.count(linesep, 0, match.start()) + 1
            # A "NOLINT" marker anywhere on the line suppresses the report
            if "NOLINT" not in file_lines[linenum - 1]:
                format_succeeded = False
                print(name + ": " + str(linenum) + ": '" + token +
                      "' in global namespace")

    return (lines, False, format_succeeded)
def run_pipeline(self, config_file, name, lines):
    """Make the file end with exactly one line separator.

    All trailing whitespace is stripped from the contents and a single line
    separator (as reported by Task.get_linesep()) is appended.

    Keyword arguments:
    config_file -- unused by this function
    name -- unused by this function
    lines -- contents of the file as a single string

    Returns:
    Tuple of the resulting contents, whether they differ from the input, and
    True.
    """
    normalized = lines.rstrip() + Task.get_linesep(lines)
    if normalized == lines:
        return (lines, False, True)
    return (normalized, True, True)
def run_pipeline(self, config_file, name, lines):
    """Rewrite the license header using the ".styleguide-license" template.

    The end of the copyright year range is the year of the most recent Git
    commit touching the file, or the current calendar year when Git reports
    nothing for it.

    Keyword arguments:
    config_file -- unused by this function
    name -- path of the file being processed
    lines -- contents of the file as a single string

    Returns:
    Tuple of the new contents, whether they differ from the input, and True.
    """
    linesep = Task.get_linesep(lines)
    padding_token = "{padding}"

    license_template = Config.read_file(
        os.path.dirname(os.path.abspath(name)), ".styleguide-license")

    # Ask Git for the committer date ("%ci") of the last commit touching the
    # file. The committer date is used rather than the author date because
    # author dates can be older than the commit or out of order in the log.
    # Only the first four characters (the year) are kept.
    git_log = subprocess.run(
        ["git", "log", "-n", "1", "--format=%ci", "--", name],
        stdout=subprocess.PIPE)
    last_year = git_log.stdout.decode()[:4]

    # No output means the file has no commit yet; fall back to today's year
    if not last_year:
        last_year = str(date.today().year)

    # Locate the existing license: template regex first, then string search
    found, first_year, appendix = self.__try_regex(lines, last_year,
                                                   license_template)
    if not found:
        found, first_year, appendix = self.__try_string_search(
            lines, last_year, license_template)

    # A single year is written when the range would start and end the same
    if first_year == last_year:
        year_range = first_year
    else:
        year_range = first_year + "-" + last_year

    header_lines = []
    for template_line in license_template:
        rendered = template_line.replace("{year}", year_range)

        # Expand padding tokens so the line reaches the 80th column. With
        # more than one token the line may end up shorter than 80 characters
        # because the width is rounded down for each token.
        token_count = rendered.count(padding_token)
        if token_count:
            free_columns = (80 - len(rendered) +
                            len(padding_token) * token_count)
            rendered = rendered.replace(padding_token,
                                        " " * (free_columns // token_count))

        header_lines.append(rendered + linesep)

    # New header followed by the rest of the original file
    output = "".join(header_lines) + appendix

    return (output, lines != output, True)
def run_pipeline(self, config_file, name, lines):
    """Sort the include section of a file via self.header_sort().

    Keyword arguments:
    config_file -- object providing regex(group) for the include override
                   groups
    name -- path of the file being processed
    lines -- contents of the file as a single string

    Returns:
    Tuple of (lines, False, False) when header_sort() reports invalid
    headers; otherwise the resulting contents, whether they differ from the
    input, and True.
    """
    override_groups = ("includeRelated", "includeCSys", "includeCppSys",
                       "includeOtherLibs", "includeProject")

    # Compile include sorting override regexes, one per group, in order
    self.override_regexes = []
    self.override_regexes.extend(
        regex.compile(config_file.regex(group)) for group in override_groups)

    self.linesep = Task.get_linesep(lines)

    base_name = os.path.basename(name)
    source_lines = lines.splitlines()
    line_count = len(source_lines)

    # Everything before the first line mentioning "#ifdef" or "#include" is
    # copied through unchanged
    header_start = next(
        (idx for idx, text in enumerate(source_lines)
         if "#ifdef" in text or "#include" in text), line_count)
    result_lines = source_lines[:header_start]

    sorted_headers, _inc_present, resume_at, headers_ok = self.header_sort(
        config_file, source_lines, base_name, header_start, line_count, 0)

    # If header failed to classify, return failure
    if not headers_ok:
        return (lines, False, False)

    if sorted_headers:
        result_lines.extend(sorted_headers)

    # Drop blank lines left at the end of the include section
    while result_lines and not result_lines[-1].rstrip():
        result_lines.pop()

    # Strip trailing whitespace from the last kept line, then separate the
    # include section from the rest of the file with one empty line
    if result_lines:
        result_lines[-1] = result_lines[-1].rstrip()
        result_lines.append("")

    # Remainder of the file, starting where header_sort() stopped
    result_lines.extend(source_lines[resume_at:])

    output = self.linesep.join(result_lines).rstrip() + self.linesep
    if output == lines:
        return (lines, False, True)
    return (output, True, True)
def run_pipeline(self, config_file, name, lines):
    """Strip trailing whitespace from every line of the file.

    Keyword arguments:
    config_file -- unused by this function
    name -- unused by this function
    lines -- contents of the file as a single string

    Returns:
    Tuple of the rebuilt contents (each line followed by the line separator),
    whether any line had trailing whitespace removed, and True.
    """
    linesep = Task.get_linesep(lines)

    original_lines = lines.splitlines()
    stripped_lines = [text.rstrip() for text in original_lines]

    # Only stripped whitespace counts as a change; line-ending differences
    # between input and output are not considered here
    file_changed = any(
        len(before) != len(after)
        for before, after in zip(original_lines, stripped_lines))

    output = "".join(text + linesep for text in stripped_lines)

    return (output, file_changed, True)
def run_pipeline(self, config_file, name, lines):
    """Rewrite a header's include guard to the name from make_include_guard().

    The first "#ifndef"/"#define" pair on consecutive lines is treated as the
    include guard. Its identifier is replaced, and the "#endif" that balances
    it is rewritten as "#endif  // <guard>"-style text ("#endif // " + guard).
    A file containing "#pragma once" before any guard is left alone.

    Keyword arguments:
    config_file -- passed through to self.make_include_guard()
    name -- file name, passed to self.make_include_guard() and used in the
            error message
    lines -- contents of the file as a single string

    Returns:
    Tuple of (lines, False, False) when neither an include guard nor
    "#pragma once" was found; otherwise the resulting contents, whether they
    differ from the input, and True.
    """
    linesep = Task.get_linesep(lines)

    # The list is edited in place; later iterations see earlier edits
    output_list = lines.split(linesep)

    state = State.FINDING_IFNDEF
    ifndef_regex = re.compile(r"#ifndef \w+", re.ASCII)
    define_regex = re.compile(r"#define \w+", re.ASCII)

    guard = ""
    if_preproc_count = 0
    for i, line in enumerate(output_list):
        if state == State.FINDING_IFNDEF:
            # The bounds check keeps an "#ifndef" on the very last line from
            # raising IndexError when peeking at the following line
            if (line.lstrip().startswith("#ifndef ") and
                    i + 1 < len(output_list) and
                    output_list[i + 1].lstrip().startswith("#define ")):
                state = State.FINDING_ENDIF
                guard = self.make_include_guard(config_file, name)
                output_list[i] = ifndef_regex.sub("#ifndef " + guard, line)
                output_list[i + 1] = define_regex.sub(
                    "#define " + guard, output_list[i + 1])
                # The guard's own "#ifndef" opens the first nesting level
                if_preproc_count += 1
            elif line.lstrip().startswith("#pragma once"):
                state = State.DONE
        elif state == State.FINDING_ENDIF:
            # Track nesting of "#if"-family directives so that only the
            # "#endif" balancing the guard is rewritten
            if "#if" in line:
                if_preproc_count += 1
            elif "#endif" in line:
                if_preproc_count -= 1

                if if_preproc_count == 0:
                    state = State.DONE
                    output_list[i] = "#endif // " + guard
        else:
            # Nothing is modified once the guard has been fully processed
            break

    # if include guard not found
    if state == State.FINDING_IFNDEF:
        print("Error: " + name +
              ": doesn't contain include guard or '#pragma once'")
        return (lines, False, False)

    output = linesep.join(output_list).rstrip() + linesep
    if output != lines:
        return (output, True, True)
    return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Print a warning for each "using namespace std" directive in the file.

    Keyword arguments:
    config_file -- unused by this function
    name -- file name, used in the printed warning
    lines -- contents of the file as a single string

    Returns:
    Tuple of the unmodified file contents, False, and True; warnings never
    fail the task.
    """
    linesep = Task.get_linesep(lines)

    # Matches "using namespace std;" and "using namespace std::<sub>" unless
    # <sub> starts with "literals", "chrono_literals", or "placeholders"
    std_using_regex = re.compile(
        r"using\s+namespace\s+std(;|::(?!(chrono_)?literals|placeholders))")

    warning_text = (
        ": avoid \"using namespace std;\" in production software. "
        "While it is used in introductory C++, it pollutes the global "
        "namespace with standard library symbols.")

    for hit in std_using_regex.finditer(lines):
        # Line number is one more than the separators preceding the match
        line_number = lines.count(linesep, 0, hit.start()) + 1
        print("Warning: " + name + ": " + str(line_number) + warning_text)

    return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Remove empty lines directly after the opening brace of a class.

    Keyword arguments:
    config_file -- unused by this function
    name -- unused by this function
    lines -- contents of the file as a single string

    Returns:
    Tuple of the resulting contents, whether anything was removed, and True.
    """
    linesep = Task.get_linesep(lines)

    # Tokens: comment delimiters, a lone line separator, or a class header
    # whose "{" is followed by a line separator and then one or more extra
    # line separators (captured as "extra")
    token_str = r"/\*|\*/|//|" + linesep + r"|class\s[\w\d\s]*{" + \
        linesep + r"(?P<extra>(" + linesep + r")+)"
    token_regex = regex.compile(token_str)

    kept_pieces = []
    pos = 0
    file_changed = False

    in_multicomment = False
    in_comment = False
    for match in token_regex.finditer(lines):
        token = match.group()

        if token == "/*":
            in_multicomment = True
        elif token == "*/":
            in_multicomment = False
            in_comment = False
        elif token == "//":
            in_comment = True
        elif token == linesep:
            # A line separator ends a single-line comment
            in_comment = False
        elif not (in_multicomment or in_comment):
            # The only remaining token kind is a class header outside of
            # comments: keep everything up to the extra separators and
            # resume copying after them
            kept_pieces.append(lines[pos:match.start("extra")])
            pos = match.end()
            file_changed = True

    if not file_changed:
        return (lines, False, True)

    # Append whatever follows the last removal
    kept_pieces.append(lines[pos:])
    return ("".join(kept_pieces), True, True)
def run_pipeline(self, config_file, name, lines):
    """Write "} // <prefix>" closing comments for namespace/extern blocks.

    Every "{" preceded by "namespace <name>" or "extern <name>" is recorded
    together with its brace depth. When the "}" at the same depth is reached,
    the brace and any comment following it on the same line are replaced by
    "} // " plus the recorded prefix. Other braces are copied unchanged.

    Keyword arguments:
    config_file -- unused by this function
    name -- unused by this function
    lines -- contents of the file as a single string

    Returns:
    Tuple of the resulting contents, whether they differ from the input, and
    True.
    """
    brace_prefix = r'(?P<prefix>(extern|namespace)\s+[\w"]*)'

    # Only spaces and tabs may separate "}" from its trailing comment. Using
    # "\s*" here would also cross line breaks, so a comment on the line after
    # the brace would be swallowed and replaced.
    brace_postfix = r"[ \t]*/(/|\*)[^\r\n]*"

    brace_regex = re.compile(
        r"(" + brace_prefix + r"\s*)?\{|"  # "{" with optional prefix
        r"\}(" + brace_postfix + r")?")  # "}" with optional comment postfix

    pieces = []
    name_stack = []  # (brace depth, prefix text) of open named scopes
    brace_count = 0
    extract_location = 0
    for match in brace_regex.finditer(lines):
        token = match.group()

        if match.group("prefix"):
            brace_count += 1
            name_stack.append((brace_count, match.group("prefix").rstrip()))
        elif "{" in token:
            brace_count += 1
        elif token.startswith("}"):
            pieces.append(lines[extract_location:match.start()])

            if name_stack and name_stack[-1][0] == brace_count:
                # This brace closes the innermost named scope
                pieces.append("} // " + name_stack.pop()[1])
            else:
                pieces.append(lines[match.start():match.end()])
            extract_location = match.end()
            brace_count -= 1

    # If input has unprocessed lines, write them to output
    pieces.append(lines[extract_location:])

    output = "".join(pieces)
    if output != lines:
        return (output, True, True)
    return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Rewrite the license header using the ".styleguide-license" template.

    The first copyright year is taken from the existing license when one is
    found; the range is extended to self.__current_year when they differ.

    Keyword arguments:
    config_file -- unused by this function
    name -- path of the file being processed
    lines -- contents of the file as a single string

    Returns:
    Tuple of the new contents, whether they differ from the input, and True.
    """
    linesep = Task.get_linesep(lines)
    padding_token = "{padding}"

    license_template = Config.read_file(
        os.path.dirname(os.path.abspath(name)), ".styleguide-license")

    # Locate the existing license: template regex first, then string search
    found, year, appendix = self.__try_regex(lines, license_template)
    if not found:
        found, year, appendix = self.__try_string_search(
            lines, license_template)

    # Turn a past year into a range that ends at the current year
    if year != self.__current_year:
        year = year + "-" + self.__current_year

    rendered_header = []
    for template_line in license_template:
        text = template_line.replace("{year}", year)

        # Expand padding tokens so the line reaches the 80th column. With
        # more than one token the line may end up shorter than 80 characters
        # because the width is rounded down for each token.
        token_count = text.count(padding_token)
        if token_count:
            free_columns = 80 - len(text) + len(padding_token) * token_count
            text = text.replace(padding_token,
                                " " * (free_columns // token_count))

        rendered_header.append(text + linesep)

    # New header followed by the rest of the original file
    output = "".join(rendered_header) + appendix

    return (output, lines != output, True)
def run_pipeline(self, config_file, name, lines):
    """Write "} // <prefix>" closing comments for namespace/extern blocks.

    The file is tokenized into comment delimiters, escaped and unescaped
    quotes, line separators, and braces. Braces seen inside comments, string
    literals, or character literals are ignored. Every "{" preceded by
    "namespace <name>" or "extern <name>" is recorded with its brace depth;
    the "}" at the same depth (plus any comment after it on the same line)
    is replaced by "} // " followed by the recorded prefix.

    Keyword arguments:
    config_file -- unused by this function
    name -- unused by this function
    lines -- contents of the file as a single string

    Returns:
    Tuple of the resulting contents, whether they differ from the input, and
    True.
    """
    linesep = Task.get_linesep(lines)
    output = ""

    brace_prefix = "(?P<prefix>(extern|namespace)\s+[\w\"]*)"
    # Only spaces and tabs are allowed between "}" and its comment, so the
    # match cannot extend onto the following line
    brace_postfix = "[ \t]*/(/|\*)[^\r\n]*"

    # Alternatives are tried in order: comment delimiters, escaped backslash,
    # escaped and plain double quote, escaped and plain single quote, line
    # separator, then the two brace forms
    brace_regex = regex.compile(
        r"/\*|\*/|//|\\\\|\\\"|\"|\\'|'|" + linesep + "|" + \
        "(" + brace_prefix + "\s*)?{|"  # "{" with optional prefix
        "}(" + brace_postfix + ")?")  # "}" with optional comment postfix

    # Stack of (brace depth, prefix text) for open named scopes
    name_stack = []
    brace_count = 0
    # Index in "lines" up to which text has already been copied to "output"
    extract_location = 0
    in_multicomment = False
    in_singlecomment = False
    in_string = False
    in_char = False
    for match in brace_regex.finditer(lines):
        token = match.group()

        # NOTE: the order of the branches below matters; each one assumes
        # that the earlier conditions were false
        if token == "/*":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = True
        elif token == "*/":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = False
        elif token == "//":
            if not in_multicomment and not in_string and not in_char:
                in_singlecomment = True
        elif in_singlecomment and linesep in token:
            # Ignore token if it's in a singleline comment. Only check it
            # for newlines to end the comment.
            in_singlecomment = False
        elif in_multicomment or in_singlecomment:
            # Tokens processed after this branch are ignored if they are in
            # comments
            continue
        elif token == "\\\"":
            # An escaped double quote never opens or closes a string
            continue
        elif token == "\"":
            if not in_char:
                in_string = not in_string
        elif token == "\\'":
            # An escaped single quote never opens or closes a char literal
            continue
        elif token == "'":
            if not in_string:
                in_char = not in_char
        elif in_string or in_char:
            # Tokens processed after this branch are ignored if they are in
            # double or single quotes
            continue
        elif match.group("prefix"):
            # "{" opening a namespace/extern block: remember its depth and
            # prefix for the matching "}"
            brace_count += 1
            name_stack.append(
                (brace_count, match.group("prefix").rstrip()))
        elif "{" in token:
            brace_count += 1
        elif token.startswith("}"):
            # Copy everything since the last rewritten brace
            output += lines[extract_location:match.start()]
            if len(name_stack) > 0 and name_stack[len(name_stack) -
                                                  1][0] == brace_count:
                # This brace closes the innermost named scope; replace it and
                # any matched trailing comment
                output += "} // " + name_stack.pop()[1]
            else:
                output += lines[match.start():match.end()]
            extract_location = match.end()
            brace_count -= 1

    # If input has unprocessed lines, write them to output
    if extract_location < len(lines):
        output += lines[extract_location:]

    if output != lines:
        return (output, True, True)
    else:
        return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Replace empty parameter lists "()" with "(void)" where C is in effect.

    Whether C is in effect starts from config_file.is_c_file(name) and is
    switched by extern "C" / extern "C++" declarations. A stack records the
    language to restore when the enclosing brace (or, for an extern without
    braces, the next "}" or ";") is reached. Tokens inside comments, string
    literals, character literals, and "#else".."#endif" sections are ignored.

    Keyword arguments:
    config_file -- object providing is_c_file(name)
    name -- file name, passed to is_c_file() and self.__print_failure()
    lines -- contents of the file as a single string

    Returns:
    Tuple of the resulting contents (the input when nothing was replaced),
    whether a replacement was made, and whether brace tracking ended
    balanced. On an unmatched closing brace, (lines, False, False) is
    returned immediately.
    """
    linesep = Task.get_linesep(lines)

    file_changed = False
    output = ""
    # Index in "lines" up to which text has already been copied to "output"
    pos = 0

    # C files use C linkage by default
    is_c = config_file.is_c_file(name)

    # Tokenize as extern "C" or extern "C++" with optional {, open brace,
    # close brace, or () followed by { to disambiguate function calls.
    # extern is first to try matching a brace to it before classifying the
    # brace as generic.
    #
    # Valid function prototypes and definitions have return type, spaces,
    # function name, optional spaces, then braces. They are followed by ; or
    # {.
    #
    # The "def\s+\w+" alternative matches the tail of the preprocessor
    # directives "#ifdef" and "#ifndef" so their contents aren't used as a
    # return type.
    preproc_str = "#else|#endif|"
    comment_str = "/\*|\*/|//|" + linesep + "|"
    string_str = r"\\\\|\\\"|\"|"
    char_str = r"\\'|'|"
    extern_str = "(?P<ext_decl>extern \"C(\+\+)?\")\s+(?P<ext_brace>\{)?|"
    braces_str = "\{|\}|;|def\s+\w+|\w+\s+\w+\s*(?P<paren>\(\))"
    # Because of alternation precedence, this lookahead applies only to the
    # final alternative of braces_str (the one containing "paren")
    postfix_str = "(?=\s*(;|\{))"
    token_regex = regex.compile(preproc_str + comment_str + string_str +
                                char_str + extern_str + braces_str +
                                postfix_str)

    EXTRA_POP_OFFSET = 2

    # If value is greater than pop offset, the value needs to be restored in
    # addition to an extra stack pop being performed. The pop offset is
    # removed before assigning to is_c.
    #
    # is_c + pop offset == 2: C lang restore that needs extra brace pop
    # is_c + pop offset == 3: C++ lang restore that needs extra brace pop
    extern_brace_indices = [is_c]

    in_preproc_else = False
    in_multicomment = False
    in_singlecomment = False
    in_string = False
    in_char = False
    for match in token_regex.finditer(lines):
        token = match.group()

        # Skip #else to #endif in case they have braces in them. This
        # assumes preprocessor directives are only used for conditional
        # compilation for different platforms and have the same amount of
        # braces in both branches. Nested preprocessor directives are also
        # not handled.
        if token == "#else":
            in_preproc_else = True
        elif token == "#endif":
            in_preproc_else = False
        if in_preproc_else:
            continue

        # NOTE: the order of the branches below matters; each one assumes
        # that the earlier conditions were false
        if token == "/*":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = True
        elif token == "*/":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = False
        elif token == "//":
            if not in_multicomment and not in_string and not in_char:
                in_singlecomment = True
        elif in_singlecomment and linesep in token:
            # Ignore token if it's in a singleline comment. Only check it
            # for newlines to end the comment.
            in_singlecomment = False
        elif in_multicomment or in_singlecomment:
            # Tokens processed after this branch are ignored if they are in
            # comments
            continue
        elif token == "\\\"":
            # An escaped double quote never opens or closes a string
            continue
        elif token == "\"":
            if not in_char:
                in_string = not in_string
        elif token == "\\'":
            # An escaped single quote never opens or closes a char literal
            continue
        elif token == "'":
            if not in_string:
                in_char = not in_char
        elif in_string or in_char:
            # Tokens processed after this branch are ignored if they are in
            # double or single quotes
            continue
        elif token == "{":
            # Save the current language so the matching "}" can restore it
            extern_brace_indices.append(is_c)
        elif token == "}":
            is_c = extern_brace_indices.pop()

            # The stack always keeps its initial entry while braces are
            # balanced; an empty stack means an unmatched "}"
            if len(extern_brace_indices) == 0:
                self.__print_failure(name)
                return (lines, False, False)

            # If the next stack frame is from an extern without braces, pop
            # it.
            if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
                is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
                extern_brace_indices.pop()
        elif token == ";":
            if len(extern_brace_indices) == 0:
                self.__print_failure(name)
                return (lines, False, False)

            # If the next stack frame is from an extern without braces, pop
            # it.
            if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
                is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
                extern_brace_indices.pop()
        elif token.startswith("extern"):
            # Back up language setting first
            if match.group("ext_brace"):
                extern_brace_indices.append(is_c)
            else:
                # Handling an extern without braces changing the language
                # type is done by treating it as a pseudo-brace that gets
                # popped as well when the next "}" or ";" is encountered.
                # The "extra pop" offset is used as a flag on the top stack
                # value that is checked whenever a pop is performed.
                extern_brace_indices.append(is_c + EXTRA_POP_OFFSET)

            # Change language based on extern declaration
            if match.group("ext_decl") == "extern \"C\"":
                is_c = True
            else:
                is_c = False
        elif match.group(
                "paren") and "return " not in match.group() and is_c:
            # Replaces () with (void). Matches containing "return " are
            # skipped so that "return foo()" is not treated as a prototype.
            output += lines[pos:match.span("paren")[0]] + "(void)"
            pos = match.span("paren")[0] + len("()")

            file_changed = True

    # Write rest of file if it wasn't all processed
    if pos < len(lines):
        output += lines[pos:]

    # Invariant: extern_brace_indices has one entry
    success = len(extern_brace_indices) == 1
    if not success:
        self.__print_failure(name)

    if file_changed:
        return (output, file_changed, success)
    else:
        return (lines, file_changed, success)
def run_pipeline(self, config_file, name, lines):
    """Rewrite the license header using the ".styleguide-license" template.

    If the comment at the top of the file contains "Copyright (c)", it is
    treated as the existing license and replaced; otherwise the new license
    is inserted before the file contents. The first year found in the
    existing license is kept as the start of the copyright year range.

    Keyword arguments:
    config_file -- unused by this function
    name -- path of the file being processed
    lines -- contents of the file as a single string

    Returns:
    Tuple of the new contents, whether they differ from the input, and True.
    """
    linesep = Task.get_linesep(lines)
    padding_token = "{padding}"

    license_template = Config.read_file(
        os.path.dirname(os.path.abspath(name)), ".styleguide-license")

    # Strip newlines at top of file
    stripped_lines = lines.lstrip().split(linesep)

    # If a comment at the beginning of the file is considered a license, it
    # is replaced with an updated license. Otherwise, a license header is
    # inserted before it.
    first_comment_is_license = False
    license_end = 0

    # Regex for tokenizing on comment boundaries
    token_regex = re.compile(r"^/\*|\*/|^//")

    in_multiline_comment = False
    for line in stripped_lines:
        # If part of comment contains "Copyright (c)", comment is license.
        if "Copyright (c)" in line:
            first_comment_is_license = True

        line_has_comment = False
        for match in token_regex.finditer(line):
            # If any comment token was matched, the line has a comment
            line_has_comment = True

            token = match.group()
            if token == "/*":
                in_multiline_comment = True
            elif token == "*/":
                in_multiline_comment = False

        # The leading comment ends at the first line that neither contains a
        # comment token nor lies inside a multi-line comment
        if not in_multiline_comment and not line_has_comment:
            break
        license_end += 1

    # If comment at beginning of file is non-empty license, update it
    if first_comment_is_license and license_end > 0:
        license_lines = stripped_lines[0:license_end]
        appendix = \
            linesep + linesep.join(stripped_lines[license_end:]).lstrip()
    else:
        license_lines = []
        appendix = linesep + lines.lstrip()

    # Default year when none is found is current one
    year = self.__current_year

    # The non-greedy ".*?" makes the search stop at the first "20xx" year on
    # the line. A greedy ".*" would return the last one, e.g. the end of a
    # "2016, 2018" list, and so move the start of the range forward.
    year_regex = re.compile(r"Copyright \(c\).*?\s(20..)")
    modify_copyright = False
    for line in license_lines:
        match = year_regex.search(line)
        # If license contains copyright pattern, extract the first year
        if match:
            year = match.group(1)
            modify_copyright = True
            break

    # Determine copyright range and trailing padding
    if modify_copyright and year != self.__current_year:
        year = year + "-" + self.__current_year

    header_lines = []
    for line in license_template:
        # Insert copyright year range
        line = line.replace("{year}", year)

        # Insert padding which expands to the 80th column. If there is more
        # than one padding token, the line may contain fewer than 80
        # characters due to rounding during the padding width calculation.
        padding_count = line.count(padding_token)
        if padding_count:
            padding = 80 - len(line) + len(padding_token) * padding_count
            padding_width = padding // padding_count
            line = line.replace(padding_token, " " * padding_width)

        header_lines.append(line + linesep)

    # New header followed by the rest of the original file
    output = "".join(header_lines) + appendix

    return (output, lines != output, True)
def __try_string_search(self, lines, license_template):
    """Try finding license with string search.

    The comment block at the top of the file is considered the license if
    "Copyright (c)" was seen while scanning it.

    Keyword arguments:
    lines -- contents of the file as a single string
    license_template -- unused by this function

    Returns:
    Tuple of whether license was found, year, and file contents after
    license.
    """
    linesep = Task.get_linesep(lines)

    # Leading whitespace is dropped before splitting into lines
    candidate_lines = lines.lstrip().split(linesep)

    # Comment boundary tokens: "/*" and "*/" anywhere, "//" only at the start
    # of a line
    boundary_regex = regex.compile(r"/\*|\*/|^//")

    has_copyright = False
    comment_line_count = 0
    inside_block_comment = False
    for text in candidate_lines:
        if "Copyright (c)" in text:
            has_copyright = True

        boundary_tokens = [m.group() for m in boundary_regex.finditer(text)]
        for boundary in boundary_tokens:
            if boundary == "/*":
                inside_block_comment = True
            elif boundary == "*/":
                inside_block_comment = False

        # The leading comment ends at the first line that neither contains a
        # boundary token nor lies inside a multi-line comment
        if not inside_block_comment and not boundary_tokens:
            break
        comment_line_count += 1

    year = self.__current_year

    # Without a non-empty leading comment mentioning the copyright, report
    # that no license was found and hand back the stripped file
    if not has_copyright or comment_line_count <= 0:
        return (False, year, linesep + lines.lstrip())

    remainder = \
        linesep + linesep.join(candidate_lines[comment_line_count:]).lstrip()

    # Use the first year that follows "Copyright (c)" in the license comment
    year_regex = regex.compile(r"Copyright \(c\)(?>.*?\s(20..))")
    for text in candidate_lines[:comment_line_count]:
        found = year_regex.search(text)
        if found:
            year = found.group(1)
            break

    return (True, year, remainder)
def run_pipeline(self, config_file, name, lines):
    """Report "using" declarations that appear outside any pair of braces.

    Braces and "using" declarations inside comments, string literals, or
    character literals are ignored. A "NOLINT" marker on the same line
    suppresses the report.

    Keyword arguments:
    config_file -- unused by this function
    name -- file name, used as the prefix of printed diagnostics
    lines -- contents of the file as a single string

    Returns:
    Tuple of the unmodified file contents, False (the contents are never
    changed here), and whether no offending declaration was found.
    """
    linesep = Task.get_linesep(lines)

    # Split once, on the same separator used for counting line numbers below,
    # so the line looked up for "NOLINT" is always the line that was counted.
    # str.splitlines() also breaks on form feeds and other boundaries, which
    # can shift the index relative to the count.
    file_lines = lines.split(linesep)

    format_succeeded = True

    # Tokenize file as brace opens, brace closes, and "using" declarations.
    # "using" declarations are scoped, so content inside any bracket pair is
    # considered outside the global namespace.
    token_regex = regex.compile(r"/\*|\*/|//|\\\\|\\\"|\"|\\'|'|" + linesep +
                                r"|\{|\}|using\s[^;]*;")

    brace_count = 0
    in_multicomment = False
    in_singlecomment = False
    in_string = False
    in_char = False
    for match in token_regex.finditer(lines):
        token = match.group()

        # NOTE: the order of the branches below matters; each one assumes
        # that the earlier conditions were false
        if token == "/*":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = True
        elif token == "*/":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = False
        elif token == "//":
            if not in_multicomment and not in_string and not in_char:
                in_singlecomment = True
        elif in_singlecomment and linesep in token:
            # Ignore token if it's in a singleline comment. Only check it
            # for newlines to end the comment.
            in_singlecomment = False
        elif in_multicomment or in_singlecomment:
            # Tokens processed after this branch are ignored if they are in
            # comments
            continue
        elif token == "\\\"":
            # An escaped double quote never opens or closes a string
            continue
        elif token == "\"":
            if not in_char:
                in_string = not in_string
        elif token == "\\'":
            # An escaped single quote never opens or closes a char literal
            continue
        elif token == "'":
            if not in_string:
                in_char = not in_char
        elif in_string or in_char:
            # Tokens processed after this branch are ignored if they are in
            # double or single quotes
            continue
        elif token == "{":
            brace_count += 1
        elif token == "}":
            brace_count -= 1
        elif token.startswith("using") and brace_count == 0:
            linenum = lines.count(linesep, 0, match.start()) + 1
            if "NOLINT" not in file_lines[linenum - 1]:
                format_succeeded = False
                print(name + ": " + str(linenum) + ": '" + token +
                      "' in global namespace")

    return (lines, False, format_succeeded)
def run_pipeline(self, config_file, name, lines):
    """Regenerate the comment and layout of JNI function signatures.

    Each "JNIEXPORT <ret> JNICALL Java_..." signature (together with a block
    comment directly before it, if any) is replaced by a freshly written
    "Class / Method / Signature" comment and a re-wrapped signature.

    Keyword arguments:
    config_file -- unused by this function
    name -- unused by this function
    lines -- contents of the file as a single string

    Returns:
    Tuple of the resulting contents, whether they differ from the input, and
    True. The input is returned unchanged when no signature matched.
    """
    linesep = Task.get_linesep(lines)

    # Signature head: optional preceding block comment, return type, function
    # name, the JNIEnv parameter (name optional), and the jclass parameter
    # (name optional). The remaining parameters are parsed by regex_arg.
    regex_str_sig = r"(/\*(?>(.|\n)*?\*/)\s+)?" + \
        r"JNIEXPORT\s+(?P<ret>\w+)\s+JNICALL\s+" + \
        r"(?P<func>Java_\w+)\s*\(\s*" + \
        r"(?P<env_type>JNIEnv\s*\*\s*)" + \
        r"(?P<env_name>\w+)?,\s*jclass\s*(?P<jclass_name>\w*)?"
    regex_sig = regex.compile(regex_str_sig)

    # Splits "Java_<class>_<method>": the method is the text after the last
    # underscore, the class is everything between
    regex_str_func = r"Java_(?P<class>\w+)_(?P<method>[^_]+)$"
    regex_func = regex.compile(regex_str_func)

    # Matches a comma followed by the type, an optional variable name, and
    # an optional closing parenthesis
    #
    # Compiled with VERBOSE, so the literal spaces in the pattern are ignored
    regex_str_arg = (r", \s* (?P<arg>(?P<arg_type>[\w\*]+)(\s+ \w+)?)|\)\s*"
                     r"(?P<trailing>{|;)")
    regex_arg = regex.compile(regex_str_arg, regex.VERBOSE)

    output = ""
    # Index in "lines" up to which text has already been copied to "output"
    pos = 0
    for match_sig in regex_sig.finditer(lines):
        comment = ""
        signature = ""

        # Copy the text between the previous signature and this one
        if match_sig.start() > 0:
            output += lines[pos:match_sig.start()]

        # Add JNI-specific args
        jni_args = " ("
        if match_sig.group("env_type"):
            jni_args += match_sig.group("env_type")
        if match_sig.group("env_name"):
            jni_args += match_sig.group("env_name")
        jni_args += ", jclass"
        if match_sig.group("jclass_name"):
            jni_args += " " + match_sig.group("jclass_name")

        # Write JNI function comment. Splitting at "__" removes overload
        # annotation from method comment
        #
        # NOTE(review): "match" is None if the function name has no
        # underscore after "Java_<class>"; the attribute accesses below would
        # then raise -- confirm such names cannot occur
        match = regex_func.search(match_sig.group("func").split("__")[0])
        comment += "/*" + linesep + \
            " * Class: " + match.group("class") + linesep + \
            " * Method: " + match.group("method") + linesep + \
            " * Signature: ("

        signature += "JNIEXPORT " + match_sig.group("ret") + " JNICALL" + \
            linesep + match_sig.group("func") + linesep + jni_args

        # Add other args
        line_length = len(jni_args)
        for match_arg in regex_arg.finditer(lines[match_sig.end():]):
            # The closing parenthesis ends the parameter list
            if ")" in match_arg.group():
                break
            # If args going past 80 characters
            elif line_length + len(", ") + len(
                    match_arg.group("arg")) + len(")") > 80:
                # Put current arg on next line and set line_length to
                # reflect that
                signature += "," + linesep + " " + match_arg.group("arg")
                line_length = len(" " + match_arg.group("arg"))
            else:
                signature += ", " + match_arg.group("arg")
                line_length += len(", ") + len(match_arg.group("arg"))
            # presumably map_jni_type() returns the JNI type signature code
            # for the C type -- verify against its definition
            comment += self.map_jni_type(match_arg.group("arg_type"))
        comment += ")" + self.map_jni_type(match_sig.group("ret")) + linesep + \
            " */" + linesep

        # Output correct trailing character for declaration vs definition
        #
        # NOTE(review): "match_arg" is whatever the loop above left behind;
        # if regex_arg found nothing after this signature it is unbound (or
        # stale from a previous signature)
        if match_arg.group("trailing") == "{":
            signature += ")" + linesep + "{"
        else:
            signature += ");"

        output += comment + signature
        # match_arg offsets are relative to the end of the signature head
        pos = match_sig.end() + match_arg.end()

    # Write rest of file
    if pos < len(lines):
        output += lines[pos:]

    if output == "" or output == lines:
        return (lines, False, True)
    else:
        return (output, True, True)