def processFile(inputFile, mapOutputFilename):
    """Replace LOG_FUNCTION invocations in a preprocessed file with generated code.

    Reads ``inputFile`` line by line, follows preprocessor line markers of
    the form ``# lineNumber "filename" flags`` to track the logical source
    location, and rewrites every detected LOG_FUNCTION invocation into a call
    to a function produced by FunctionGenerator. The rewritten text is
    written to ``inputFile + "i"`` and the generator's mapping is written via
    ``outputMappingFile(mapOutputFilename)``.

    Args:
        inputFile: Path of the file to scan. The detection heuristic assumes
            the file was already run through the C/C++ preprocessor, so
            comments are stripped and macros are resolved.
        mapOutputFilename: Path handed to
            ``FunctionGenerator.outputMappingFile``.

    Raises:
        SystemExit: with status 1, after printing a diagnostic, when a
            ValueError is raised while scanning (malformed log statement,
            log statement before the injection marker, non-literal format
            string, or a ValueError from the function generator).
    """
    functionGenerator = FunctionGenerator()
    directiveRegex = re.compile(r'^# (\d+) "(.*)"(.*)')

    # Logical location in a file based on GNU preprocessor directives.
    # Initialized before the try block so the error handler can always
    # report a location, even if reading the file itself fails.
    ppFileName = inputFile
    ppLineNum = 0

    # The first filename referenced by the preprocessor directives, which
    # should be the name of the file being compiled.
    firstFilename = None

    # Index of the line after which generated, inlined code can be injected.
    # None means the injection marker has not been seen yet. Index 0 is a
    # valid position, so this must always be compared against None.
    inlineCodeInjectionLineIndex = None

    with open(inputFile) as f, open(inputFile + "i", 'w') as output:
        try:
            lines = f.readlines()
            lineIndex = -1

            # A manual index is required: lines are inserted into the list
            # while scanning, so its length grows during iteration.
            while lineIndex < len(lines) - 1:
                lineIndex += 1
                line = lines[lineIndex]

                # Track the user-visible source line number so that our own
                # line markers and error reports match the original source.
                ppLineNum += 1

                if line.startswith("#"):
                    directive = directiveRegex.match(line)
                    if directive:
                        # -1 because the marker describes the line after it;
                        # the increment at the top of the loop restores it.
                        ppLineNum = int(directive.group(1)) - 1
                        ppFileName = directive.group(2)
                        if not firstFilename:
                            firstFilename = ppFileName
                        continue

                if INJECTION_MARKER in line:
                    inlineCodeInjectionLineIndex = lineIndex
                    continue

                if ppFileName in ignored_files:
                    continue

                # Optimization: skip the character scan for lines that cannot
                # contain a log statement.
                if LOG_FUNCTION not in line:
                    continue

                # Look for LOG_FUNCTION outside of string literals. Quote and
                # escape state is tracked per line.
                prevWasEscape = False
                inQuotes = False
                charOffset = -1

                while charOffset < len(line) - 1:
                    charOffset += 1
                    c = line[charOffset]

                    # A backslash and the character it escapes are skipped.
                    if c == "\\" or prevWasEscape:
                        prevWasEscape = not prevWasEscape
                        continue

                    if c == "\"":
                        inQuotes = not inQuotes

                    # (a) LOG_FUNCTION must be spelled out here, outside of
                    # quotes. startswith() is bounds-safe when the candidate
                    # sits too close to the end of the line.
                    if inQuotes or not line.startswith(LOG_FUNCTION,
                                                       charOffset):
                        continue

                    # (b) The preceding character must not be an identifier
                    # character ([a-zA-Z0-9_]), otherwise this is the tail of
                    # a longer name. Read it straight from the line so the
                    # check can never observe a stale value.
                    if charOffset > 0:
                        prevChar = line[charOffset - 1]
                        if prevChar.isalnum() or prevChar == '_':
                            continue

                    # (c) The next meaningful character must open a call.
                    filePosAfter = FilePosition(
                        lineIndex, charOffset + len(LOG_FUNCTION))
                    mChar, _unusedPos = peekNextMeaningfulChar(
                        lines, filePosAfter)
                    if mChar != "(":
                        continue

                    # At this point we are fairly sure this is a genuine log
                    # statement: parse it and start modifying the code.
                    logStatement = parseLogStatement(
                        lines, (lineIndex, charOffset))
                    lastLogStatementLine = \
                        logStatement['semiColonPos'].lineNum

                    # Source lines of the statement, attached to errors.
                    statementLines = lines[lineIndex:lastLogStatementLine + 1]

                    if len(logStatement['arguments']) < 2:
                        raise ValueError(
                            "NANO_LOG statement expects at least 2 arguments"
                            ": a LogLevel and a literal format string",
                            statementLines)

                    # Expected shape: LOG_FN(LogLevel, FormatString, ...),
                    # hence the fixed argument indexes.
                    logLevel = logStatement['arguments'][0].source
                    fmtArg = logStatement['arguments'][1]
                    fmtString = extractCString(fmtArg.source)

                    if inlineCodeInjectionLineIndex is None:
                        raise ValueError(
                            "NANO_LOG statement occurred before "
                            "#include-ing the NanoLog header!",
                            statementLines)

                    if not fmtString:
                        raise ValueError(
                            "NANO_LOG statement expects a literal format "
                            "string for its second argument",
                            statementLines)

                    # If the generator rejects the statement, re-raise with
                    # the statement's source lines attached for the report.
                    try:
                        (_recordDecl, recordFn) = \
                            functionGenerator.generateLogFunctions(
                                logLevel, fmtString, firstFilename,
                                ppFileName, ppLineNum)
                    except ValueError as e:
                        raise ValueError(e.args[0], statementLines)

                    # Inject the code. The LOG_FUNCTION token is replaced by
                    # the generated function's name inside a new scope, and
                    # line markers are emitted around the injected text so
                    # compiler diagnostics still refer to the user's file:
                    #
                    #   input:  A(); LOG("Hello!"); B();
                    #   output: A();
                    #           # 10 "injectedCode.fake"
                    #           { GENERATED_FUNC_NAME
                    #           # 10 "filename.cc"
                    #                   ("Hello!");
                    #           }
                    #           # 10 "filename.cc"
                    #                              B();
                    #
                    # Work from back to front so earlier indices stay valid:
                    # first split the line holding the terminating semicolon.
                    scLineNum, scOffset = logStatement['semiColonPos']
                    scLine = lines[scLineNum]

                    # Extrapolate the symbolic line number of that line.
                    scPPLineNum = ppLineNum + (scLineNum - lineIndex)

                    scHeadLine = scLine[:scOffset + 1] + "\r\n"
                    scMarker = "# %d \"%s\"\r\n" % (scPPLineNum, ppFileName)
                    # Pad the tail so its columns match the original line.
                    scTailLine = " " * (scOffset + 1) + scLine[scOffset + 1:]

                    # head / closing brace of the new scope / marker / tail
                    lines[scLineNum:scLineNum + 1] = [
                        scHeadLine, "}\r\n", scMarker, scTailLine]

                    # Refresh the working line in case it was just split.
                    if scLineNum == lineIndex:
                        line = lines[lineIndex]

                    # Then replace the LOG_FUNCTION token on the first line.
                    offsetAfterLogFn = charOffset + len(LOG_FUNCTION)
                    headOfLine = line[:charOffset]
                    tailOfLine = line[offsetAfterLogFn:].rjust(len(line))

                    lines[lineIndex] = (
                        headOfLine
                        + "\r\n# %d \"injectedCode.fake\"\r\n" % ppLineNum
                        + "{ " + recordFn
                        + "\r\n# %d \"%s\"\r\n" % (ppLineNum, ppFileName)
                        + tailOfLine)

        except ValueError as e:
            # Not every ValueError carries (message, source lines); tolerate
            # shorter argument tuples instead of failing inside the handler.
            message = e.args[0] if e.args else ""
            context = "".join(e.args[1]) if len(e.args) > 1 else ""
            print("\r\n%s:%d: Error - %s\r\n\r\n%s\r\n" % (
                ppFileName, ppLineNum, message, context))
            sys.exit(1)

        # Last step: retrieve the generated code and insert it right after
        # the injection marker line.
        recFns = functionGenerator.getRecordFunctionDefinitionsFor(
            firstFilename)
        if recFns:
            # An assert is fine here: a log statement without a preceding
            # marker was already rejected during the scan above.
            assert inlineCodeInjectionLineIndex is not None
            codeToInject = "\r\n\r\n# 1 \"generatedCode.h\" 3\r\n" \
                + "\r\n".join(recFns)
            lines.insert(inlineCodeInjectionLineIndex + 1, codeToInject)

        output.writelines(lines)

    # The output file is closed at this point.
    functionGenerator.outputMappingFile(mapOutputFilename)
def test_outputCompilationFiles(self):
    """Merge two mapping files and compare the resulting header to the
    expected file under unitTestData/."""
    self.maxDiff = None

    mapNames = ("map1.map", "map2.map")

    # Arguments for generateLogFunctions(), grouped by the map file they
    # end up in. The second group repeats one entry of the first group so
    # that merging is exercised as well.
    entriesPerMap = (
        (
            ("DEBUG", "A", "mar.cc", "mar.cc", 293),
            ("DEBUG", "B", "mar.cc", "mar.cc", 294),
            ("DEBUG", "C", "mar.cc", "mar.cc", 200),
            ("DEBUG", "D %d", "s.cc", "s.cc", 100),
        ),
        (
            ("DEBUG", "A", "mar.cc", "mar.cc", 293),
            ("DEBUG", "A", "mar.cc", "mar.h", 1),
            ("DEBUG", "E", "del.cc", "del.cc", 199),
        ),
    )

    # Each map file is produced by its own, freshly created generator.
    for mapName, entries in zip(mapNames, entriesPerMap):
        generator = FunctionGenerator()
        for entry in entries:
            generator.generateLogFunctions(*entry)
        generator.outputMappingFile(mapName)

    # Merge the two map files
    FunctionGenerator.outputCompilationFiles("test.h", list(mapNames))

    headerMatches = filecmp.cmp(
        "test.h", "unitTestData/test_outputCompilationFiles.h")
    self.assertTrue(headerMatches)

    # Clean up the files generated by this test
    for generatedFile in mapNames + ("test.h",):
        os.remove(generatedFile)