def analyzePreDir(line, dataFile):
    """Rebuild the ``#include`` / ``#define`` directives found on ``line``.

    Everything before the first ``#`` is discarded, the remainder is split on
    ``#`` and every piece that mentions ``include`` or ``define`` is emitted as
    ``#<piece>`` followed by a newline. A piece made of a single word borrows
    the text that precedes the first ``#`` of the upcoming line (obtained from
    ``peek(dataFile)``) when ``isBlank`` reports that line as non-blank.

    Args:
        line: Text containing one or more preprocessor directives.
        dataFile: Input file object; only passed to ``peek``.

    Returns:
        The rebuilt directive lines concatenated into one string ('' when no
        piece mentions ``include`` or ``define``).
    """
    # Drop whatever precedes the first directive marker.
    # NOTE(review): find() yields -1 when there is no '#', which keeps only
    # the last character of the line.
    hashIndex = line.find("#")
    trimmed = line[hashIndex:]

    upcomingLine = peek(dataFile)
    rebuilt = []

    for rawPiece in trimmed.strip().split("#"):
        piece = rawPiece.strip()

        if "include" in piece:
            # Insert the missing space in "include<...>"
            if "include " not in piece:
                piece = piece.replace("include<", "include <")
        elif "define" in piece:
            # Make sure the keyword is followed by a space.
            # NOTE(review): a lone "define" becomes "define " here, which
            # splits into two parts, so the borrow below never fires for it.
            if "define " not in piece:
                piece = piece.replace("define", "define ")
        else:
            # Not a directive this function handles (e.g. the empty piece
            # in front of the first '#')
            continue

        # A directive without content takes it from the upcoming line
        if len(piece.split(" ")) <= 1 and not isBlank(upcomingLine):
            piece = piece + " " + upcomingLine.split("#")[0]

        rebuilt.append("#" + piece + "\n")

    return "".join(rebuilt)
def readNotBlankLine(dataFile):
    """Return the next line of ``dataFile`` that ``isBlank`` does not reject.

    Lines are read with ``readline()`` until one is found that is not blank.

    Args:
        dataFile: File object positioned where reading should start.

    Returns:
        The first non-blank line read, or "" when the end of the file is
        reached before one is found.
    """
    line = dataFile.readline()
    while isBlank(line):
        # readline() returns an empty string only at EOF; stop there instead
        # of looping forever on a stream that has nothing left.
        if not line:
            return ""
        line = dataFile.readline()
    return line
def parseComment(line, dataFile, counts):
    """Count the comment on ``line`` and report whether the line is only a comment.

    Nothing is counted unless ``line`` contains one of ``constants.COMMENTS``.
    A ``/*`` comment without ``//`` is counted once and ``dataFile`` is
    advanced until a line containing ``*/`` has been read. The ``//`` check
    that follows counts the line unless the comment starts at column 0 and the
    upcoming line (from ``peek``) also starts with ``/``, so a run of comment
    lines is counted once.

    Args:
        line: Current source line.
        dataFile: Input file object; may be advanced past a block comment.
        counts: Dict of counters; ``counts['comments']`` is incremented.

    Returns:
        True when what precedes the comment is blank according to ``isBlank``,
        False otherwise (including when the line holds no comment marker).
    """
    if any(c in line for c in constants.COMMENTS):
        if "/*" in line and "//" not in line:
            counts['comments'] += 1
            while "*/" not in line:
                line = dataFile.readline()
                if not line:
                    # EOF inside an unterminated block comment: readline()
                    # would keep returning "" and the loop would never end.
                    break

        # Check if the comment is its own line or part of another line of code
        location = line.find("//")

        # Only count blocks of comments once. startswith() compares by value
        # and copes with an empty peeked line (end of file).
        if location != 0 or not peek(dataFile).startswith('/'):
            counts['comments'] += 1

        # Remove the comment part of the line.
        # NOTE(review): when the line has no "//", location is -1, so the
        # check above also counts it and this slice drops the last character.
        line = line[:location]
        if isBlank(line):
            # The entire line was a comment
            return True
    return False  # entire line was not a comment
def _parseLoopBlock(line, dataFile, complexityData, counts, skipEmptyBlock):
    """Hand a loop body to parseLoop while tracking loop nesting.

    When ``complexityData.inLoop`` is already set the loop is counted in
    ``counts['nestedLoops']``; otherwise the flag is set for the duration of
    the parse and cleared afterwards. With ``skipEmptyBlock`` the body is only
    parsed when ``line`` contains neither '}' nor ';'.
    """
    hasBody = not skipEmptyBlock or ('}' not in line and ';' not in line)
    if complexityData.inLoop:
        # If already in a loop, increment nestedLoop
        counts['nestedLoops'] += 1
        if hasBody:
            parseLoop(dataFile, complexityData, counts)
    else:
        complexityData.inLoop = True
        if hasBody:
            parseLoop(dataFile, complexityData, counts)
        complexityData.inLoop = False


def _parseIfBlock(line, dataFile, complexityData, counts):
    """Hand a conditional body to parseIf while tracking conditional nesting.

    Mirrors _parseLoopBlock using ``complexityData.inIf`` and
    ``counts['nestedIfs']``; the body is skipped when ``line`` contains '}' or
    ';' (empty block).
    """
    hasBody = "}" not in line and ';' not in line
    if complexityData.inIf:
        # If already in a conditional, increment nestedIf
        counts['nestedIfs'] += 1
        if hasBody:
            parseIf(dataFile, complexityData, counts)
    else:
        complexityData.inIf = True
        if hasBody:
            parseIf(dataFile, complexityData, counts)
        complexityData.inIf = False


def _parseElseBlock(line, dataFile, complexityData, counts):
    """Parse the body that follows an ``else`` line.

    Nothing is read when ``line`` already contains '}'. Otherwise the next
    non-blank line is read; unless it contains one of ``constants.IF_KEYWORDS``
    it is analyzed and, when it has no '}', the rest is given to parseIf.
    """
    if "}" in line:
        return
    # Check to make sure the next line does not say "if", which
    # would cause a recursive call that would mess up "}"
    secondElseLine = readNotBlankLine(dataFile)
    if not any(wk in secondElseLine for wk in constants.IF_KEYWORDS):
        analyzeLine(secondElseLine, dataFile, complexityData, counts)
        if "}" not in secondElseLine:
            parseIf(dataFile, complexityData, counts)


def analyzeLine(line, dataFile, complexityData, counts):
    """Update the metric counters for one line of the analyzed source.

    The line is processed in three stages:

    * General metrics - comment handling via ``parseComment``, then keyword
      based counters ('lines', 'time', 'rand', 'input', 'plot', 'print').
    * Halstead metrics - the line is split on spaces and each token is filed
      into ``complexityData.halOperatorList`` / ``halOperandList``.
    * Complex metrics - functions, loops, conditionals and switches are
      counted and their bodies are consumed from ``dataFile`` by the
      ``parseFunction`` / ``parseLoop`` / ``parseIf`` / ``parseSwitch``
      helpers.

    NOTE(review): the nesting below was reconstructed from a copy of the file
    whose indentation had been lost - confirm against the original layout.

    Args:
        line: Current source line.
        dataFile: Input file object; further lines may be consumed from it.
        complexityData: Object holding ``halOperatorList``, ``halOperandList``
            and the ``inLoop`` / ``inIf`` nesting flags.
        counts: Dict of integer counters, updated in place.

    Returns:
        None.
    """
    # Get rid of blank space at the beginning of the line
    line = line.lstrip()

    # Ignore blank lines
    if isBlank(line):
        return

    ###########################################################################
    # General Metrics                                                         #
    ###########################################################################
    # Count number of lines of comments (blocks count as 1)
    # Then ignore comments
    ifComment = parseComment(line, dataFile, counts)
    if ifComment:
        # Ignore comments hanging over into the next line: the line read here
        # is consumed and intentionally discarded
        if not any(end in peek(dataFile) for end in constants.END_COMMENTS):
            readNotBlankLine(dataFile)
        return

    # Remove any comments at the end of lines
    # NOTE(review): if the marker found is not "//", find() returns -1 and the
    # slice only drops the last character of the line.
    if any(c in line for c in constants.COMMENTS):
        location = line.find("//")
        line = line[:location]  # Remove the comment part of the line

    # Number of lines of code (ignores comment only lines)
    counts['lines'] += 1

    # Number of time-related functions (tic/toc/pause/sleep/time)
    if any(func in line for func in constants.TIME_FUNCTIONS):
        counts['time'] += 1

    # Number of random numbers
    if any(func in line for func in constants.RAND_FUNCTIONS):
        counts['rand'] += 1

    # Number of input statements
    if any(ik in line for ik in constants.INPUT_KEYWORDS):
        counts['input'] += 1

    # Number of plotting statements
    if any(func in line for func in constants.PLOT_FUNCTIONS):
        counts['plot'] += 1

    # Number of print statements
    if (any(pk in line for pk in constants.PRINT_KEYWORDS)
            and not any(pk in line for pk in constants.NOT_PRINT_KEYWORDS)):
        counts['print'] += 1

    # Return if any print keywords in line to avoid finding looping and
    # conditional keywords that are in print statements
    if any(wk in line for wk in constants.WRITE_KEYWORDS) and "}" not in line:
        return

    ###########################################################################
    # Halstead Metrics                                                        #
    ###########################################################################
    tokens = line.split(" ")

    # Split tokens into operators and operands. An index loop is required
    # because text found after an operator is appended to ``tokens`` while the
    # list is being walked.
    index = 0
    while index < len(tokens):
        token = tokens[index]

        # Ignore preprocessor directive lines
        if token in constants.PRE_DIRECTIVES:
            break

        # Check for non-mathematical operators (ex. reserved words, storage identifiers)
        if token in constants.OPERATORS:
            addOperandToDict(complexityData.halOperatorList, token)
        else:
            # Check for mathematical operators (ex. <=, {}, &, ;)
            for op in constants.OPERATOR:
                if op in token:
                    # Add the operator to the dictionary
                    addOperandToDict(complexityData.halOperatorList, op)

                    # Remove the token
                    # Continue analyzing anything before it
                    # Add anything after it back into tokens
                    opIndex = token.find(op)
                    if len(token) > opIndex + len(op):
                        tokens.append(token[opIndex + len(op):])
                    token = token[:opIndex]

            # Do not count ),},] operators on their own, but not included in operand
            token = token.replace(")", '')
            token = token.replace("}", '')
            token = token.replace("]", '')

            # Add what is left of the token to operand
            if not isBlank(token):
                addOperandToDict(complexityData.halOperandList, token.strip())

        index += 1

    ###########################################################################
    # Complex Metrics                                                         #
    ###########################################################################
    # If entering main function, parse it as a function
    if "int main(" in line:
        parseFunction(line, dataFile, complexityData, counts)

    # Number of user defined functions *look for type, (), and {*
    if (any(f in line for f in constants.FUNCTION_KEYWORDS)
            and "(" in line
            and not any(nf in line for nf in constants.NOT_FUNCTION_KEYWORDS)):
        if "{" in line or "{" in peek(dataFile):
            counts['userFunc'] += 1
            parseFunction(line, dataFile, complexityData, counts)

    # Number of for loops
    if any(wk in line for wk in constants.FOR_KEYWORDS):
        counts['forLoops'] += 1
        # Continue parsing the file from in parseLoop until the correct "}" is found
        _parseLoopBlock(line, dataFile, complexityData, counts,
                        skipEmptyBlock=False)

    # Number of while loops
    if any(dk in line for dk in constants.DO_KEYWORDS):
        # Do not increment here; instead increment when "while();" is found
        # Continue parsing the file from parseLoop until the correct "}" is found
        _parseLoopBlock(line, dataFile, complexityData, counts,
                        skipEmptyBlock=True)

    if any(wk in line for wk in constants.WHILE_KEYWORDS):
        counts['whileLoops'] += 1
        # Continue parsing the file from parseLoop until the correct "}" is found
        _parseLoopBlock(line, dataFile, complexityData, counts,
                        skipEmptyBlock=True)

    # Number of if statements
    if ((any(wk in line for wk in constants.IF_KEYWORDS) and "else" not in line)
            and "else" not in peek(dataFile)):
        counts['ifElse'] += 1
        # Continue parsing the file from parseIf until the correct "}" is found
        _parseIfBlock(line, dataFile, complexityData, counts)

    if "else" in line:
        # Parse any else ifs or elses as if statements, but do not add to total num ifs
        # Continue parsing the file from parseIf until the correct "}" is found
        if complexityData.inIf:
            _parseElseBlock(line, dataFile, complexityData, counts)
        else:
            complexityData.inIf = True
            _parseElseBlock(line, dataFile, complexityData, counts)
            complexityData.inIf = False

    # Number of switch cases
    if any(s in line for s in constants.SWITCH_KEYWORDS):
        counts['switch'] += 1
        # Continue parsing the file from parseSwitch until the correct "}" is found
        parseSwitch(dataFile, complexityData, counts)
def analyzeComment(line, dataFile, outputFile):
    """Write out a ``//`` comment, splitting off code that was merged into it.

    Comment text is written to ``outputFile`` piece by piece. When ``line``
    contains one of the module-level ``commentKeywords`` (and not "print"),
    it is treated as code that ran into the comment: a ``for`` is passed to
    ``fixForLoop`` and any other keyword on a line with a "{" (or followed by
    one on the upcoming line) is passed back to ``analyzeLine``. Further lines
    are read while the upcoming line still looks like part of the comment.

    NOTE(review): the indentation of this function was lost in the copy under
    review; the attachment of the two ``else`` branches below is the most
    plausible reading and should be confirmed against the original layout.

    Args:
        line: Text starting at the comment.
        dataFile: Input file object; continuation lines are read from it.
        outputFile: Output file object the comment is written to.

    Returns:
        None.
    """
    # Keep combining lines into the comment until a keyword is found on the next line
    while True:
        peekLine = peek(dataFile)  # Peek at the next line in the file

        # If any keywords in line, print comment and restart analyzeLine at that point
        if any(ck in line for ck in commentKeywords) and "print" not in line:
            # Handle if the keyword is a for loop (special case)
            if "for" in line:
                # Get the location of the last character of "for"
                forLocation = line.find("for") + 2

                # Check that "(" comes directly after it. Positions are
                # compared by value; identity (`is`) only works by accident
                # for small integers.
                if "(" in line:
                    parenLocation = line.find("(")
                    if parenLocation in (forLocation + 1, forLocation + 2):
                        fixForLoop(line, dataFile, outputFile)
                elif "(" in peekLine:
                    parenLocation = peekLine.strip().find("(")
                    if parenLocation == 0:
                        fixForLoop(line, dataFile, outputFile)
                else:  # the "for" was not actually a loop
                    # Otherwise, write out the current part of the comment and get any
                    # remaining part of the comment in the next loop if needed
                    outputFile.write(line.strip() + " ")

            # Handle all other keywords
            elif ("{" in line
                    or ("{" in peekLine
                        and not any(ck in peekLine for ck in commentKeywords))):
                # Split the line into the comment and the comment keyword.
                # No flags are passed: the third positional argument of
                # re.search is a flag set, not a match count.
                splitLine = re.search('while|if|else|switch', line)
                if splitLine is not None:  # Make sure there was a keyword; should never fail
                    index = splitLine.start(0)

                    # Print the comment
                    comment = line[:index]
                    outputFile.write(comment.strip() + "\n")

                    # Analyze the line starting at the keyword and exit comment
                    analyzeLine(line[index:], dataFile, outputFile)
                    break
        else:
            # Otherwise, write out the current part of the comment and get any
            # remaining part of the comment in the next loop if needed
            outputFile.write(line.strip() + " ")

        # Do while
        # Note: since "int" is a keyword, "print" tends to be found accidentally
        if isBlank(peekLine) or (any(endC in peekLine for endC in endComments)
                                 and "print" not in peekLine):
            break
        line = dataFile.readline()  # only read the next line if it is part of the comment

    outputFile.write("\n")
def analyzeLine(line, dataFile, outputFile):
    """Reformat one line of the input source and write it to ``outputFile``.

    Handling order:

    1. Blank lines are ignored.
    2. A line starting with ``//`` goes to ``analyzeComment``; a trailing
       ``//`` comment is cut off and written after the code.
    3. ``#include`` / ``#define`` lines are rebuilt by ``analyzePreDir``.
    4. Type keywords glued to the following text get a space appended.
    5. Lines starting with ``for`` are passed to ``fixForLoop`` when a "("
       follows the keyword (on this or the upcoming line).
    6. Everything else is written out, broken after each character found in
       the module-level ``lineBreaks``; text without an end character is
       written with a trailing space so the next line continues it.

    The module-level flag ``noEndChar`` records whether the text written last
    lacked an end character.

    NOTE(review): the indentation of this function was lost in the copy under
    review; the nesting below is the most plausible reading and should be
    confirmed against the original layout.

    Args:
        line: Current input line.
        dataFile: Input file object (peeked at and passed to helpers).
        outputFile: Output file object.

    Returns:
        None.
    """
    import re  # local import: only needed for the word-boundary match below

    global noEndChar
    ifEndComment = False
    endComment = ""
    line = line.lstrip()

    # Ignore blank lines
    if isBlank(line):
        return

    ###########################################################################
    # Handle Comment and Preprocessor Directive Lines                         #
    ###########################################################################
    # Handle lines that are only a comment
    if line.startswith("//"):
        analyzeComment(line, dataFile, outputFile)
        return
    # Handle comments that come at the end of other lines of code
    elif "//" in line:
        ifEndComment = True
        location = line.find("//")
        endComment = line[location:]
        line = line[:location]  # Remove comment from line to print later

    # If this is a preprocessor directive line, fix up the line and print it
    if "#include" in line or "#define" in line:
        preDirective = analyzePreDir(line, dataFile)
        outputFile.write(preDirective)

        # Print out any comment that goes at the end of a line
        if ifEndComment:
            analyzeComment(endComment, dataFile, outputFile)
        return

    ###########################################################################
    # Handle Various Single-Line Issues                                       #
    ###########################################################################
    # Fix type keywords not separated by a space. The keyword must start a
    # word so identifiers that merely contain it ("printf", "putchar") are
    # left untouched.
    for typeName in ("int", "long", "void", "bool", "float", "double", "char"):
        if typeName + " " not in line:
            line = re.sub(r"\b" + typeName, typeName + " ", line)

    # Handle broken for loops not in comments
    peekLine = peek(dataFile)
    if len(line) > 2 and line[:3] == "for":
        # Get the location of the last character of "for"
        forLocation = line.find("for") + 2

        # Check that "(" comes directly after it (compared by value, not
        # identity)
        if "(" in line:
            parenLocation = line.find("(")
            if parenLocation in (forLocation + 1, forLocation + 2):
                fixForLoop(line, dataFile, outputFile)
        elif "(" in peekLine:
            parenLocation = peekLine.strip().find("(")
            if parenLocation == 0:
                fixForLoop(line, dataFile, outputFile)
        return

    ###########################################################################
    # Print Non-Comment Line                                                  #
    ###########################################################################
    # If there is a comment at the end of the line, the line of code is complete
    # so print out the line and then the comment
    if ifEndComment:
        outputFile.write(line.strip() + " ")
        analyzeComment(endComment, dataFile, outputFile)
        return

    # Combine lines separated by a newline that should not be
    if not any(ec in line for ec in endChars):
        outputFile.write(line.strip() + " ")
        noEndChar = True
    else:
        # Separate lines that have multiple end characters or print full lines
        noEndChar = False
        while "{" in line or "}" in line or ";" in line:
            # Get the index of the first line break character in line
            index = next((i for i, ch in enumerate(line) if ch in lineBreaks), None)
            if index is None:
                # None of the characters present is a configured line break;
                # leave the remainder to the write below instead of failing
                # on ``None + 1``.
                break
            outputFile.write(line[:index + 1].strip() + "\n")
            line = line[index + 1:]

            # Handle broken for loops combined with other lines
            peekLine = peek(dataFile)
            if len(line) > 2 and line[:3] == "for":
                # Get the location of the last character of "for"
                forLocation = line.find("for") + 2

                # Check that "(" comes directly after it
                if "(" in line:
                    parenLocation = line.find("(")
                    if parenLocation in (forLocation + 1, forLocation + 2):
                        fixForLoop(line, dataFile, outputFile)
                elif "(" in peekLine:
                    parenLocation = peekLine.strip().find("(")
                    if parenLocation == 0:
                        fixForLoop(line, dataFile, outputFile)
                return

        # Combine lines now separated by a newline that should not be
        # From anything remaining on the end of the current line
        if not isBlank(line):
            outputFile.write(line.strip() + " ")
            noEndChar = True