def getResultSimilarityMatrix(input, fileName):
    """Build the pairwise similarity matrix of the rows of ``input``.

    Entry ``[x, y]`` of the result is ``calSimilarityIndex(input[x], input[y])``
    for every ordered pair of row indices. The finished matrix is handed to
    ``eu.printNumpyArrayInExcel`` together with ``fileName`` before it is
    returned.

    Args:
        input: Object exposing ``shape[0]`` (row count) and integer row
            indexing.
        fileName: Destination passed through to ``eu.printNumpyArrayInExcel``.

    Returns:
        A ``rows x rows`` float array of similarity values.
    """
    size = input.shape[0]
    matrix = np.zeros(shape=(size, size))

    for rowIdx in range(size):
        left = input[rowIdx]
        for colIdx in range(size):
            matrix[rowIdx, colIdx] = calSimilarityIndex(left, input[colIdx])

    eu.printNumpyArrayInExcel(matrix, fileName)
    return matrix
def getAllOptimizedPaths(G, printToExcel, filename, dirname):
    """Collect the long simple paths between the endpoints of every edge.

    For each edge ``(u, v)`` of ``G`` every simple path from ``u`` to ``v`` is
    enumerated with ``nx.all_simple_paths``. Each path is printed, and the
    ones containing more than five nodes are kept.

    Args:
        G: Graph whose edges supply the (source, target) pairs.
        printToExcel: When truthy, the kept paths are written to a
            "pathsoptimized" sheet of a new workbook created at ``filename``.
        filename: Path of the workbook to create.
        dirname: Prefix for the ``path.dot`` file name handed to
            ``createPathGraphFromList``.

    Returns:
        list: The kept paths, each a list of nodes.
    """
    all_paths = []
    for edge in G.edges():
        print("\n")
        for path in nx.all_simple_paths(G, source=edge[0], target=edge[1]):
            print(edge)
            print(path)
            if len(path) > 5:
                all_paths.append(path)

    if printToExcel:
        workbook = xlsxwriter.Workbook(filename)
        try:
            eu.writeToExcellists(all_paths, "pathsoptimized", workbook)
        finally:
            # xlsxwriter only writes the file to disk when close() is called.
            # NOTE(review): assumes eu.writeToExcellists leaves the workbook
            # open -- confirm against the helper.
            workbook.close()

    createPathGraphFromList(all_paths, dirname + 'path.dot')
    return all_paths
def getAllPathsWithoutAbbreviations(G, printToExcel, dirname):
    """Collect every simple path between the endpoints of every edge.

    For each edge ``(u, v)`` of ``G`` every simple path from ``u`` to ``v`` is
    enumerated with ``nx.all_simple_paths``, printed, and kept.

    Args:
        G: Graph whose edges supply the (source, target) pairs.
        printToExcel: When truthy, the paths are written to a "paths" sheet
            of a new workbook named ``pathsNoAbbreviation.xlsx``.
        dirname: Unused; kept so existing callers keep working.

    Returns:
        list: All enumerated paths, each a list of nodes.
    """
    all_paths = []
    for edge in G.edges():
        print("\n")
        for path in nx.all_simple_paths(G, source=edge[0], target=edge[1]):
            print(edge)
            print(path)
            all_paths.append(path)

    if printToExcel:
        workbook = xlsxwriter.Workbook('pathsNoAbbreviation.xlsx')
        try:
            eu.writeToExcellists(all_paths, "paths", workbook)
        finally:
            # xlsxwriter only writes the file to disk when close() is called.
            # NOTE(review): assumes eu.writeToExcellists leaves the workbook
            # open -- confirm against the helper.
            workbook.close()

    return all_paths
def analyze(dirname, directory):
    """Scan the ``.py`` files under ``directory`` and export structure reports.

    Every ``.py`` file found by ``os.walk(directory)`` is read as UTF-8 and
    split on whitespace. Tokens that ``stopWordsRemoval.isStopWord`` flags, or
    that are at most two characters long, are skipped. The token following
    ``def`` / ``class`` / ``import`` is recorded as a function, class or
    import of that file and attached to a ``Node`` tree rooted at ``dirname``.
    Inside a function, tokens accepted by ``checkWordForValidFunction`` are
    recorded as callees of the current method and added as edges to two call
    graphs (abbreviated and full names).

    The collected dictionaries are written to ``<dirname>data.xlsx``,
    ``<dirname>function.xlsx`` and ``<dirname>count.xlsx``; the call graphs
    are then passed to the ``gu`` path helpers, encoded with
    ``ceu.encodeValues``, turned into a similarity matrix and clustered.

    Args:
        dirname: Name of the tree's root node and prefix of every output
            file path.
        directory: Root directory to walk; also stripped from file paths to
            form the per-file keys.

    Returns:
        tuple: ``(rootNode, pathMatrix)`` -- the root of the node tree and
        the matrix returned by ``cs.getResultSimilarityMatrix``.
    """
    start = TimeUtility.start()
    anu = an.Abbreviations()
    rootNode = Node(dirname)

    fileFxnDictionary = {}
    fileImportDictionary = {}
    fileClassDictionary = {}
    callerCalleeFxn = {}
    fileFxnCount = {}
    fileImportCount = {}
    fileClassCount = {}
    # A set, so the count printed below really is the number of distinct
    # imported names rather than the number of files that import something.
    uniqueImports = set()
    fxnGraph = nx.DiGraph()
    fxnGraphFull = nx.DiGraph()

    for root, _directories, files in os.walk(directory):
        for filename in files:
            # Build the full path with forward slashes only.
            filepath = root.replace("\\", "/") + "/" + filename
            if not filepath.endswith(".py"):
                continue

            relativePath = filepath.replace(directory, "")
            localFileNode = Node(relativePath, parent=rootNode)

            functionName = []
            className = []
            importModules = []
            isNextWordfxn = False
            isNextWordclass = False
            isNextWordImport = False
            classChildren = False
            fxnChildren = False
            mainFxnNode = None
            clsNode = None

            # The file is only read, so open it read-only and release the
            # handle as soon as the tokens are in memory.
            with open(filepath, "r", encoding="utf8") as sourceFile:
                words = sourceFile.read().split()

            for word in words:
                if stopWordsRemoval.isStopWord(word) or len(word) <= 2:
                    continue

                if isNextWordclass:
                    # Only names carrying "(" are recorded in className; the
                    # tree node is created for every token following "class".
                    if "(" in word:
                        className.append(word)
                    classChildren = True
                    clsNode = Node("class:" + word, parent=localFileNode)
                    isNextWordclass = False
                elif isNextWordfxn:
                    print("fxnname", word)
                    # Keep only the part before the parameter list.
                    word = word.split("(")[0]
                    functionName.append(word.lower())
                    isNextWordfxn = False
                    # fxnChildren is always False here: the "def" token that
                    # set isNextWordfxn also reset it, so the only question
                    # is whether a class has been seen in this file.
                    if classChildren:
                        mainFxnNode = Node("Fxn:" + word, parent=clsNode)
                    else:
                        Node("Fxn:" + word, parent=localFileNode)
                    fxnChildren = True
                elif isNextWordImport:
                    importModules.append(word)
                    isNextWordImport = False
                    Node("Import:" + word, parent=localFileNode)

                if word == "def":
                    isNextWordfxn = True
                    fxnChildren = False
                elif word == "class":
                    isNextWordclass = True
                    fxnChildren = False
                elif word == "import":
                    isNextWordImport = True
                elif checkWordForValidFunction(word) and fxnChildren:
                    print("got new function lets see::::", word)
                    if mainFxnNode is not None:
                        Node("Fxn:" + word, parent=mainFxnNode)
                        # NOTE(review): each callee overwrites the previous
                        # one, so only the last callee per caller is kept --
                        # confirm whether a growing list was intended.
                        callerCalleeFxn[mainFxnNode.name] = [word]
                        fxnGraph.add_edge(
                            anu.get(su.sanitize(mainFxnNode.name)),
                            anu.get(su.sanitize(word)),
                        )
                        fxnGraphFull.add_edge(
                            su.sanitize(mainFxnNode.name),
                            su.sanitize(word),
                        )

            if functionName:
                distinctFxns = set(functionName)
                fileFxnDictionary[relativePath] = distinctFxns
                fileFxnCount[relativePath] = len(distinctFxns)
            if className:
                distinctClasses = set(className)
                fileClassDictionary[relativePath] = distinctClasses
                fileClassCount[relativePath] = len(distinctClasses)
            if importModules:
                distinctImports = set(importModules)
                fileImportDictionary[relativePath] = distinctImports
                fileImportCount[relativePath] = len(distinctImports)
                uniqueImports.update(distinctImports)

    workbook = xlsxwriter.Workbook(dirname + "data" + ".xlsx")
    workbook1 = xlsxwriter.Workbook(dirname + "function" + ".xlsx")
    workbook2 = xlsxwriter.Workbook(dirname + "count" + ".xlsx")
    try:
        ExcelUtility.writeToExcel(callerCalleeFxn, "CallerCalleFxn", workbook1)
        ExcelUtility.writeToExcel(anu.shortNames, "FxnAbbre.", workbook1)
        ExcelUtility.writeToExcel(fileFxnDictionary, "functionInfo", workbook)
        ExcelUtility.writeToExcel(fileClassDictionary, "classInfo", workbook)
        ExcelUtility.writeToExcel(fileImportDictionary, "importInfo", workbook)
        ExcelUtility.writeToExcelCount(fileFxnCount, "fxncount", workbook2)
        ExcelUtility.writeToExcelCount(fileImportCount, "importcount", workbook2)
        ExcelUtility.writeToExcelCount(fileClassCount, "classcount", workbook2)
    finally:
        # xlsxwriter only writes a workbook to disk when close() is called.
        for book in (workbook, workbook1, workbook2):
            book.close()

    dumpclean(callerCalleeFxn)
    print("tree:")
    PrintUtility.printTree(rootNode)
    print("Unique Imports are:", len(uniqueImports))

    gu.getAllPaths(fxnGraph, True, dirname)
    gu.getAllPathsWithoutAbbreviations(fxnGraphFull, True, dirname)

    # Visualize the paths and collect the optimized call chains (function f1
    # calls f2, which calls f3, and so on). The results are saved to the
    # Excel file named below.
    optimizedPathsFile = dirname + 'pathsoptimized.xlsx'
    all_paths = gu.getAllOptimizedPaths(fxnGraph, True, optimizedPathsFile, dirname)
    gu.getAllOptimizedPathsWithoutAbbreviations(
        fxnGraphFull, True, dirname + "pathsoptimizedNoAbbre.xlsx", dirname
    )

    print("anu.counter:", anu.counter)
    callerMatrix = ceu.encodeValues(all_paths, anu.counter, dirname)
    pathMatrix = cs.getResultSimilarityMatrix(callerMatrix, dirname + "similarity.csv")
    cluster.test(dirname, pathMatrix)
    TimeUtility.end(start)

    PrintUtility.printTree(rootNode)
    return rootNode, pathMatrix
def printMatrixToExcel(matrix, fileName):
    """Forward ``matrix`` and ``fileName`` to ``eu.printNumpyArrayInExcel``.

    The helper's return value is discarded; this function returns ``None``.
    """
    eu.printNumpyArrayInExcel(
        matrix,
        fileName,
    )