def analyze(dirname,directory):
    """Scan the ``.py`` files under *directory* and report their structure.

    Each file is split on whitespace and walked word by word with a small
    keyword-driven state machine: the word following ``def``, ``class`` or
    ``import`` is recorded as a function, class or import name. Inside a
    method, words accepted by ``checkWordForValidFunction`` are recorded as
    calls made by that method and added as edges to two call graphs (one
    keyed by abbreviated names, one by sanitized full names).

    The collected data is written to ``<dirname>data.xlsx``,
    ``<dirname>function.xlsx`` and ``<dirname>count.xlsx``; the call graphs
    are then handed to the graph, encoding, similarity and clustering helpers.

    Args:
        dirname: Label of the tree root; also used as the prefix of every
            output file name.
        directory: Root directory that is walked recursively.

    Returns:
        A ``(rootNode, pathMatrix)`` tuple: the root of the file/class/function
        tree and the value returned by ``cs.getResultSimilarityMatrix``.
    """
    start = TimeUtility.start()
    anu = an.Abbreviations()
    rootNode = Node(dirname)

    fileFxnDictionary = {}
    fileImportDictionary = {}
    fileClassDictionary = {}
    callerCalleeFxn = {}
    fileFxnCount = {}
    fileImportCount = {}
    fileClassCount = {}
    # A set, so the count printed below really is the number of distinct imports.
    uniqueImports = set()
    fxnGraph = nx.DiGraph()
    fxnGraphFull = nx.DiGraph()

    for root, _, files in os.walk(directory):
        for filename in files:
            # Normalise Windows separators so the path is always "/"-joined.
            filepath = root.replace("\\", "/") + "/" + filename
            if not filepath.endswith(".py"):
                continue

            relativePath = filepath.replace(directory, "")
            localFileNode = Node(relativePath, parent=rootNode)

            # Read-only access is enough; the context manager closes the handle.
            with open(filepath, "r", encoding="utf8") as sourceFile:
                words = sourceFile.read().split()

            functionName = []
            className = []
            importModules = []
            isNextWordfxn = False
            isNextWordclass = False
            isNextWordImport = False
            classChildren = False
            fxnChildren = False
            mainFxnNode = None
            clsNode = None

            for word in words:
                if stopWordsRemoval.isStopWord(word) or len(word) <= 2:
                    continue

                # First: consume the name announced by the previous keyword.
                if isNextWordclass:
                    # NOTE(review): only class names containing "(" are
                    # recorded in className; confirm this is intended.
                    if "(" in word:
                        className.append(word)
                    classChildren = True
                    clsNode = Node("class:" + word, parent=localFileNode)
                    isNextWordclass = False
                elif isNextWordfxn:
                    print("fxnname", word)
                    if "(" in word:
                        # Keep only the name, dropping the argument list.
                        word = word.split("(")[0]
                    functionName.append(word.lower())
                    isNextWordfxn = False
                    # fxnChildren is always False here (the "def" keyword
                    # resets it), so only the class / no-class split matters.
                    if classChildren:
                        mainFxnNode = Node("Fxn:" + word, parent=clsNode)
                    else:
                        # NOTE(review): functions seen before any class do not
                        # become mainFxnNode, so their calls are not tracked.
                        Node("Fxn:" + word, parent=localFileNode)
                    fxnChildren = True
                elif isNextWordImport:
                    importModules.append(word)
                    isNextWordImport = False
                    Node("Import:" + word, parent=localFileNode)

                # Second: classify the current word itself.
                if word == "def":
                    isNextWordfxn = True
                    fxnChildren = False
                elif word == "class":
                    isNextWordclass = True
                    fxnChildren = False
                elif word == "import":
                    isNextWordImport = True
                elif checkWordForValidFunction(word) and fxnChildren:
                    print("got new function lets see::::", word)
                    if mainFxnNode is not None:
                        Node("Fxn:" + word, parent=mainFxnNode)
                        # Accumulate callees instead of overwriting the entry,
                        # so every call made by the caller is kept.
                        callees = callerCalleeFxn.setdefault(mainFxnNode.name, [])
                        if word not in callees:
                            callees.append(word)
                        fxnGraph.add_edge(
                            anu.get(su.sanitize(mainFxnNode.name)),
                            anu.get(su.sanitize(word)),
                        )
                        fxnGraphFull.add_edge(
                            su.sanitize(mainFxnNode.name),
                            su.sanitize(word),
                        )

            # Per-file summaries; files with nothing found are left out.
            if functionName:
                distinctFxns = set(functionName)
                fileFxnDictionary[relativePath] = distinctFxns
                fileFxnCount[relativePath] = len(distinctFxns)
            if className:
                distinctClasses = set(className)
                fileClassDictionary[relativePath] = distinctClasses
                fileClassCount[relativePath] = len(distinctClasses)
            if importModules:
                distinctImports = set(importModules)
                fileImportDictionary[relativePath] = distinctImports
                fileImportCount[relativePath] = len(distinctImports)
                uniqueImports.update(importModules)

    workbook = xlsxwriter.Workbook(dirname+"data"+".xlsx")
    workbook1 = xlsxwriter.Workbook(dirname+"function"+".xlsx")
    workbook2 = xlsxwriter.Workbook(dirname+"count"+".xlsx")
    ExcelUtility.writeToExcel(callerCalleeFxn, "CallerCalleFxn", workbook1)
    ExcelUtility.writeToExcel(anu.shortNames, "FxnAbbre.", workbook1)
    ExcelUtility.writeToExcel(fileFxnDictionary, "functionInfo", workbook)
    ExcelUtility.writeToExcel(fileClassDictionary, "classInfo", workbook)
    ExcelUtility.writeToExcel(fileImportDictionary, "importInfo", workbook)
    ExcelUtility.writeToExcelCount(fileFxnCount, "fxncount", workbook2)
    ExcelUtility.writeToExcelCount(fileImportCount, "importcount", workbook2)
    ExcelUtility.writeToExcelCount(fileClassCount, "classcount", workbook2)
    # Close explicitly so the files are written regardless of whether the
    # installed xlsxwriter still closes workbooks in its destructor.
    for book in (workbook, workbook1, workbook2):
        book.close()

    dumpclean(callerCalleeFxn)
    print("tree:")
    PrintUtility.printTree(rootNode)
    print("Unique Imports are:", len(uniqueImports))

    gu.getAllPaths(fxnGraph, True, dirname)
    gu.getAllPathsWithoutAbbreviations(fxnGraphFull, True, dirname)

    # Collect the optimized call paths (caller -> callee -> ...); the helper
    # is given the name of the Excel file for the abbreviated-name graph.
    optimizedPathsFile = dirname+'pathsoptimized.xlsx'
    all_paths = gu.getAllOptimizedPaths(fxnGraph, True, optimizedPathsFile, dirname)
    gu.getAllOptimizedPathsWithoutAbbreviations(
        fxnGraphFull, True, dirname+"pathsoptimizedNoAbbre.xlsx", dirname
    )

    print("anu.counter:", anu.counter)
    callerMatrix = ceu.encodeValues(all_paths, anu.counter, dirname)
    pathMatrix = cs.getResultSimilarityMatrix(callerMatrix, dirname+"similarity.csv")
    cluster.test(dirname, pathMatrix)
    TimeUtility.end(start)

    PrintUtility.printTree(rootNode)
    return rootNode, pathMatrix
import networkx as nx
import StringUtility as su
import AbbreviatedNamesUtility as AN
import GraphUtility as gu
import OneHotEncodingUtility as heu

# Demo script: build a small hand-written call graph from abbreviated names,
# render it, list its paths and pass each path to the label encoder.
fxnGraph = nx.DiGraph()
anu = AN.Abbreviations()

# (caller, callee) pairs, in the order their abbreviations are requested.
sampleEdges = (
    ("abc", "def"),
    ("def", "ghj"),
    ("asd", "dds"),
    ("sas", "sada"),
    ("sas", "asdx"),
)
for callerName, calleeName in sampleEdges:
    fxnGraph.add_edge(
        anu.get(su.sanitize(callerName)),
        anu.get(su.sanitize(calleeName)),
    )

# Write the graph as a DOT file, then convert that file to a PNG.
gu.visualize_to_dot(fxnGraph, "test2.dot")
gu.visualize_to_png("test2.dot", "test2.png")

all_paths = gu.getAllPaths(fxnGraph, True)
print("List size:", len(all_paths))
for sublist in all_paths:
    print(sublist)
    heu.getLabelEncoder(sublist)