def generate_cli(targetPath, isAbstraction):
    """Build a function-level hash index for a source tree and save it to disk.

    Command-line counterpart of the GUI ``generate`` method: progress and
    statistics are written to stdout instead of Tk widgets.

    Args:
        targetPath: Path of the project directory. Trailing ``/`` and ``\\``
            separators are ignored; the last path component is used as the
            project name.
        isAbstraction: ``"on"`` (case-insensitive) selects abstraction
            level 4 and the deep parser; any other value selects level 0
            and the shallow parser.

    Side effects:
        Creates ``hidx/`` in the current working directory if needed and
        writes ``hidx/hashmark_<absLevel>_<proj>.hidx``. The first line of
        that file is ``"<localVersion> <proj> <numFile> <numFunc> <numLine>"``
        and the remainder is ``str()`` of the list of per-function dicts.

    Raises:
        SystemExit: If ``pu.loadSource`` returns no files.
    """
    # Local import, matching the original block's function-scope import style.
    import shutil

    # Strip any mix of trailing separators so that the project name below
    # is never empty just because the path ended with "/" or "\".
    directory = targetPath.rstrip("/\\")
    absLevel = 4 if isAbstraction.lower() == "on" else 0

    proj = directory.replace('\\', '/').split('/')[-1]
    print("PROJ:", proj)
    timeIn = time.time()

    print("[+] Loading source files... This may take a few minutes.")
    tupleList = pu.loadSource(directory)
    numFile = len(tupleList)

    if numFile == 0:
        print("[-] Error: Failed loading source files.")
        print(
            " Check if you selected proper directory, or if your project contains .c, .cpp or java files."
        )
        sys.exit()

    print("[+] Load complete. Generating hashmark...")

    parse = parseFiles_shallow if absLevel == 0 else parseFiles_deep

    # The console width only pads the progress line, so query it once up
    # front instead of spawning `stty size` for every parsed file.
    if osName == "win":
        columns = 80
    else:
        columns = shutil.get_terminal_size(fallback=(80, 24)).columns

    # Prefix removed from each function's parent path: everything up to and
    # including the first "<proj>/".
    marker = str(proj) + "/"

    numFunc = 0
    numLine = 0
    listOfHashJsons = []

    for idx, tup in enumerate(tupleList):
        parseResult = parse(tup)
        filePath = parseResult[0]
        functionInstanceList = parseResult[1]
        language = parseResult[2]

        # Label for the progress line: the path starting at the project
        # name. Fall back to the raw path when the project name is empty or
        # absent, rather than crashing on a purely cosmetic string.
        if proj and proj in filePath:
            fullName = proj + filePath.split(proj, 1)[1]
        else:
            fullName = filePath

        progress = 100 * float(idx + 1) / numFile
        buf = "\r%.2f%% %s" % (progress, fullName)
        # Pad to the console width so a shorter line fully overwrites the
        # previous one; a negative count yields no padding.
        buf += " " * (int(columns) - len(buf))
        sys.stdout.write(buf)
        sys.stdout.flush()

        if len(functionInstanceList) > 0:
            # The line count is read from the first function instance only.
            numLine += functionInstanceList[0].parentNumLoc

        for fn in functionInstanceList:
            fn.removeListDup()
            _, absBody = pu.new_abstract(fn, absLevel, language)
            absBody = pu.normalize(absBody)
            funcLen = len(absBody)

            # Functions whose normalized body is 50 characters or shorter
            # are neither indexed nor counted.
            if funcLen <= 50:
                continue

            numFunc += 1
            hashValue = md5(absBody.encode('utf-8')).hexdigest()
            cutLength = len(str(fn.parentFile.split(marker)[0]) + marker)
            listOfHashJsons.append({
                "file": str(fn.parentFile[cutLength:]),
                "function id": str(fn.funcId),
                "function length": str(funcLen),
                "hash value": str(hashValue),
            })

    print("")
    print("[+] Hash index successfully generated.")
    print("[+] Saving hash index to file...", end=' ')

    # exist_ok tolerates an existing directory while still surfacing real
    # failures (e.g. permissions) instead of silently swallowing them.
    os.makedirs("hidx", exist_ok=True)

    indexPath = "hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"
    packageInfo = ' '.join(
        str(item) for item in (localVersion, proj, numFile, numFunc, numLine)
    ) + '\n'
    with open(indexPath, 'w', encoding="utf-8") as fp:
        fp.write(packageInfo)
        fp.write(str(listOfHashJsons))

    timeOut = time.time()
    print("(Done)")
    print("")
    print("[+] Elapsed time: %.02f sec." % (timeOut - timeIn))
    print("Program statistics:")
    print(" - " + str(numFile) + ' files;')
    print(" - " + str(numFunc) + ' functions;')
    print(" - " + str(numLine) + ' lines of code.')
    print("")
    print("[+] Hash index saved to: " + os.getcwd().replace("\\", "/") +
          "/" + indexPath)
def generate(self):
    """Build a function-level hash index for the selected directory (GUI).

    Reads the target directory and abstraction level from the Tk variables
    ``self.directory`` and ``self.absLevel``, parses every file returned by
    ``pu.loadSource``, hashes each sufficiently long normalized function
    body with MD5, and writes the index to
    ``hidx/hashmark_<absLevel>_<proj>.hidx`` in the current working
    directory. Progress and statistics are reported through
    ``self.listProcess`` and ``self.progressbar``.

    Returns:
        0 after the index has been written; ``None`` if no source files
        could be loaded (an error message is shown and nothing is written).
    """
    directory = self.directory.get()
    absLevel = int(self.absLevel.get())
    self.progress = 0

    # Ignore trailing separators when deriving the project name; otherwise
    # a path such as "C:/src/proj/" yields an empty name (and an index file
    # called "hashmark_<level>_.hidx").
    proj = directory.replace('\\', '/').rstrip('/').split('/')[-1]
    timeIn = time.time()

    log = self.listProcess
    log.config(state="normal")
    log.insert(
        Tkinter.END, "Loading source files... This may take a few minutes.")
    log.update()

    tupleList = pu.loadSource(directory)
    numFile = len(tupleList)

    if numFile == 0:
        log.insert(Tkinter.END, "Error: Failed loading source files.")
        log.insert(
            Tkinter.END,
            "- Check if you selected proper directory, or if your project contains .c, .cpp, .py, .js, .go or .java files."
        )
        return

    log.insert(Tkinter.END, "Load complete. Generating hashmark...")

    parse = parseFiles_shallow if absLevel == 0 else parseFiles_deep

    # Prefix removed from each function's parent path: everything up to and
    # including the first "<proj>/".
    marker = str(proj) + "/"

    numFunc = 0
    numLine = 0
    listOfHashJsons = []

    for idx, tup in enumerate(tupleList):
        parseResult = parse(tup)
        filePath = parseResult[0]
        functionInstanceList = parseResult[1]
        language = parseResult[2]

        # Fraction of files processed, in the range (0, 1].
        # NOTE(review): presumably the progress bar is configured with
        # maximum=1 elsewhere - confirm.
        self.progressbar["value"] = float(idx + 1) / numFile
        self.progressbar.update()
        log.insert(Tkinter.END, "[+] " + filePath)
        log.see("end")

        if len(functionInstanceList) > 0:
            # The line count is read from the first function instance only.
            numLine += functionInstanceList[0].parentNumLoc

        for fn in functionInstanceList:
            fn.removeListDup()
            _, absBody = pu.new_abstract(fn, absLevel, language)
            absBody = pu.normalize(absBody)
            funcLen = len(absBody)

            # Functions whose normalized body is 50 characters or shorter
            # are neither indexed nor counted.
            if funcLen <= 50:
                continue

            numFunc += 1
            hashValue = md5(absBody.encode('utf-8')).hexdigest()
            cutLength = len(str(fn.parentFile.split(marker)[0]) + marker)
            listOfHashJsons.append({
                "file": str(fn.parentFile[cutLength:]),
                "function id": str(fn.funcId),
                "function length": str(funcLen),
                "hash value": str(hashValue),
            })

    log.insert(Tkinter.END, "")
    log.insert(Tkinter.END, "Hash index successfully generated.")
    log.see("end")
    log.insert(Tkinter.END, "")
    log.see("end")
    log.insert(Tkinter.END, "Saving hash index to file...")
    log.see("end")

    # exist_ok tolerates an existing directory while still surfacing real
    # failures (e.g. permissions) instead of silently swallowing them.
    os.makedirs("hidx", exist_ok=True)

    indexPath = "hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"
    packageInfo = ' '.join(
        str(item) for item in (localVersion, proj, numFile, numFunc, numLine)
    ) + '\n'
    with open(indexPath, 'w', encoding="utf-8") as fp:
        fp.write(packageInfo)
        fp.write(str(listOfHashJsons))

    timeOut = time.time()

    log.insert(Tkinter.END, "Done.")
    log.see("end")
    log.insert(Tkinter.END, "")
    log.insert(Tkinter.END, "Elapsed time: %.02f sec." % (timeOut - timeIn))
    log.see("end")
    log.insert(Tkinter.END, "Program statistics:")
    log.insert(Tkinter.END, " - " + str(numFile) + ' files;')
    log.insert(Tkinter.END, " - " + str(numFunc) + ' functions;')
    log.insert(Tkinter.END, " - " + str(numLine) + ' lines of code.')
    log.see("end")
    log.insert(Tkinter.END, "")
    log.insert(
        Tkinter.END,
        "Hash index saved to: " + os.getcwd().replace("\\", "/") + "/" +
        indexPath)
    log.see("end")
    self.btnOpenFolder.config(state="normal")
    return 0