def produce_funcBody_hash(function):
    '''
    Hash the abstracted and normalized body of `function`.

    The body is taken from element [1] of pu.abstract(function, 4), passed
    through pu.normalize, and hashed with fnv1a_hash.

    Returns a two-element list: [hash value, normalized body].
    '''
    abstraction = pu.abstract(function, 4)
    normalized_body = pu.normalize(abstraction[1])
    return [fnv1a_hash(normalized_body), normalized_body]
def generate_cli(targetPath, isAbstraction):
    '''
    Build the hash index ("hashmark") of a source tree and save it (CLI mode).

    targetPath    -- root directory of the project to index; trailing '/' or
                     '\\' characters are stripped.
    isAbstraction -- "on" (case-insensitive) selects abstraction level 4,
                     any other value selects level 0.

    Every file returned by pu.loadSource is parsed in a worker pool. Each
    function body is abstracted, normalized and hashed with md5; bodies whose
    normalized length is 50 characters or less are skipped and not counted.

    The index is written to hidx/hashmark_<level>_<project>.hidx relative to
    the current working directory (the hidx directory is created if needed).
    Calls sys.exit() when no source files could be loaded.

    Note: single-argument print(...) is used throughout so that the output is
    the same whether print is a statement or a function.
    '''
    import subprocess

    directory = targetPath.rstrip('/').rstrip("\\")
    absLevel = 4 if isAbstraction.lower() == "on" else 0

    proj = directory.replace('\\', '/').split('/')[-1]
    print("PROJ: %s" % proj)
    timeIn = time.time()
    numFunc = 0
    numLine = 0
    projDic = {}      # normalized body length -> list of body hashes
    hashFileMap = {}  # body hash -> flat list [path, funcId, path, funcId, ...]

    print("[+] Loading source files... This may take a few minutes.")
    fileList = pu.loadSource(directory)
    numFile = len(fileList)

    if numFile == 0:
        print("[-] Error: Failed loading source files.")
        print(" Check if you selected proper directory, or if your project contains .c or .cpp files.")
        sys.exit()

    print("[+] Load complete. Generating hashmark...")

    if absLevel == 0:
        parseFunc = parseFile_shallow_multi
    else:
        parseFunc = parseFile_deep_multi

    # Leave one core free for the parent process when more than one exists.
    cpu_count = get_cpu_count.get_cpu_count()
    if cpu_count != 1:
        cpu_count -= 1

    # Query the console width once instead of spawning `stty` for every file.
    # Any failure (no tty, no stty, unexpected output) falls back to 80.
    columns = 80
    if osName != "win":
        try:
            # http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python
            columns = int(subprocess.check_output(['stty', 'size']).split()[1])
        except Exception:
            columns = 80

    pool = multiprocessing.Pool(processes=cpu_count)
    try:
        for idx, tup in enumerate(pool.imap_unordered(parseFunc, fileList)):
            filePath = tup[0]
            functionInstanceList = tup[1]

            relPath = filePath.split(proj, 1)[1]
            fullName = proj + relPath
            pathOnly = relPath[1:]  # drop the leading path separator

            # Redraw the progress line in place, padded to the console width
            # so a shorter name fully overwrites a longer previous one.
            progress = 100 * float(idx + 1) / numFile
            buf = "\r%.2f%% %s" % (progress, fullName)
            buf += " " * (columns - len(buf))
            sys.stdout.write(buf)
            sys.stdout.flush()

            numFunc += len(functionInstanceList)
            if functionInstanceList:
                numLine += functionInstanceList[0].parentNumLoc

            for function in functionInstanceList:
                function.removeListDup()
                absBody = pu.normalize(pu.abstract(function, absLevel)[1])
                funcLen = len(absBody)

                if funcLen <= 50:
                    # Under the length threshold: not indexed, not counted.
                    numFunc -= 1
                    continue

                hashValue = md5(absBody).hexdigest()
                projDic.setdefault(funcLen, []).append(hashValue)
                hashFileMap.setdefault(hashValue, []).extend(
                    [pathOnly, function.funcId])
    finally:
        # On success every result has already been consumed, so terminate()
        # only cuts work short when an error is propagating.
        pool.terminate()
        pool.join()

    print("")
    print("[+] Hash index successfully generated.")
    sys.stdout.write("[+] Saving hash index to file... ")
    sys.stdout.flush()

    # Make sure the output directory exists; re-raise only if it really
    # could not be created.
    try:
        os.mkdir("hidx")
    except OSError:
        if not os.path.isdir("hidx"):
            raise

    indexPath = "hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"
    packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(
        numFile) + ' ' + str(numFunc) + ' ' + str(numLine) + '\n'

    with open(indexPath, 'w') as fp:
        fp.write(packageInfo)

        # Section 1: one line per body length, listing the distinct hashes.
        for key in sorted(projDic):
            fp.write(str(key) + '\t')
            for h in set(projDic[key]):
                fp.write(h + '\t')
            fp.write('\n')

        fp.write('\n=====\n')

        # Section 2: one line per hash, listing its (path, funcId) pairs.
        for key in sorted(hashFileMap):
            fp.write(str(key) + '\t')
            for item in hashFileMap[key]:
                fp.write(str(item) + '\t')
            fp.write('\n')

    timeOut = time.time()

    print("(Done)")
    print("")
    print("[+] Elapsed time: %.02f sec." % (timeOut - timeIn))
    print("Program statistics:")
    print(" - " + str(numFile) + ' files;')
    print(" - " + str(numFunc) + ' functions;')
    print(" - " + str(numLine) + ' lines of code.')
    print("")
    print("[+] Hash index saved to: " + os.getcwd().replace("\\", "/") +
          "/" + indexPath)
def generate(self):
    '''
    Build the hash index for the selected directory and save it (GUI mode).

    Reads the target directory from self.directory and the abstraction level
    from self.absLevel. Progress is reported through self.listProcess and
    self.progressbar. Each function body is abstracted, normalized and hashed
    with md5; bodies whose normalized length is 50 characters or less are
    skipped and not counted.

    The index is written to hidx/hashmark_<level>_<project>.hidx relative to
    the current working directory, after which self.btnOpenFolder is enabled.

    Returns 0 on success, or None when no source files could be loaded.
    '''
    directory = self.directory.get()
    absLevel = int(self.absLevel.get())
    self.progress = 0

    proj = directory.replace('\\', '/').split('/')[-1]
    timeIn = time.time()
    numFunc = 0
    numLine = 0
    projDic = {}      # normalized body length -> list of body hashes
    hashFileMap = {}  # body hash -> flat list [path, funcId, path, funcId, ...]

    listProcess = self.listProcess

    def log(message):
        # Append one line to the process log widget.
        listProcess.insert(Tkinter.END, message)

    listProcess.config(state="normal")
    log("Loading source files... This may take a few minutes.")
    listProcess.update()

    fileList = pu.loadSource(directory)
    numFile = len(fileList)

    if numFile == 0:
        log("Error: Failed loading source files.")
        log("- Check if you selected proper directory, or if your project contains .c or .cpp files.")
        return

    log("Load complete. Generating hashmark...")

    if absLevel == 0:
        parseFunc = parseFile_shallow_multi
    else:
        parseFunc = parseFile_deep_multi

    # Leave one core free for the GUI process when more than one exists.
    cpu_count = get_cpu_count.get_cpu_count()
    if cpu_count != 1:
        cpu_count -= 1

    pool = multiprocessing.Pool(processes=cpu_count)
    try:
        for idx, tup in enumerate(pool.imap_unordered(parseFunc, fileList)):
            filePath = tup[0]
            functionInstanceList = tup[1]
            pathOnly = filePath.split(proj, 1)[1][1:]

            self.progressbar["value"] = float(idx + 1) / numFile
            self.progressbar.update()
            log("[+] " + filePath)
            listProcess.see("end")

            numFunc += len(functionInstanceList)
            if functionInstanceList:
                numLine += functionInstanceList[0].parentNumLoc

            for function in functionInstanceList:
                function.removeListDup()
                absBody = pu.normalize(pu.abstract(function, absLevel)[1])
                funcLen = len(absBody)

                if funcLen <= 50:
                    # Under the length threshold: not indexed, not counted.
                    numFunc -= 1
                    continue

                hashValue = md5(absBody).hexdigest()
                projDic.setdefault(funcLen, []).append(hashValue)
                hashFileMap.setdefault(hashValue, []).extend(
                    [pathOnly, function.funcId])
    finally:
        # generate() can run many times in one GUI session, so the workers
        # must not outlive the call. On success every result has already
        # been consumed, so terminate() only cuts work short on error.
        pool.terminate()
        pool.join()

    log("")
    log("Hash index successfully generated.")
    listProcess.see("end")
    log("")
    listProcess.see("end")
    log("Saving hash index to file...")
    listProcess.see("end")

    # An already existing directory is fine; any other failure is re-raised
    # here rather than surfacing later as a confusing open() error.
    try:
        os.mkdir("hidx")
    except OSError:
        if not os.path.isdir("hidx"):
            raise

    indexPath = "hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"
    packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(
        numFile) + ' ' + str(numFunc) + ' ' + str(numLine) + '\n'

    with open(indexPath, 'w') as fp:
        fp.write(packageInfo)

        # Section 1: one line per body length, listing the distinct hashes.
        for key in sorted(projDic):
            fp.write(str(key) + '\t')
            for h in set(projDic[key]):
                fp.write(h + '\t')
            fp.write('\n')

        fp.write('\n=====\n')

        # Section 2: one line per hash, listing its (path, funcId) pairs.
        for key in sorted(hashFileMap):
            fp.write(str(key) + '\t')
            for item in hashFileMap[key]:
                fp.write(str(item) + '\t')
            fp.write('\n')

    timeOut = time.time()

    log("Done.")
    listProcess.see("end")
    log("")
    log("Elapsed time: %.02f sec." % (timeOut - timeIn))
    listProcess.see("end")
    log("Program statistics:")
    log(" - " + str(numFile) + ' files;')
    log(" - " + str(numFunc) + ' functions;')
    log(" - " + str(numLine) + ' lines of code.')
    listProcess.see("end")
    log("")
    log("Hash index saved to: " + os.getcwd().replace("\\", "/") + "/" +
        indexPath)
    listProcess.see("end")
    self.btnOpenFolder.config(state="normal")

    return 0