def __init__(self):
    """Create a model with zeroed progress counters and default ccFinderX settings.

    The project path table starts empty, and the scratch and output
    directories start out as None.
    """
    ccfx_binary = '/if7/ct4ew/research/ray/project/Repertoire_new/ccFinderx/ccfx'

    # Progress bookkeeping.
    self.processDirectory = True
    self.num_operations = 0
    self.operations_so_far = IntegerWrapper(0)

    # ccFinderX wrapper; the meaning of the three trailing arguments is
    # defined by CCFXEntryPoint, which is not visible from here.
    self.ccfx = CCFXEntryPoint(ccfx_binary, 40, False, True)

    # Maps a textual answer to a boolean flag value.
    self.flags = {"No": True, "Yes": False}

    # Inputs and output locations, all unset for now.
    self.path = dict()
    self.isProcessDiff = False
    self.tmpPath = None
    self.outPath = None
def __init__(self):
    """Create a model with zeroed progress counters and default ccFinderX settings.

    The project path table starts empty, and the scratch and output
    directories start out as None.
    """
    ccfx_binary = '/home/bray/myTool/RepertoireTool/ccFinder/ccfx'

    # Progress bookkeeping.
    self.processDirectory = True
    self.num_operations = 0
    self.operations_so_far = IntegerWrapper(0)

    # ccFinderX wrapper; the meaning of the three trailing arguments is
    # defined by CCFXEntryPoint, which is not visible from here.
    self.ccfx = CCFXEntryPoint(ccfx_binary, 10, True, True)

    # Maps a textual answer to a boolean flag value.
    self.flags = {"No": True, "Yes": False}

    # Inputs and output locations, all unset for now.
    self.path = dict()
    self.isProcessDiff = False
    self.tmpPath = None
    self.outPath = None
def __init__(self):
    """Create a model with zeroed progress counters and default ccFinderX settings.

    The project path table starts empty, the scratch and output directories
    start out as None, and no language is marked as having produced output.
    """
    ccfx_binary = '/home/bray/SealLab/baishakhir/RepertoireTool/ccFinder/ccfx'

    # Progress bookkeeping.
    self.processDirectory = True
    self.num_operations = 0
    self.operations_so_far = IntegerWrapper(0)

    # ccFinderX wrapper; the meaning of the three trailing arguments is
    # defined by CCFXEntryPoint, which is not visible from here.
    self.ccfx = CCFXEntryPoint(ccfx_binary, 5, False, True)

    # Maps a textual answer to a boolean flag value.
    self.flags = {"No": True, "Yes": False}

    # Inputs and output locations, all unset for now.
    self.path = dict()
    self.isProcessDiff = False
    self.tmpPath = None
    self.outPath = None

    # Per-language marker, keyed by language name; every language starts False.
    self.got_some = {'java': False, 'cxx': False, 'hxx': False}
def __init__(self):
    """Initialise progress counters, the ccFinderX wrapper and empty path state."""
    # Progress bookkeeping.
    self.processDirectory = True
    self.num_operations = 0
    self.operations_so_far = IntegerWrapper(0)

    # ccFinderX wrapper; the meaning of the three trailing arguments is
    # defined by CCFXEntryPoint, which is not visible from here.
    binary_location = (
        '/if7/ct4ew/research/ray/project/Repertoire_new/ccFinderx/ccfx')
    self.ccfx = CCFXEntryPoint(binary_location, 40, False, True)

    # Maps a textual answer to a boolean flag value.
    self.flags = {"No": True, "Yes": False}

    # Nothing has been registered yet: no project paths, no directories.
    self.path = {}
    self.isProcessDiff = False
    self.tmpPath = None
    self.outPath = None
def processImpl(self, model):
    """Run the whole clone-detection pipeline for the two projects of *model*.

    The pipeline is a fixed sequence of steps: load both projects, dump their
    commits, convert the diffs to ccfx input, run ccFinder for every
    language/version combination, filter each ccFinder result that exists,
    and finally pickle the model and the generated clone database.

    ``self.sync.stopRequested()`` is polled before every step, and once more
    after the last one, exactly as the original step machine did.

    Args:
        model: object providing ``getProj``, ``getPathBuilder``,
            ``getCcfxPath`` and ``getCcfxTokenSize``.

    Returns:
        ``('Success!', True)`` when no stop was requested, otherwise
        ``('Aborting', False)``.
    """
    proj0 = model.getProj(PathBuilder.Proj0)
    proj1 = model.getProj(PathBuilder.Proj1)
    path_builder = model.getPathBuilder()
    converter = CCFXInputConverter()
    ccfx = CCFXEntryPoint(path_builder, model.getCcfxPath(),
                          model.getCcfxTokenSize())
    # Kept at 20.0 so the reported progress fractions stay unchanged, even
    # though only 19 steps exist.
    total_steps = 20.0

    # (label in the ccFinder message, label in the filter message, lang, is_new)
    variants = [
        ('old C', 'old C', LangDecider.CXX, False),
        ('new C', 'new C', LangDecider.CXX, True),
        ('old headers', 'old header', LangDecider.HXX, False),
        ('new headers', 'new header', LangDecider.HXX, True),
        ('old Java', 'old java', LangDecider.JAVA, False),
        ('new Java', 'new java', LangDecider.JAVA, True),
    ]
    # have_output[i] records what processPairs returned for variants[i].
    have_output = [False] * len(variants)

    def run_ccfx(index):
        # Run ccFinder for one variant and remember whether it produced output.
        lang, is_new = variants[index][2], variants[index][3]
        have_output[index] = ccfx.processPairs(lang, is_new)

    def filter_output(index):
        # Convert one ccFinder result to Repertoire output, if there is one.
        if not have_output[index]:
            return
        lang, is_new = variants[index][2], variants[index][3]
        output = convert_ccfx_output(path_builder, lang, is_new)
        rep_out_path = path_builder.getRepertoireOutputPath(lang, is_new)
        rep_out_file = path_builder.getRepertoireOutputFileName(lang, is_new)
        output.writeToFile(rep_out_path + rep_out_file)

    def build_database():
        # Persist the model, then generate and persist the clone database.
        # `with` guarantees both files are closed, also when dumping fails.
        with open(path_builder.getModelPathAndName(), 'w') as model_file:
            pickle.dump(model, model_file)
        rep_populator = RepDBPopulator(path_builder)
        db = rep_populator.generateDB(proj0, proj1)
        with open(path_builder.getDBPathAndName(), 'w') as db_file:
            pickle.dump(db, db_file)

    steps = [
        ("Loading version histories for first project", proj0.load),
        ("Loading version histories for second project", proj1.load),
        ("Dumping commits for first project", proj0.dumpCommits),
        ("Dumping commits for second project", proj1.dumpCommits),
        ("Converting diffs to ccfx compatible format for first project",
         lambda: converter.convert(path_builder)),
        # The original step for the second project only reports progress;
        # the single convert() call above is the only conversion performed.
        ("Converting diffs to ccfx compatible format for second project",
         lambda: None),
    ]
    for index, variant in enumerate(variants):
        steps.append((
            "Running ccFinder for %s, this will take quite some time..."
            % variant[0],
            lambda index=index: run_ccfx(index)))
    for index, variant in enumerate(variants):
        steps.append((
            "Filtering ccFinder %s output based on operation..." % variant[1],
            lambda index=index: filter_output(index)))
    steps.append(("Combining ccFinder output into a unified database...",
                  build_database))

    for step, (message, action) in enumerate(steps):
        if self.sync.stopRequested():
            return 'Aborting', False
        self.progress(message, step / total_steps)
        action()

    # The original loop polled once more before declaring success.
    if self.sync.stopRequested():
        return 'Aborting', False
    return 'Success!', True
class RepertoireModel:
    """Configuration plus driver for comparing two sets of diffs with ccFinderX.

    The ``set*`` methods record inputs and ccFinderX options; ``filterDiffs``
    then filters the diffs per language, converts them to ccfx input, runs
    ccFinderX and writes the Repertoire output files.
    """

    def __init__(self):
        """Initialise progress counters and the default ccFinderX wrapper."""
        self.processDirectory = True
        self.num_operations = 0
        self.operations_so_far = IntegerWrapper(0)
        # The meaning of the three trailing arguments is defined by
        # CCFXEntryPoint, which is not visible from here.
        self.ccfx = CCFXEntryPoint('../ccFinder/ccfx', 40, True, True)
        # NOTE(review): "No" maps to True, which looks inverted; presumably
        # intentional, confirm against CCFXEntryPoint.
        self.flags = {"No": True, "Yes": False}

    def setDiffPaths(self, path0=None, path1=None, isDirectory=True):
        """Record the two diff inputs.

        Args:
            path0: first input, a directory of diffs or a single diff file.
            path1: second input of the same kind.
            isDirectory: True when both inputs are directories, False when
                both are files.

        Returns:
            True when both paths exist with the requested kind, else False.
            The directory/file mode is stored even when validation fails.
        """
        path0 = str(path0)
        path1 = str(path1)
        self.ccfx.isDirectory = self.processDirectory = isDirectory
        if (isDirectory is True) and (not os.path.isdir(path0) or
                                      not os.path.isdir(path1)):
            return False
        elif (isDirectory is False) and (not os.path.isfile(path0) or
                                         not os.path.isfile(path1)):
            return False
        self.paths = {'proj0': path0, 'proj1': path1}
        return True

    def setTmpDirectory(self, path):
        """Create a private scratch directory below *path*.

        Returns:
            True on success, False when *path* is not an existing directory.
        """
        path = str(path)
        if not os.path.isdir(path):
            return False
        # Use our own sub-directory so that we are unlikely to fight someone
        # else for file names.
        uniq = 'repertoire_tmp_' + str(int(os.times()[4] * 100))
        tmpPath = path + os.sep + uniq
        os.mkdir(tmpPath)
        self.tmpPath = tmpPath
        return True

    def setSuffixes(self, jSuff='', cSuff='', hSuff=''):
        """Record the file suffix per language and build one DiffFilter each.

        A single leading '.' is stripped from every suffix.
        """
        def strip_dot(suffix):
            suffix = str(suffix)
            if suffix.startswith('.'):
                return suffix[1:]
            return suffix

        jSuff = strip_dot(jSuff)
        cSuff = strip_dot(cSuff)
        hSuff = strip_dot(hSuff)
        self.suffixes = {
            'java': jSuff,
            'cxx': cSuff,
            'hxx': hSuff,
        }
        self.filters = {
            'java': DiffFilter(jSuff),
            'cxx': DiffFilter(cSuff),
            'hxx': DiffFilter(hSuff),
        }

    def setCcfxDirectory(self, path):
        """Point the ccFinderX wrapper at ``<path>/ccfx``.

        Returns:
            True when that binary exists, otherwise False.
        """
        path = str(path)
        if not os.path.isdir(path):
            return False
        ccfx_binary = path + "/ccfx"
        if os.path.exists(ccfx_binary):
            self.ccfx.ccfxPath = ccfx_binary
            return True
        return False

    def setCcfxToken(self, token_size):
        """Set the ccFinderX token size; always returns True."""
        self.ccfx.tokenSize = token_size
        # str() so that integer token sizes do not raise TypeError here.
        print("setting ccFinder token size = " + str(token_size))
        return True

    def setCcfxFileSeparator(self, flag):
        """Set the file separator flag from a key of ``self.flags``.

        Raises:
            KeyError: if ``str(flag)`` is not a key of ``self.flags``.
        """
        self.ccfx.fileSep = self.flags[str(flag)]
        print("setting ccFinder file separator flag to %d" % (self.ccfx.fileSep))
        return True

    def setCcfxGroupSeparator(self, flag):
        """Set the group separator flag from a key of ``self.flags``.

        Raises:
            KeyError: if ``str(flag)`` is not a key of ``self.flags``.
        """
        self.ccfx.grpSep = self.flags[str(flag)]
        print("setting ccFinder group separator flag to %d" % (self.ccfx.grpSep))
        return True

    def filterDiffProjs(self, interface):
        """Filter every diff file of both project directories per language.

        Returns:
            None on success, or a ``(message, False)`` tuple when the user
            cancelled or a file could not be filtered.
        """
        # 3 different file formats, 2 operations each (filter/convert)
        self.num_operations = len(os.listdir(self.paths['proj0'])) * 3 * 2
        if self.paths['proj0'] != self.paths['proj1']:
            self.num_operations += len(os.listdir(self.paths['proj1'])) * 3 * 2
        self.num_operations += 2 * 6  # 2 ccFinder calls for all 6 output files
        self.operations_so_far = IntegerWrapper(0)

        for proj in ['proj0', 'proj1']:
            for lang in ['java', 'cxx', 'hxx']:
                the_filter = self.filters[lang]
                for file_name in os.listdir(self.paths[proj]):
                    if interface.cancelled():
                        return ('User cancelled processing', False)
                    interface.progress(
                        'Filtering {0} files'.format(lang),
                        self.operations_so_far.value /
                        float(self.num_operations))
                    input_path = self.paths[proj] + os.sep + file_name
                    out_path = (self.pb.getFilterOutputPath(proj, lang) +
                                file_name + '.' + self.suffixes[lang])
                    (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
                    # If a language produced output in one place but not the
                    # other there cannot be any clones, so the flag is the
                    # conjunction over everything filtered so far.
                    self.got_some[lang] = self.got_some[lang] and gotsome
                    if not ok:
                        return ('Error processing: ' + file_name, False)
                    self.operations_so_far.incr()
            if self.paths['proj0'] == self.paths['proj1']:
                print("filterDiffProjs: two paths same, breaking!!")
                break

    def filterDiffFiles(self, interface):
        """Filter the two single diff files per language.

        Returns:
            None on success, or a ``(message, False)`` tuple when the files
            have different extensions, the user cancelled, or filtering
            failed.
        """
        # 3 different file formats, 2 operations each (filter/convert)
        self.num_operations = 3 * 2
        if self.paths['proj0'] != self.paths['proj1']:
            self.num_operations += 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        input_file1 = self.paths['proj0']
        input_file2 = self.paths['proj1']
        lang1 = os.path.splitext(input_file1)[1]  # extension
        lang2 = os.path.splitext(input_file2)[1]  # extension
        if lang1 != lang2:
            print("!!the two files have different extension")
            print("lang1 = " + lang1)
            print("lang2 = " + lang2)
            # Same (message, ok) shape as every other failure path.
            return ('The two files have different extensions: ' +
                    lang1 + ' vs ' + lang2, False)

        for proj in ['proj0', 'proj1']:
            for lang in ['java', 'cxx', 'hxx']:
                the_filter = self.filters[lang]
                if interface.cancelled():
                    return ('User cancelled processing', False)
                interface.progress(
                    'Filtering {0} files'.format(lang),
                    self.operations_so_far.value / float(self.num_operations))
                input_path = self.paths[proj]
                file_name = os.path.basename(input_path)
                out_path = (self.pb.getFilterOutputPath(proj, lang) +
                            file_name + '.' + self.suffixes[lang])
                (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
                # See filterDiffProjs: a language only counts when every
                # filtered input produced output for it.
                self.got_some[lang] = self.got_some[lang] and gotsome
                if not ok:
                    return ('Error processing: ' + file_name, False)
                self.operations_so_far.incr()
            if self.paths['proj0'] == self.paths['proj1']:
                print("filterDiffFiles: two paths same, breaking!!")
                break

    def filterDiffs(self, interface):
        """Run the full pipeline: filter, convert, ccFinderX, Repertoire output.

        Args:
            interface: object providing ``cancelled()`` and
                ``progress(message, fraction)``.

        Returns:
            A ``(message, ok)`` tuple.
        """
        self.got_some = {'java': True, 'cxx': True, 'hxx': True}
        self.pb = PathBuilder(self.tmpPath, force_clean=True)

        # First, filter the input diffs by file type, so that all c diffs
        # are in one set of files, and similarly for java/headers.
        if self.processDirectory is True:
            failure = self.filterDiffProjs(interface)
        else:
            failure = self.filterDiffFiles(interface)
        if failure is not None:
            # Cancellation or a filter error: stop instead of running
            # ccFinderX on incomplete input.
            return failure

        # Second, change each diff into ccFinder input format.
        converter = CCFXInputConverter()
        callback = lambda: interface.progress(
            'Converting to ccfx input format',
            self.operations_so_far.incr() / float(self.num_operations))
        converter.convert(self.pb, callback)

        # new and old for 3 langs
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)
        clone_path = self.pb.getCCFXOutputPath()

        # Third, call ccfx for each directory.
        worked = True
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                interface.progress(
                    'ccFinderX executing',
                    self.operations_so_far.incr() / float(self.num_operations))
                continue
            old_path0 = self.pb.getCCFXInputPath(PathBuilder.PROJ0, lang, False)
            old_path1 = self.pb.getCCFXInputPath(PathBuilder.PROJ1, lang, False)
            new_path0 = self.pb.getCCFXInputPath(PathBuilder.PROJ0, lang, True)
            new_path1 = self.pb.getCCFXInputPath(PathBuilder.PROJ1, lang, True)
            tmp_old_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=False, is_tmp=True)
            tmp_new_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=True, is_tmp=True)
            old_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=False, is_tmp=False)
            new_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=True, is_tmp=False)
            if self.paths['proj0'] == self.paths['proj1']:
                # Comparing a project with itself: use the same input twice.
                old_path1 = old_path0
                new_path1 = new_path0
            worked = worked and self.ccfx.processPair(
                old_path0, old_path1, tmp_old_out, old_out, lang)
            interface.progress(
                'ccFinderX executing',
                self.operations_so_far.incr() / float(self.num_operations))
            worked = worked and self.ccfx.processPair(
                new_path0, new_path1, tmp_new_out, new_out, lang)
            interface.progress(
                'ccFinderX executing',
                self.operations_so_far.incr() / float(self.num_operations))
        if not worked:
            return ('ccFinderX execution failed', False)

        # Fourth, build up our database of clones.
        print("Repertoire filtering....")
        # new and old for 3 langs
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                interface.progress(
                    'Repertoire filtering based on operation',
                    self.operations_so_far.incr() / float(self.num_operations))
                continue
            for is_new in [True, False]:
                output = convert_ccfx_output(self.pb, lang, is_new)
                rep_out_path = self.pb.getRepertoireOutputPath(lang, is_new)
                suffix = '_old.txt'
                if is_new:
                    suffix = '_new.txt'
                output.writeToFile(rep_out_path + lang + suffix)
            interface.progress(
                'Repertoire filtering based on operation',
                self.operations_so_far.incr() / float(self.num_operations))

        print("Processing successful!!")
        return ('Processing successful', True)
def __init__(self):
    """Initialise progress counters and the default ccFinderX wrapper."""
    default_ccfx = '../ccFinder/ccfx'

    # Progress bookkeeping.
    self.processDirectory = True
    self.num_operations = 0
    self.operations_so_far = IntegerWrapper(0)

    # The meaning of the three trailing arguments is defined by
    # CCFXEntryPoint, which is not visible from here.
    self.ccfx = CCFXEntryPoint(default_ccfx, 40, True, True)

    # Maps a textual answer to a boolean flag value.
    self.flags = {"No": True, "Yes": False}
class RepertoireModel:
    """Configuration plus driver for per-project clone detection with ccFinderX.

    Diff inputs are registered one at a time with ``setDiffPath`` and stored
    under the keys 'proj0', 'proj1', ... in ``self.path``.
    """

    def __init__(self):
        """Initialise progress counters, the ccFinderX wrapper and empty paths."""
        self.processDirectory = True
        self.num_operations = 0
        self.operations_so_far = IntegerWrapper(0)
        # The meaning of the three trailing arguments is defined by
        # CCFXEntryPoint, which is not visible from here.
        self.ccfx = CCFXEntryPoint(
            '/if7/ct4ew/research/ray/project/Repertoire_new/ccFinderx/ccfx',
            40, False, True)
        # NOTE(review): "No" maps to True, which looks inverted; presumably
        # intentional, confirm against CCFXEntryPoint.
        self.flags = {"No": True, "Yes": False}
        self.path = {}
        self.isProcessDiff = False
        self.tmpPath = None
        self.outPath = None

    def setDiffPath(self, path=None):
        """Register one more diff input under the next free 'projN' key.

        Also records on the ccFinderX wrapper whether the input is a
        directory. Always returns True.
        """
        self.isProcessDiff = True
        path = str(path)
        self.ccfx.isDirectory = os.path.isdir(path)
        proj = 'proj' + str(len(self.path))
        self.path[proj] = path
        return True

    def setOutDirectory(self, path):
        """Set the output directory, creating it when it does not exist.

        A leading '~' is expanded and relative paths are resolved against
        the current working directory. Always returns True.

        Raises:
            OSError: if the directory cannot be created.
        """
        path = os.path.expanduser(str(path))
        # isabs() instead of a "/home" prefix test, so absolute paths outside
        # /home are no longer glued onto the working directory.
        if not os.path.isabs(path):
            path = os.getcwd() + os.sep + path
        if not os.path.isdir(path):
            os.mkdir(path)
        self.outPath = path
        print("output files will be stored at " + self.outPath)
        return True

    def setTmpDirectory(self, path):
        """Set the scratch directory, creating it when it does not exist.

        Always returns True.
        """
        path = str(path)
        if not os.path.isdir(path):
            os.mkdir(path)
        self.tmpPath = path
        print("output files will be stored at " + self.tmpPath)
        return True

    def setSuffixes(self, jSuff='', cSuff='', hSuff=''):
        """Record the file suffix per language and build one DiffFilter each.

        A single leading '.' is stripped from every suffix.
        """
        def strip_dot(suffix):
            suffix = str(suffix)
            if suffix.startswith('.'):
                return suffix[1:]
            return suffix

        jSuff = strip_dot(jSuff)
        cSuff = strip_dot(cSuff)
        hSuff = strip_dot(hSuff)
        self.suffixes = {
            'java': jSuff,
            'cxx': cSuff,
            'hxx': hSuff,
        }
        self.filters = {
            'java': DiffFilter(jSuff),
            'cxx': DiffFilter(cSuff),
            'hxx': DiffFilter(hSuff),
        }

    def setCcfxDirectory(self, path):
        """Point the ccFinderX wrapper at ``<path>/ccfx``.

        Returns:
            True when that binary exists, otherwise False.
        """
        path = str(path)
        if not os.path.isdir(path):
            return False
        ccfx_binary = path + "/ccfx"
        if os.path.exists(ccfx_binary):
            self.ccfx.ccfxPath = ccfx_binary
            return True
        return False

    def setCcfxToken(self, token_size):
        """Set the ccFinderX token size; always returns True."""
        self.ccfx.tokenSize = token_size
        # str() so that integer token sizes do not raise TypeError here.
        print("setting ccFinder token size = " + str(token_size))
        return True

    def setCcfxFileSeparator(self, flag):
        """Set the file separator flag from a key of ``self.flags``.

        Raises:
            KeyError: if ``str(flag)`` is not a key of ``self.flags``.
        """
        self.ccfx.fileSep = self.flags[str(flag)]
        print("setting ccFinder file separator flag to %d" % (self.ccfx.fileSep))
        return True

    def setCcfxGroupSeparator(self, flag):
        """Set the group separator flag from a key of ``self.flags``.

        Raises:
            KeyError: if ``str(flag)`` is not a key of ``self.flags``.
        """
        self.ccfx.grpSep = self.flags[str(flag)]
        print("setting ccFinder group separator flag to %d" % (self.ccfx.grpSep))
        return True

    def filterDiffProj(self, proj):
        """Filter every file in the directory registered for *proj*.

        Only the 'cxx' filter is applied by this method.

        Returns:
            None on success, or ``(message, False)`` when a file failed.
        """
        path = self.path[proj]
        self.num_operations = 3 * 2
        self.num_operations += len(os.listdir(path)) * 3
        self.operations_so_far = IntegerWrapper(0)

        for lang in ['cxx']:
            the_filter = self.filters[lang]
            for file_name in os.listdir(path):
                self.progress('Filtering {0} files'.format(lang))
                input_path = path + os.sep + file_name
                print(file_name)
                print(input_path)
                out_path = (self.pb.getFilterOutputPath(proj, lang) +
                            file_name + '.' + self.suffixes[lang])
                print(out_path)
                (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
                self.got_some[lang] = self.got_some[lang] and gotsome
                if not ok:
                    return ('Error processing: ' + file_name, False)
                self.operations_so_far.incr()

    def filterDiffFile(self, diff_file, proj=None):
        """Filter one diff file with the java, cxx and hxx filters.

        Args:
            diff_file: path of the diff file. For backward compatibility a
                project key of ``self.path`` is accepted too and resolved to
                the path registered under it.
            proj: project key that selects the filter output directory. When
                omitted it is derived from *diff_file*.

        Returns:
            None on success, or ``(message, False)`` on failure.
        """
        if proj is None:
            if diff_file in self.path:
                # Legacy call style: the project key was passed.
                proj, diff_file = diff_file, self.path[diff_file]
            else:
                for key, registered in self.path.items():
                    if registered == diff_file:
                        proj = key
                        break
                else:
                    return ('Invalid path : ' + str(diff_file), False)

        # 3 different file formats, 2 operations each (filter/convert)
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)
        file_name = os.path.basename(diff_file)

        for lang in ['java', 'cxx', 'hxx']:
            the_filter = self.filters[lang]
            self.progress('Filtering {0} files'.format(lang))
            out_path = (self.pb.getFilterOutputPath(proj, lang) +
                        file_name + '.' + self.suffixes[lang])
            (ok, gotsome) = the_filter.filterDiff(diff_file, out_path)
            self.got_some[lang] = self.got_some[lang] and gotsome
            if not ok:
                return ('Error processing: ' + file_name, False)
            self.operations_so_far.incr()

    def progress(self, msg):
        """Print *msg* together with the completed percentage."""
        if self.num_operations:
            progressSoFar = (self.operations_so_far.value /
                             float(self.num_operations)) * 100
        else:
            # Nothing scheduled yet; avoid dividing by zero.
            progressSoFar = 0.0
        print("%s..: %f" % (msg, progressSoFar))

    def processDiffs(self, proj, path):
        """Filter the diffs of one project and convert them to ccfx input.

        Args:
            proj: project key of ``self.path``.
            path: directory of diffs or single diff file for that project.

        Returns:
            A ``(message, ok)`` tuple.
        """
        self.got_some = {'java': False, 'cxx': True, 'hxx': False}
        self.pb = PathBuilder(self.tmpPath, force_clean=True)

        if os.path.isdir(path):
            failure = self.filterDiffProj(proj)
        elif os.path.isfile(path):
            failure = self.filterDiffFile(path, proj)
        else:
            return ('Invalid path : ' + path, False)
        if failure is not None:
            # Do not convert when filtering already failed.
            return failure

        # Second, change each diff into ccFinder input format.
        converter = CCFXInputConverter()
        # The original computed an unused percentage here; only the counter
        # increment had an effect, so that is all that is kept.
        self.operations_so_far.incr()
        callback = lambda: self.progress('Converting to ccfx input format')
        converter.convert(proj, self.pb, callback)

        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)
        return ("Converting diffs to ccFinder compatible format is done", True)

    def processDiff(self):
        """Run ``processDiffs`` for every registered project."""
        for proj, path in self.path.items():
            message, ok = self.processDiffs(proj, path)
            if not ok:
                # Report the failure instead of dropping it silently.
                print(message)

    def runCCFinderSelf(self, proj, path):
        """Run ccFinderX on the old and new input of *proj*, then ``runRep``.

        Args:
            proj: project key of ``self.path``.
            path: unused; kept for interface compatibility.

        Returns:
            ``('ccFinderX execution failed', False)`` on failure, otherwise
            None.
        """
        print('Chong Tang: In runCCFinderSelf() function ----')
        clone_path = self.pb.getCCFXOutputPath()

        # Third, call ccfx for each directory.
        worked = True
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                self.progress('ccFinderX executing')
                continue
            old_path = self.pb.getCCFXInputPath(proj, lang, False)
            new_path = self.pb.getCCFXInputPath(proj, lang, True)
            tmp_old_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=False, is_tmp=True)
            tmp_new_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=True, is_tmp=True)
            old_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=False, is_tmp=False)
            new_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=True, is_tmp=False)

            print('Chong Tang: before first processPairSelf calling')
            worked = worked and self.ccfx.processPairSelf(
                old_path, tmp_old_out, old_out, lang)
            print('Chong Tang: after first processPairSelf calling')
            self.progress('ccFinderX executing')
            print('Chong Tang: before second processPairSelf calling')
            worked = worked and self.ccfx.processPairSelf(
                new_path, tmp_new_out, new_out, lang)
            print('Chong Tang: after second processPairSelf calling')
            self.progress('ccFinderX executing')
        if not worked:
            return ('ccFinderX execution failed', False)
        self.runRep(proj)

    def runRep(self, proj):
        """Convert the ccFinderX output of *proj* into Repertoire output files.

        Returns:
            ``('Processing successful', True)``.
        """
        print("Repertoire filtering....")
        # new and old for 3 langs
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                self.progress('Repertoire filtering based on operation')
                continue
            for is_new in [True, False]:
                output = convert_ccfx_output(self.pb, proj, lang, is_new)
                rep_out_path = self.pb.getRepertoireOutputPath(lang, is_new)
                suffix = '_old.txt'
                if is_new:
                    suffix = '_new.txt'
                output.writeToFile(rep_out_path + lang + suffix)
            self.progress('Repertoire filtering based on operation')

        print("Processing successful!!")
        return ('Processing successful', True)
class RepertoireModel:
    """Configuration plus driver for per-project clone detection with ccFinderX.

    Diff inputs are registered one at a time with ``setDiffPath`` and stored
    under the keys 'proj0', 'proj1', ... in ``self.path``.
    """

    def __init__(self):
        """Initialise progress counters, the ccFinderX wrapper and empty paths."""
        self.processDirectory = True
        self.num_operations = 0
        self.operations_so_far = IntegerWrapper(0)
        # The meaning of the three trailing arguments is defined by
        # CCFXEntryPoint, which is not visible from here.
        self.ccfx = CCFXEntryPoint(
            '/if7/ct4ew/research/ray/project/Repertoire_new/ccFinderx/ccfx',
            40, False, True)
        # NOTE(review): "No" maps to True, which looks inverted; presumably
        # intentional, confirm against CCFXEntryPoint.
        self.flags = {"No": True, "Yes": False}
        self.path = {}
        self.isProcessDiff = False
        self.tmpPath = None
        self.outPath = None

    def setDiffPath(self, path=None):
        """Register one more diff input under the next free 'projN' key.

        Also records on the ccFinderX wrapper whether the input is a
        directory. Always returns True.
        """
        self.isProcessDiff = True
        path = str(path)
        self.ccfx.isDirectory = os.path.isdir(path)
        proj = 'proj' + str(len(self.path))
        self.path[proj] = path
        return True

    def setOutDirectory(self, path):
        """Set the output directory, creating it when it does not exist.

        A leading '~' is expanded and relative paths are resolved against
        the current working directory. Always returns True.

        Raises:
            OSError: if the directory cannot be created.
        """
        path = os.path.expanduser(str(path))
        # isabs() instead of a "/home" prefix test, so absolute paths outside
        # /home are no longer glued onto the working directory.
        if not os.path.isabs(path):
            path = os.getcwd() + os.sep + path
        if not os.path.isdir(path):
            os.mkdir(path)
        self.outPath = path
        print("output files will be stored at " + self.outPath)
        return True

    def setTmpDirectory(self, path):
        """Set the scratch directory, creating it when it does not exist.

        Always returns True.
        """
        path = str(path)
        if not os.path.isdir(path):
            os.mkdir(path)
        self.tmpPath = path
        print("output files will be stored at " + self.tmpPath)
        return True

    def setSuffixes(self, jSuff='', cSuff='', hSuff=''):
        """Record the file suffix per language and build one DiffFilter each.

        A single leading '.' is stripped from every suffix.
        """
        def strip_dot(suffix):
            suffix = str(suffix)
            if suffix.startswith('.'):
                return suffix[1:]
            return suffix

        jSuff = strip_dot(jSuff)
        cSuff = strip_dot(cSuff)
        hSuff = strip_dot(hSuff)
        self.suffixes = {
            'java': jSuff,
            'cxx': cSuff,
            'hxx': hSuff,
        }
        self.filters = {
            'java': DiffFilter(jSuff),
            'cxx': DiffFilter(cSuff),
            'hxx': DiffFilter(hSuff),
        }

    def setCcfxDirectory(self, path):
        """Point the ccFinderX wrapper at ``<path>/ccfx``.

        Returns:
            True when that binary exists, otherwise False.
        """
        path = str(path)
        if not os.path.isdir(path):
            return False
        ccfx_binary = path + "/ccfx"
        if os.path.exists(ccfx_binary):
            self.ccfx.ccfxPath = ccfx_binary
            return True
        return False

    def setCcfxToken(self, token_size):
        """Set the ccFinderX token size; always returns True."""
        self.ccfx.tokenSize = token_size
        # str() so that integer token sizes do not raise TypeError here.
        print("setting ccFinder token size = " + str(token_size))
        return True

    def setCcfxFileSeparator(self, flag):
        """Set the file separator flag from a key of ``self.flags``.

        Raises:
            KeyError: if ``str(flag)`` is not a key of ``self.flags``.
        """
        self.ccfx.fileSep = self.flags[str(flag)]
        print("setting ccFinder file separator flag to %d" % (
            self.ccfx.fileSep))
        return True

    def setCcfxGroupSeparator(self, flag):
        """Set the group separator flag from a key of ``self.flags``.

        Raises:
            KeyError: if ``str(flag)`` is not a key of ``self.flags``.
        """
        self.ccfx.grpSep = self.flags[str(flag)]
        print("setting ccFinder group separator flag to %d" % (
            self.ccfx.grpSep))
        return True

    def filterDiffProj(self, proj):
        """Filter every file in the directory registered for *proj*.

        Only the 'cxx' filter is applied by this method.

        Returns:
            None on success, or ``(message, False)`` when a file failed.
        """
        path = self.path[proj]
        self.num_operations = 3 * 2
        self.num_operations += len(os.listdir(path)) * 3
        self.operations_so_far = IntegerWrapper(0)

        for lang in ['cxx']:
            the_filter = self.filters[lang]
            for file_name in os.listdir(path):
                self.progress('Filtering {0} files'.format(lang))
                input_path = path + os.sep + file_name
                print(file_name)
                print(input_path)
                out_path = (self.pb.getFilterOutputPath(proj, lang) +
                            file_name + '.' + self.suffixes[lang])
                print(out_path)
                (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
                self.got_some[lang] = self.got_some[lang] and gotsome
                if not ok:
                    return ('Error processing: ' + file_name, False)
                self.operations_so_far.incr()

    def filterDiffFile(self, diff_file, proj=None):
        """Filter one diff file with the java, cxx and hxx filters.

        Args:
            diff_file: path of the diff file. For backward compatibility a
                project key of ``self.path`` is accepted too and resolved to
                the path registered under it.
            proj: project key that selects the filter output directory. When
                omitted it is derived from *diff_file*.

        Returns:
            None on success, or ``(message, False)`` on failure.
        """
        if proj is None:
            if diff_file in self.path:
                # Legacy call style: the project key was passed.
                proj, diff_file = diff_file, self.path[diff_file]
            else:
                for key, registered in self.path.items():
                    if registered == diff_file:
                        proj = key
                        break
                else:
                    return ('Invalid path : ' + str(diff_file), False)

        # 3 different file formats, 2 operations each (filter/convert)
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)
        file_name = os.path.basename(diff_file)

        for lang in ['java', 'cxx', 'hxx']:
            the_filter = self.filters[lang]
            self.progress('Filtering {0} files'.format(lang))
            out_path = (self.pb.getFilterOutputPath(proj, lang) +
                        file_name + '.' + self.suffixes[lang])
            (ok, gotsome) = the_filter.filterDiff(diff_file, out_path)
            self.got_some[lang] = self.got_some[lang] and gotsome
            if not ok:
                return ('Error processing: ' + file_name, False)
            self.operations_so_far.incr()

    def progress(self, msg):
        """Print *msg* together with the completed percentage."""
        if self.num_operations:
            progressSoFar = (self.operations_so_far.value /
                             float(self.num_operations)) * 100
        else:
            # Nothing scheduled yet; avoid dividing by zero.
            progressSoFar = 0.0
        print("%s..: %f" % (msg, progressSoFar))

    def processDiffs(self, proj, path):
        """Filter the diffs of one project and convert them to ccfx input.

        Args:
            proj: project key of ``self.path``.
            path: directory of diffs or single diff file for that project.

        Returns:
            A ``(message, ok)`` tuple.
        """
        self.got_some = {'java': False, 'cxx': True, 'hxx': False}
        self.pb = PathBuilder(self.tmpPath, force_clean=True)

        if os.path.isdir(path):
            failure = self.filterDiffProj(proj)
        elif os.path.isfile(path):
            failure = self.filterDiffFile(path, proj)
        else:
            return ('Invalid path : ' + path, False)
        if failure is not None:
            # Do not convert when filtering already failed.
            return failure

        # Second, change each diff into ccFinder input format.
        converter = CCFXInputConverter()
        # The original computed an unused percentage here; only the counter
        # increment had an effect, so that is all that is kept.
        self.operations_so_far.incr()
        callback = lambda: self.progress('Converting to ccfx input format')
        converter.convert(proj, self.pb, callback)

        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)
        return ("Converting diffs to ccFinder compatible format is done",
                True)

    def processDiff(self):
        """Run ``processDiffs`` for every registered project."""
        for proj, path in self.path.items():
            message, ok = self.processDiffs(proj, path)
            if not ok:
                # Report the failure instead of dropping it silently.
                print(message)

    def runCCFinderSelf(self, proj, path):
        """Run ccFinderX on the old and new input of *proj*, then ``runRep``.

        Args:
            proj: project key of ``self.path``.
            path: unused; kept for interface compatibility.

        Returns:
            ``('ccFinderX execution failed', False)`` on failure, otherwise
            None.
        """
        print('Chong Tang: In runCCFinderSelf() function ----')
        clone_path = self.pb.getCCFXOutputPath()

        # Third, call ccfx for each directory.
        worked = True
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                self.progress('ccFinderX executing')
                continue
            old_path = self.pb.getCCFXInputPath(proj, lang, False)
            new_path = self.pb.getCCFXInputPath(proj, lang, True)
            tmp_old_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=False, is_tmp=True)
            tmp_new_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=True, is_tmp=True)
            old_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=False, is_tmp=False)
            new_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=True, is_tmp=False)

            print('Chong Tang: before first processPairSelf calling')
            worked = worked and self.ccfx.processPairSelf(
                old_path, tmp_old_out, old_out, lang)
            print('Chong Tang: after first processPairSelf calling')
            self.progress('ccFinderX executing')
            print('Chong Tang: before second processPairSelf calling')
            worked = worked and self.ccfx.processPairSelf(
                new_path, tmp_new_out, new_out, lang)
            print('Chong Tang: after second processPairSelf calling')
            self.progress('ccFinderX executing')
        if not worked:
            return ('ccFinderX execution failed', False)
        self.runRep(proj)

    def runRep(self, proj):
        """Convert the ccFinderX output of *proj* into Repertoire output files.

        Returns:
            ``('Processing successful', True)``.
        """
        print("Repertoire filtering....")
        # new and old for 3 langs
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                self.progress('Repertoire filtering based on operation')
                continue
            for is_new in [True, False]:
                output = convert_ccfx_output(self.pb, proj, lang, is_new)
                rep_out_path = self.pb.getRepertoireOutputPath(lang, is_new)
                suffix = '_old.txt'
                if is_new:
                    suffix = '_new.txt'
                output.writeToFile(rep_out_path + lang + suffix)
            self.progress('Repertoire filtering based on operation')

        print("Processing successful!!")
        return ('Processing successful', True)
def processImpl(self, model):
    """Run the whole clone-detection pipeline for the two projects of *model*.

    Steps, in order: load both projects, dump their commits, convert the
    diffs to ccfx input, run ccFinder for every language/version combination,
    filter each ccFinder result that exists, and finally pickle the model and
    the generated clone database. Before each step its message is printed
    with the progress fraction appended.

    Args:
        model: object providing ``getProj``, ``getPathBuilder``,
            ``getCcfxPath`` and ``getCcfxTokenSize``.

    Returns:
        ``("Success!", True)``; any failure surfaces as an exception.
    """
    proj0 = model.getProj(PathBuilder.Proj0)
    proj1 = model.getProj(PathBuilder.Proj1)
    path_builder = model.getPathBuilder()
    converter = CCFXInputConverter()
    ccfx = CCFXEntryPoint(path_builder, model.getCcfxPath(),
                          model.getCcfxTokenSize())
    # Kept at 20.0 so the printed progress fractions stay unchanged.
    total_steps = 20.0

    # (label in the ccFinder message, label in the filter message, lang, is_new)
    variants = [
        ("old C", "old C", LangDecider.CXX, False),
        ("new C", "new C", LangDecider.CXX, True),
        ("old headers", "old header", LangDecider.HXX, False),
        ("new headers", "new header", LangDecider.HXX, True),
        ("old Java", "old java", LangDecider.JAVA, False),
        ("new Java", "new java", LangDecider.JAVA, True),
    ]
    # have_output[i] records what processPairs returned for variants[i].
    have_output = [False] * len(variants)

    def run_ccfx(index):
        # Run ccFinder for one variant and remember whether it produced output.
        lang, is_new = variants[index][2], variants[index][3]
        have_output[index] = ccfx.processPairs(lang, is_new)

    def filter_output(index):
        # Convert one ccFinder result to Repertoire output, if there is one.
        # The Java variants used to test `not have_...`, which skipped real
        # results and tried to convert missing ones; all variants now share
        # the same check.
        if not have_output[index]:
            return
        lang, is_new = variants[index][2], variants[index][3]
        output = convert_ccfx_output(path_builder, lang, is_new)
        rep_out_path = path_builder.getRepertoireOutputPath(lang, is_new)
        rep_out_file = path_builder.getRepertoireOutputFileName(lang, is_new)
        output.writeToFile(rep_out_path + rep_out_file)

    def build_database():
        # Persist the model, then generate and persist the clone database.
        # `with` guarantees both files are closed, also when dumping fails.
        with open(path_builder.getModelPathAndName(), "w") as model_file:
            pickle.dump(model, model_file)
        rep_populator = RepDBPopulator(path_builder)
        db = rep_populator.generateDB(proj0, proj1)
        with open(path_builder.getDBPathAndName(), "w") as db_file:
            pickle.dump(db, db_file)

    steps = [
        ("Loading version histories for first project", proj0.load),
        ("Loading version histories for second project", proj1.load),
        ("Dumping commits for first project", proj0.dumpCommits),
        ("Dumping commits for second project", proj1.dumpCommits),
        ("Converting diffs to ccfx compatible format",
         lambda: converter.convert(path_builder)),
    ]
    for index, variant in enumerate(variants):
        steps.append((
            "Running ccFinder for %s, this will take quite some time..."
            % variant[0],
            lambda index=index: run_ccfx(index)))
    for index, variant in enumerate(variants):
        steps.append((
            "Filtering ccFinder %s output based on operation..." % variant[1],
            lambda index=index: filter_output(index)))
    steps.append(("Combining ccFinder output into a unified database...",
                  build_database))

    for step, (message, action) in enumerate(steps):
        print(message + str(step / total_steps))
        action()

    return "Success!", True
def __init__(self, model):
    """Wire the driver to the projects, paths and ccfx helpers of *model*.

    model -- object exposing getProj(), getPathBuilder(), getCcfxPath()
             and getCcfxTokenSize(); every value stored below is read
             from it.
    """
    first_project = model.getProj(PathBuilder.Proj0)
    second_project = model.getProj(PathBuilder.Proj1)
    builder = model.getPathBuilder()

    self.proj0 = first_project
    self.proj1 = second_project
    self.path_builder = builder
    self.converter = CCFXInputConverter()

    # The ccfx entry point is built from the same path builder plus the
    # executable path and token size configured on the model.
    ccfx_path = model.getCcfxPath()
    token_size = model.getCcfxTokenSize()
    self.ccfx = CCFXEntryPoint(builder, ccfx_path, token_size)
class RepDriver:
    """Console driver for the Repertoire clone-detection pipeline.

    Two ways of running the pipeline are offered:

    * ccfxConvert() followed by runCCFX() work on the projects, path
      builder and ccfx entry point captured by the constructor.
    * process() / processImpl() run every stage end to end for the model
      passed to them and pickle the model and the resulting database.

    Progress is reported with plain prints to standard output.
    """

    # Denominator of the progress fraction appended to processImpl's
    # messages. Kept as a float so the division is a true division under
    # Python 2 as well.
    TOTAL_STEPS = 20.0

    def __init__(self, model):
        """Capture the projects, path builder and ccfx helpers of *model*."""
        self.proj0 = model.getProj(PathBuilder.Proj0)
        self.proj1 = model.getProj(PathBuilder.Proj1)
        self.path_builder = model.getPathBuilder()
        self.converter = CCFXInputConverter()
        self.ccfx = CCFXEntryPoint(self.path_builder, model.getCcfxPath(), model.getCcfxTokenSize())

    def _report(self, message, step):
        """Print *message* followed by the fraction step / TOTAL_STEPS."""
        print(message + str(step / self.TOTAL_STEPS))

    def _writeFilteredOutput(self, path_builder, lang, is_new):
        """Convert the ccfx output for (lang, is_new) and write it to disk.

        The destination is the repertoire output path concatenated with the
        repertoire output file name, both obtained from *path_builder*.
        """
        output = convert_ccfx_output(path_builder, lang, is_new)
        rep_out_path = path_builder.getRepertoireOutputPath(lang, is_new)
        rep_out_file = path_builder.getRepertoireOutputFileName(lang, is_new)
        output.writeToFile(rep_out_path + rep_out_file)

    def _runCCFX(self, lang, is_new):
        """Run ccFinder for one language/side and filter what it produced."""
        side = "new" if is_new else "old"
        print("Running ccFinder for " + side + " files, this will take quite some time...")
        have_lang = self.ccfx.processPairs(lang, is_new)
        print("Filtering ccFinder " + side + " output based on operation...")
        # A falsy result from processPairs is taken to mean there is no
        # output to convert for this language/side.
        if have_lang:
            self._writeFilteredOutput(self.path_builder, lang, is_new)

    def ccfxConvert(self):
        """Register each project's repo root and convert its external diffs.

        A project that is falsy (e.g. None) is skipped; the conversion
        itself always runs.
        """
        print("Converting diffs to ccfx compatible format")
        if self.proj0:
            proj0_repo = self.proj0.getRepoRoot() + os.sep
            print(proj0_repo)
            self.path_builder.setExtDiffPath(0, proj0_repo)
        if self.proj1:
            proj1_repo = self.proj1.getRepoRoot() + os.sep
            print(proj1_repo)
            self.path_builder.setExtDiffPath(1, proj1_repo)
        self.converter.convertExtDiffs(self.path_builder)

    def runCCFX_old(self, lang):
        """Run and filter ccFinder for the old-side files of *lang*."""
        self._runCCFX(lang, False)

    def runCCFX_new(self, lang):
        """Run and filter ccFinder for the new-side files of *lang*."""
        self._runCCFX(lang, True)

    def runCCFX(self):
        """Run ccFinder for C, headers and Java: new side first, then old."""
        for lang in [LangDecider.CXX, LangDecider.HXX, LangDecider.JAVA]:
            self.runCCFX_new(lang)
            self.runCCFX_old(lang)

    def process(self, rep_model):
        """Run the full pipeline on *rep_model*.

        Returns the (message, success) pair produced by processImpl. The
        pair used to be computed and then dropped; callers that ignore the
        return value are unaffected.
        """
        msg, success = self.processImpl(rep_model)
        return msg, success

    def processImpl(self, model):
        """Run every pipeline stage for *model* and pickle the results.

        Stages, in order: load both projects' histories, dump their
        commits, convert the diffs for ccfx, run ccFinder for every
        language/side, filter each result that was produced, then pickle
        the model and the generated database.

        Works on objects freshly obtained from *model*, not on the ones
        cached by the constructor.

        Returns ("Success!", True). Any exception raised by a stage
        propagates to the caller.
        """
        proj0 = model.getProj(PathBuilder.Proj0)
        proj1 = model.getProj(PathBuilder.Proj1)
        path_builder = model.getPathBuilder()
        converter = CCFXInputConverter()
        ccfx = CCFXEntryPoint(path_builder, model.getCcfxPath(), model.getCcfxTokenSize())

        # (language, is_new, wording in the "Running" message,
        #  wording in the "Filtering" message)
        targets = [
            (LangDecider.CXX, False, "old C", "old C"),
            (LangDecider.CXX, True, "new C", "new C"),
            (LangDecider.HXX, False, "old headers", "old header"),
            (LangDecider.HXX, True, "new headers", "new header"),
            (LangDecider.JAVA, False, "old Java", "old java"),
            (LangDecider.JAVA, True, "new Java", "new java"),
        ]

        step = 0

        self._report("Loading version histories for first project", step)
        step += 1
        proj0.load()

        self._report("Loading version histories for second project", step)
        step += 1
        proj1.load()

        self._report("Dumping commits for first project", step)
        step += 1
        proj0.dumpCommits()

        self._report("Dumping commits for second project", step)
        step += 1
        proj1.dumpCommits()

        self._report("Converting diffs to ccfx compatible format", step)
        step += 1
        converter.convert(path_builder)

        # Every ccFinder run finishes before any filtering starts.
        produced = []
        for lang, is_new, run_label, _ in targets:
            self._report("Running ccFinder for " + run_label
                         + ", this will take quite some time...", step)
            step += 1
            produced.append(ccfx.processPairs(lang, is_new))

        for (lang, is_new, _, filter_label), have_output in zip(targets, produced):
            self._report("Filtering ccFinder " + filter_label
                         + " output based on operation...", step)
            step += 1
            # Same positive test for every language. The Java entries used
            # to be guarded by "not have_..." and so ran the conversion
            # exactly when ccFinder had reported nothing to convert.
            if have_output:
                self._writeFilteredOutput(path_builder, lang, is_new)

        self._report("Combining ccFinder output into a unified database...", step)
        step += 1

        # "with" closes both files even if pickling fails; the model file
        # used to be left open. Mode "w" is kept so the files are written
        # exactly as before.
        with open(path_builder.getModelPathAndName(), "w") as model_file:
            pickle.dump(model, model_file)

        rep_populator = RepDBPopulator(path_builder)
        db = rep_populator.generateDB(proj0, proj1)
        with open(path_builder.getDBPathAndName(), "w") as db_file:
            pickle.dump(db, db_file)

        # The old "Aborting" return could never be reached: the status flag
        # was set to True unconditionally just before it was tested.
        return "Success!", True