Exemplo n.º 1
0
 def __init__(self):
     """Create a model with default ccFinderX settings and no paths chosen."""
     # Progress bookkeeping starts at zero; the counter wrapper is created
     # before the ccFinderX entry point.
     self.processDirectory = True
     self.num_operations = 0
     self.operations_so_far = IntegerWrapper(0)

     # NOTE(review): absolute, machine-specific location of the ccfx binary.
     # The remaining arguments (40, False, True) are presumably the token
     # size and two separator flags -- confirm against CCFXEntryPoint.
     ccfx_binary = '/if7/ct4ew/research/ray/project/Repertoire_new/ccFinderx/ccfx'
     self.ccfx = CCFXEntryPoint(ccfx_binary, 40, False, True)

     # Lookup from the answer strings "No"/"Yes" to booleans; note that
     # "No" maps to True and "Yes" to False.
     self.flags = dict(No=True, Yes=False)

     # Nothing registered yet: no diff paths, no scratch or output directory.
     self.path = {}
     self.isProcessDiff = False
     self.tmpPath = self.outPath = None
Exemplo n.º 2
0
 def __init__(self):
     """Initialise progress counters, the ccFinderX wrapper and empty paths."""
     self.processDirectory = True
     self.num_operations = 0
     self.operations_so_far = IntegerWrapper(0)

     # NOTE(review): machine-specific binary location; the trailing values
     # (10, True, True) are presumably token size and two separator flags --
     # confirm against CCFXEntryPoint.
     ccfx_args = ('/home/bray/myTool/RepertoireTool/ccFinder/ccfx', 10, True, True)
     self.ccfx = CCFXEntryPoint(*ccfx_args)

     # "No" -> True, "Yes" -> False.
     self.flags = dict(No=True, Yes=False)

     # No inputs or directories have been selected yet.
     self.path = {}
     self.isProcessDiff = False
     self.tmpPath = self.outPath = None
Exemplo n.º 3
0
 def __init__(self):
     """Initialise counters, the ccFinderX wrapper and per-language flags."""
     self.processDirectory = True
     self.num_operations = 0
     self.operations_so_far = IntegerWrapper(0)

     # NOTE(review): machine-specific binary location; (5, False, True) are
     # presumably token size and two separator flags -- confirm against
     # CCFXEntryPoint.
     ccfx_binary = '/home/bray/SealLab/baishakhir/RepertoireTool/ccFinder/ccfx'
     self.ccfx = CCFXEntryPoint(ccfx_binary, 5, False, True)

     # "No" -> True, "Yes" -> False.
     self.flags = dict(No=True, Yes=False)

     # No inputs or directories have been selected yet.
     self.path = {}
     self.isProcessDiff = False
     self.tmpPath = self.outPath = None

     # One flag per language key, all starting out False.
     self.got_some = dict.fromkeys(['java', 'cxx', 'hxx'], False)
Exemplo n.º 4
0
    def __init__(self):
        """Create a model with default ccFinderX settings and no paths chosen."""
        self.processDirectory = True
        self.num_operations = 0
        self.operations_so_far = IntegerWrapper(0)

        # NOTE(review): absolute, machine-specific location of the ccfx
        # binary; (40, False, True) are presumably token size and two
        # separator flags -- confirm against CCFXEntryPoint.
        ccfx_args = (
            '/if7/ct4ew/research/ray/project/Repertoire_new/ccFinderx/ccfx',
            40,
            False,
            True,
        )
        self.ccfx = CCFXEntryPoint(*ccfx_args)

        # "No" -> True, "Yes" -> False.
        self.flags = dict(No=True, Yes=False)

        # Nothing registered yet: no diff paths, no scratch or output directory.
        self.path = {}
        self.isProcessDiff = False
        self.tmpPath = self.outPath = None
Exemplo n.º 5
0
    def processImpl(self, model):
        """Run the whole processing pipeline for ``model``, one step at a time.

        The steps run in a fixed order: load both projects' version
        histories, dump their commits, convert the diffs to ccFinder input,
        run ccFinder for old/new C, header and Java sources, filter each
        ccFinder result that was produced, and finally pickle the model and
        the generated clone database.  ``self.sync.stopRequested()`` is polled
        before every step and once more after the last one, so a stop request
        takes effect between steps, never in the middle of one.

        Args:
            model: object providing ``getProj``, ``getPathBuilder``,
                ``getCcfxPath`` and ``getCcfxTokenSize``.

        Returns:
            ``('Success!', True)`` when every step ran, otherwise
            ``('Aborting', False)``.
        """
        proj0 = model.getProj(PathBuilder.Proj0)
        proj1 = model.getProj(PathBuilder.Proj1)
        path_builder = model.getPathBuilder()
        converter = CCFXInputConverter()
        ccfx = CCFXEntryPoint(path_builder, model.getCcfxPath(), model.getCcfxTokenSize())

        # (language, is_new) -> result of the matching ccFinder run; the
        # filtering steps consult it to decide whether there is anything to do.
        # assumes the LangDecider constants are hashable -- TODO confirm
        have_output = {}

        def run_ccfx(lang, is_new):
            """Run ccFinder for one language/version and remember its result."""
            have_output[(lang, is_new)] = ccfx.processPairs(lang, is_new)

        def filter_output(lang, is_new):
            """Convert one ccFinder result and write it to its output file."""
            if not have_output[(lang, is_new)]:
                return
            output = convert_ccfx_output(path_builder, lang, is_new)
            rep_out_path = path_builder.getRepertoireOutputPath(lang, is_new)
            rep_out_file = path_builder.getRepertoireOutputFileName(lang, is_new)
            output.writeToFile(rep_out_path + rep_out_file)

        def build_database():
            """Pickle the model, then generate and pickle the clone database."""
            # Binary mode is what pickle expects, and the context managers
            # guarantee both files are flushed and closed.
            with open(path_builder.getModelPathAndName(), 'wb') as model_file:
                pickle.dump(model, model_file)
            rep_populator = RepDBPopulator(path_builder)
            db = rep_populator.generateDB(proj0, proj1)
            with open(path_builder.getDBPathAndName(), 'wb') as db_file:
                pickle.dump(db, db_file)

        # Each entry is (progress message, action); an action of None marks a
        # step that only reports progress.
        steps = [
            ("Loading version histories for first project",
                lambda: proj0.load()),
            ("Loading version histories for second project",
                lambda: proj1.load()),
            ("Dumping commits for first project",
                lambda: proj0.dumpCommits()),
            ("Dumping commits for second project",
                lambda: proj1.dumpCommits()),
            ("Converting diffs to ccfx compatible format for first project",
                lambda: converter.convert(path_builder)),
            # The single convert() call above is the only conversion performed;
            # this step just advances the progress display.
            ("Converting diffs to ccfx compatible format for second project",
                None),
            ("Running ccFinder for old C, this will take quite some time...",
                lambda: run_ccfx(LangDecider.CXX, False)),
            ("Running ccFinder for new C, this will take quite some time...",
                lambda: run_ccfx(LangDecider.CXX, True)),
            ("Running ccFinder for old headers, this will take quite some time...",
                lambda: run_ccfx(LangDecider.HXX, False)),
            ("Running ccFinder for new headers, this will take quite some time...",
                lambda: run_ccfx(LangDecider.HXX, True)),
            ("Running ccFinder for old Java, this will take quite some time...",
                lambda: run_ccfx(LangDecider.JAVA, False)),
            ("Running ccFinder for new Java, this will take quite some time...",
                lambda: run_ccfx(LangDecider.JAVA, True)),
            ("Filtering ccFinder old C output based on operation...",
                lambda: filter_output(LangDecider.CXX, False)),
            ("Filtering ccFinder new C output based on operation...",
                lambda: filter_output(LangDecider.CXX, True)),
            ("Filtering ccFinder old header output based on operation...",
                lambda: filter_output(LangDecider.HXX, False)),
            ("Filtering ccFinder new header output based on operation...",
                lambda: filter_output(LangDecider.HXX, True)),
            ("Filtering ccFinder old java output based on operation...",
                lambda: filter_output(LangDecider.JAVA, False)),
            ("Filtering ccFinder new java output based on operation...",
                lambda: filter_output(LangDecider.JAVA, True)),
            ("Combining ccFinder output into a unified database...",
                build_database),
        ]

        # Progress is reported against 20 even though there are 19 steps, so
        # the reported fraction stays below 1.0 while work is in flight.
        total_steps = 20.0
        for step, (message, action) in enumerate(steps):
            if self.sync.stopRequested():
                return 'Aborting', False
            self.progress(message, step / total_steps)
            if action is not None:
                action()

        # A stop request that arrives during the final step still aborts.
        if self.sync.stopRequested():
            return 'Aborting', False
        return 'Success!', True
Exemplo n.º 6
0
class RepertoireModel:
    """Hold the settings for one run and drive its processing stages.

    The ``set*`` methods record the diff inputs, scratch directory, file
    suffixes and ccFinderX options.  ``filterDiffs`` then filters the diffs
    per language, converts them to ccFinderX input, runs ccFinderX and
    post-processes its output.  The ``interface`` object passed to the
    processing methods must provide ``cancelled()`` and
    ``progress(message, fraction)``.
    """

    def __init__(self):
        """Start in directory mode with a default ccFinderX configuration."""
        self.processDirectory = True
        self.num_operations = 0
        self.operations_so_far = IntegerWrapper(0)
        # presumably (binary path, token size, file-separator flag,
        # group-separator flag) -- confirm against CCFXEntryPoint
        self.ccfx = CCFXEntryPoint('../ccFinder/ccfx', 40, True, True)
        # Lookup used by the separator setters; note "No" maps to True.
        self.flags = {"No": True, "Yes": False}

    def setDiffPaths(self, path0=None, path1=None, isDirectory=True):
        """Register the two diff inputs that will be compared.

        Args:
            path0: first project's diff directory (or diff file).
            path1: second project's diff directory (or diff file).
            isDirectory: True when both inputs are directories, False when
                both are single files.

        Returns:
            True when both inputs exist with the requested kind, else False.
            The directory/file mode is recorded even when validation fails.
        """
        path0 = str(path0)
        path1 = str(path1)

        self.ccfx.isDirectory = self.processDirectory = isDirectory
        if (isDirectory is True) and (not os.path.isdir(path0) or
                                      not os.path.isdir(path1)):
            return False
        elif (isDirectory is False) and (not os.path.isfile(path0) or
                                         not os.path.isfile(path1)):
            return False
        self.paths = {'proj0': path0, 'proj1': path1}
        return True

    def setTmpDirectory(self, path):
        """Create a private scratch directory inside ``path``.

        Returns:
            False when ``path`` is not an existing directory, True once the
            new sub-directory has been created and stored in ``tmpPath``.

        Raises:
            OSError: if the sub-directory cannot be created.
        """
        path = str(path)
        if not os.path.isdir(path):
            return False
        # great, we have a scratch space, lets put our own directory there
        # so we know we probably aren't going to fight someone else for names
        uniq = 'repertoire_tmp_' + str(int(os.times()[4] * 100))
        tmpPath = path + os.sep + uniq
        os.mkdir(tmpPath)
        self.tmpPath = tmpPath
        return True

    def setSuffixes(self, jSuff='', cSuff='', hSuff=''):
        """Record the file suffix per language and build one filter for each.

        A single leading '.' is dropped from every suffix before it is stored
        in ``suffixes`` and handed to ``DiffFilter``.
        """
        def strip_dot(suffix):
            suffix = str(suffix)
            return suffix[1:] if suffix.startswith('.') else suffix

        jSuff = strip_dot(jSuff)
        cSuff = strip_dot(cSuff)
        hSuff = strip_dot(hSuff)
        self.suffixes = {
            'java': jSuff,
            'cxx': cSuff,
            'hxx': hSuff,
        }
        self.filters = {
            'java': DiffFilter(jSuff),
            'cxx': DiffFilter(cSuff),
            'hxx': DiffFilter(hSuff),
        }

    def setCcfxDirectory(self, path):
        """Point the ccFinderX wrapper at the ``ccfx`` binary inside ``path``.

        Returns:
            True when ``path`` is a directory containing ``ccfx``, else False.
        """
        path = str(path)
        if not os.path.isdir(path):
            return False
        ccfx_binary = path + "/ccfx"
        if os.path.exists(ccfx_binary):
            self.ccfx.ccfxPath = ccfx_binary
            return True
        return False

    def setCcfxToken(self, token_size):
        """Store the ccFinderX token size and echo it; always returns True."""
        self.ccfx.tokenSize = token_size
        # str() keeps the message working when the size arrives as an int.
        print("setting ccFinder token size = " + str(token_size))
        return True

    def setCcfxFileSeparator(self, flag):
        """Set the file separator flag from a "Yes"/"No" answer.

        Raises:
            KeyError: if ``str(flag)`` is neither "Yes" nor "No".
        """
        self.ccfx.fileSep = self.flags[str(flag)]
        print("setting ccFinder file separator flag to %d" % (self.ccfx.fileSep))
        return True

    def setCcfxGroupSeparator(self, flag):
        """Set the group separator flag from a "Yes"/"No" answer.

        Raises:
            KeyError: if ``str(flag)`` is neither "Yes" nor "No".
        """
        self.ccfx.grpSep = self.flags[str(flag)]
        print("setting ccFinder group separator flag to %d" % (self.ccfx.grpSep))
        return True

    def filterDiffProjs(self, interface):
        """Filter every file of both diff directories, once per language.

        Requires ``self.pb`` and ``self.got_some`` to be set (``filterDiffs``
        does this).

        Returns:
            None when every file was filtered, otherwise a
            ``(message, False)`` tuple describing the cancellation or the
            file that failed.
        """
        same_project = self.paths['proj0'] == self.paths['proj1']

        # 3 different file formats, 2 operations each (filter/convert)
        self.num_operations = len(os.listdir(self.paths['proj0'])) * 3 * 2
        if not same_project:
            self.num_operations += len(os.listdir(self.paths['proj1'])) * 3 * 2
        self.num_operations += 2 * 6  # 2 ccFinder call for all 6 output files
        self.operations_so_far = IntegerWrapper(0)

        for proj in ['proj0', 'proj1']:
            for lang in ['java', 'cxx', 'hxx']:
                the_filter = self.filters[lang]
                for file_name in os.listdir(self.paths[proj]):
                    if interface.cancelled():
                        return ('User cancelled processing', False)
                    interface.progress('Filtering {0} files'.format(lang),
                            self.operations_so_far.value / float(self.num_operations))
                    input_path = self.paths[proj] + os.sep + file_name
                    out_path = (self.pb.getFilterOutputPath(proj, lang) +
                            file_name + '.' + self.suffixes[lang])
                    (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
                    # this is actually tricky, if we got some output for java
                    # in one project but not the other, then we know that
                    # there can't be any clones
                    self.got_some[lang] = self.got_some[lang] and gotsome
                    if not ok:
                        return ('Error processing: ' + file_name, False)
                    self.operations_so_far.incr()
            if same_project:
                # Both keys point at the same input, so one pass is enough.
                print("filterDiffProjs: two paths same, breaking!!")
                break

    def filterDiffFiles(self, interface):
        """Filter the two single diff files, once per language.

        Requires ``self.pb`` and ``self.got_some`` to be set (``filterDiffs``
        does this).

        Returns:
            None on success, False when the two files have different
            extensions, or a ``(message, False)`` tuple on cancellation or
            when filtering a file failed.
        """
        same_project = self.paths['proj0'] == self.paths['proj1']

        # 3 different file formats, 2 operations each (filter/convert)
        self.num_operations = 3 * 2
        if not same_project:
            self.num_operations += 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        input_file1 = self.paths['proj0']
        input_file2 = self.paths['proj1']
        lang1 = os.path.splitext(input_file1)[1]  # extension
        lang2 = os.path.splitext(input_file2)[1]  # extension

        if lang1 != lang2:
            print("!!the two files have different extension")
            print("lang1 = " + lang1)
            print("lang2 = " + lang2)
            return False

        for proj in ['proj0', 'proj1']:
            for lang in ['java', 'cxx', 'hxx']:
                the_filter = self.filters[lang]
                if interface.cancelled():
                    return ('User cancelled processing', False)
                interface.progress('Filtering {0} files'.format(lang),
                        self.operations_so_far.value / float(self.num_operations))
                input_path = self.paths[proj]
                input_name = os.path.basename(input_path)
                out_path = (self.pb.getFilterOutputPath(proj, lang) +
                        input_name + '.' + self.suffixes[lang])

                (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
                # this is actually tricky, if we got some output for java
                # in one project but not the other, then we know that
                # there can't be any clones
                self.got_some[lang] = self.got_some[lang] and gotsome
                if not ok:
                    # Name the file being filtered in the error message.
                    return ('Error processing: ' + input_name, False)
                self.operations_so_far.incr()
            if same_project:
                # Both keys point at the same input, so one pass is enough.
                print("filterDiffFiles: two paths same, breaking!!")
                break

    def filterDiffs(self, interface):
        """Run the full pipeline on the registered diff inputs.

        Stages: filter diffs per language, convert them to ccFinderX input,
        run ccFinderX on old and new versions, then post-process each
        ccFinderX result into ``<lang>_old.txt`` / ``<lang>_new.txt``.

        Returns:
            A ``(message, succeeded)`` tuple.  A cancellation or error in the
            filtering stage stops the run and is returned as-is.
        """
        self.got_some = {'java': True, 'cxx': True, 'hxx': True}
        self.pb = PathBuilder(self.tmpPath, force_clean=True)

        # First, filter the input diffs by file type, so that all c diffs
        # are in one set of files, and similarly for java/headers
        if self.processDirectory is True:
            outcome = self.filterDiffProjs(interface)
        else:
            outcome = self.filterDiffFiles(interface)
        # The filter helpers return None only when everything went through.
        if outcome is False:
            return ('Input files have different extensions', False)
        if outcome is not None:
            return outcome

        # Second, change each diff into ccFinder input format
        converter = CCFXInputConverter()
        callback = lambda: interface.progress(
                'Converting to ccfx input format',
                self.operations_so_far.incr() / float(self.num_operations))
        converter.convert(self.pb, callback)

        # new and old for 3 langs
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        clone_path = self.pb.getCCFXOutputPath()
        same_project = self.paths['proj0'] == self.paths['proj1']

        # Third, call ccfx for each directory
        worked = True
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                interface.progress('ccFinderX executing',
                                   self.operations_so_far.incr() / float(self.num_operations))
                continue
            old_path0 = self.pb.getCCFXInputPath(PathBuilder.PROJ0, lang, False)
            old_path1 = self.pb.getCCFXInputPath(PathBuilder.PROJ1, lang, False)
            new_path0 = self.pb.getCCFXInputPath(PathBuilder.PROJ0, lang, True)
            new_path1 = self.pb.getCCFXInputPath(PathBuilder.PROJ1, lang, True)
            tmp_old_out = clone_path + self.pb.getCCFXOutputFileName(
                    lang, is_new=False, is_tmp=True)
            tmp_new_out = clone_path + self.pb.getCCFXOutputFileName(
                    lang, is_new=True, is_tmp=True)
            old_out = clone_path + self.pb.getCCFXOutputFileName(
                    lang, is_new=False, is_tmp=False)
            new_out = clone_path + self.pb.getCCFXOutputFileName(
                    lang, is_new=True, is_tmp=False)

            if same_project:
                # Only proj0 was filtered, so compare it against itself.
                old_path1 = old_path0
                new_path1 = new_path0

            # "and" short-circuits: once a run has failed, later runs are
            # skipped while progress keeps advancing.
            worked = worked and self.ccfx.processPair(
                    old_path0, old_path1, tmp_old_out, old_out, lang)
            interface.progress('ccFinderX executing',
                               self.operations_so_far.incr() / float(self.num_operations))
            worked = worked and self.ccfx.processPair(
                    new_path0, new_path1, tmp_new_out, new_out, lang)
            interface.progress('ccFinderX executing',
                               self.operations_so_far.incr() / float(self.num_operations))
        if not worked:
            return ('ccFinderX execution failed', False)

        # Fourth, build up our database of clones
        print("Repertoire filtering....")
        # new and old for 3 langs
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                interface.progress('Repertoire filtering based on operation',
                                   self.operations_so_far.incr() / float(self.num_operations))
                continue
            for is_new in [True, False]:
                output = convert_ccfx_output(self.pb, lang, is_new)
                rep_out_path = self.pb.getRepertoireOutputPath(lang, is_new)
                suffix = '_new.txt' if is_new else '_old.txt'
                output.writeToFile(rep_out_path + lang + suffix)
                interface.progress('Repertoire filtering based on operation',
                                   self.operations_so_far.incr() / float(self.num_operations))

        print("Processing successful!!")
        return ('Processing successful', True)
Exemplo n.º 7
0
 def __init__(self):
     """Initialise progress counters and the default ccFinderX wrapper."""
     self.processDirectory = True
     self.num_operations = 0
     self.operations_so_far = IntegerWrapper(0)

     # Binary location is relative; (40, True, True) are presumably token
     # size and two separator flags -- confirm against CCFXEntryPoint.
     ccfx_args = ('../ccFinder/ccfx', 40, True, True)
     self.ccfx = CCFXEntryPoint(*ccfx_args)

     # "No" -> True, "Yes" -> False.
     self.flags = dict(No=True, Yes=False)
Exemplo n.º 8
0
class RepertoireModel:
    """Hold the settings for one run and drive its command-line style stages.

    Diff inputs are registered one at a time with ``setDiffPath`` and stored
    under the keys 'proj0', 'proj1', ...  ``processDiff`` filters and converts
    every registered input; ``runCCFinderSelf`` then runs ccFinderX on one
    project and post-processes the result with ``runRep``.  Progress is
    printed to standard output.
    """

    def __init__(self):
        """Start with a default ccFinderX configuration and no paths chosen."""
        self.processDirectory = True
        self.num_operations = 0
        self.operations_so_far = IntegerWrapper(0)
        # NOTE(review): absolute, machine-specific location of the ccfx
        # binary; (40, False, True) are presumably token size and two
        # separator flags -- confirm against CCFXEntryPoint.
        self.ccfx = CCFXEntryPoint('/if7/ct4ew/research/ray/project/Repertoire_new/ccFinderx/ccfx', 40, False, True)
        # Lookup used by the separator setters; note "No" maps to True.
        self.flags = {"No": True, "Yes": False}
        self.path = {}
        self.isProcessDiff = False
        self.tmpPath = None
        self.outPath = None

    def setDiffPath(self, path=None):
        """Register one more diff input under the next free 'projN' key.

        The ccFinderX wrapper's ``isDirectory`` flag is updated to reflect
        whether this latest input is a directory.  Always returns True.
        """
        self.isProcessDiff = True
        path = str(path)

        self.ccfx.isDirectory = os.path.isdir(path)

        proj = 'proj' + str(len(self.path))
        self.path[proj] = path
        return True

    def setOutDirectory(self, path):
        """Choose (and create if needed) the outer output directory.

        Relative paths are resolved against the current working directory
        and a leading ``~`` is expanded; absolute paths are used unchanged.

        Returns:
            True once ``outPath`` has been set.

        Raises:
            OSError: if the directory cannot be created.
        """
        # abspath/expanduser cover every absolute location, not only paths
        # below /home, and actually expand "~/".
        path = os.path.abspath(os.path.expanduser(str(path)))

        if not os.path.isdir(path):
            os.makedirs(path)
        self.outPath = path
        print("output files will be stored at " + self.outPath)
        return True

    def setTmpDirectory(self, path):
        """Choose (and create if needed) the working directory.

        Returns:
            True once ``tmpPath`` has been set.

        Raises:
            OSError: if the directory cannot be created.
        """
        path = str(path)
        if not os.path.isdir(path):
            # makedirs also creates missing parent directories.
            os.makedirs(path)
        self.tmpPath = path
        print("output files will be stored at " + self.tmpPath)
        return True

    def setSuffixes(self, jSuff='', cSuff='', hSuff=''):
        """Record the file suffix per language and build one filter for each.

        A single leading '.' is dropped from every suffix before it is stored
        in ``suffixes`` and handed to ``DiffFilter``.
        """
        def strip_dot(suffix):
            suffix = str(suffix)
            return suffix[1:] if suffix.startswith('.') else suffix

        jSuff = strip_dot(jSuff)
        cSuff = strip_dot(cSuff)
        hSuff = strip_dot(hSuff)
        self.suffixes = {
            'java': jSuff,
            'cxx': cSuff,
            'hxx': hSuff,
        }
        self.filters = {
            'java': DiffFilter(jSuff),
            'cxx': DiffFilter(cSuff),
            'hxx': DiffFilter(hSuff),
        }

    def setCcfxDirectory(self, path):
        """Point the ccFinderX wrapper at the ``ccfx`` binary inside ``path``.

        Returns:
            True when ``path`` is a directory containing ``ccfx``, else False.
        """
        path = str(path)
        if not os.path.isdir(path):
            return False
        ccfx_binary = path + "/ccfx"
        if os.path.exists(ccfx_binary):
            self.ccfx.ccfxPath = ccfx_binary
            return True
        return False

    def setCcfxToken(self, token_size):
        """Store the ccFinderX token size and echo it; always returns True."""
        self.ccfx.tokenSize = token_size
        # str() keeps the message working when the size arrives as an int.
        print("setting ccFinder token size = " + str(token_size))
        return True

    def setCcfxFileSeparator(self, flag):
        """Set the file separator flag from a "Yes"/"No" answer.

        Raises:
            KeyError: if ``str(flag)`` is neither "Yes" nor "No".
        """
        self.ccfx.fileSep = self.flags[str(flag)]
        print("setting ccFinder file separator flag to %d" % (self.ccfx.fileSep))
        return True

    def setCcfxGroupSeparator(self, flag):
        """Set the group separator flag from a "Yes"/"No" answer.

        Raises:
            KeyError: if ``str(flag)`` is neither "Yes" nor "No".
        """
        self.ccfx.grpSep = self.flags[str(flag)]
        print("setting ccFinder group separator flag to %d" % (self.ccfx.grpSep))
        return True

    def filterDiffProj(self, proj):
        """Filter every file of the directory registered under ``proj``.

        Requires ``self.pb`` and ``self.got_some`` to be set (``processDiffs``
        does this).

        Returns:
            None when every file was filtered, otherwise
            ``('Error processing: <file>', False)``.
        """
        path = self.path[proj]

        self.num_operations = 3 * 2
        self.num_operations += len(os.listdir(path)) * 3
        self.operations_so_far = IntegerWrapper(0)

        # NOTE(review): only 'cxx' is filtered here; 'java' and 'hxx' are
        # switched off, matching the got_some defaults in processDiffs.
        for lang in ['cxx']:
            the_filter = self.filters[lang]
            for file_name in os.listdir(path):
                self.progress('Filtering {0} files'.format(lang))
                input_path = path + os.sep + file_name
                print(file_name)
                print(input_path)
                out_path = (self.pb.getFilterOutputPath(proj, lang) +
                            file_name + '.' + self.suffixes[lang])
                print(out_path)
                (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
                self.got_some[lang] = self.got_some[lang] and gotsome
                if not ok:
                    return ('Error processing: ' + file_name, False)
                self.operations_so_far.incr()

    def filterDiffFile(self, diff_file, proj=None):
        """Filter one diff file, once per language.

        Requires ``self.pb`` and ``self.got_some`` to be set (``processDiffs``
        does this).

        Args:
            diff_file: path of the diff file.  When ``proj`` is omitted this
                is instead taken to be a registered project key (such as
                'proj0') and the file path is looked up in ``self.path``.
            proj: project key used to build the output path.

        Returns:
            None on success, otherwise ``('Error processing: <file>', False)``.

        Raises:
            KeyError: if ``proj`` is omitted and ``diff_file`` is not a
                registered project key.
        """
        if proj is None:
            proj = diff_file
            diff_file = self.path[proj]

        # 3 different file formats, 2 operations each (filter/convert)
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        input_path = diff_file
        input_name = os.path.basename(input_path)
        for lang in ['java', 'cxx', 'hxx']:
            the_filter = self.filters[lang]
            self.progress('Filtering {0} files'.format(lang))
            out_path = (self.pb.getFilterOutputPath(proj, lang) +
                        input_name + '.' + self.suffixes[lang])

            (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
            self.got_some[lang] = self.got_some[lang] and gotsome
            if not ok:
                return ('Error processing: ' + input_name, False)
            self.operations_so_far.incr()

    def progress(self, msg):
        """Print ``msg`` followed by the completed share of work in percent."""
        if self.num_operations:
            progressSoFar = (self.operations_so_far.value / float(self.num_operations)) * 100
        else:
            # Nothing scheduled yet (the initial state): report 0% instead
            # of dividing by zero.
            progressSoFar = 0.0
        print("%s..: %f" % (msg, progressSoFar))

    def processDiffs(self, proj, path):
        """Filter one registered input and convert it to ccFinderX format.

        Args:
            proj: project key such as 'proj0'.
            path: the directory or file registered under that key.

        Returns:
            A ``(message, succeeded)`` tuple; a failure in the filtering
            step is returned as-is and stops the conversion.
        """
        # NOTE(review): only C sources are enabled; java and hxx start out
        # False and therefore stay False through filtering.
        self.got_some = {'java': False, 'cxx': True, 'hxx': False}
        self.pb = PathBuilder(self.tmpPath, force_clean=True)

        if os.path.isdir(path) is True:
            outcome = self.filterDiffProj(proj)
        elif os.path.isfile(path) is True:
            outcome = self.filterDiffFile(path, proj)
        else:
            return ('Invalid path : ' + path, False)
        # The filter helpers return None only when everything went through.
        if outcome is not None:
            return outcome

        # Second, change each diff into ccFinder input format
        converter = CCFXInputConverter()
        # Count the conversion itself as one completed operation so the
        # percentages printed by the callback include it.
        self.operations_so_far.incr()
        callback = lambda: self.progress('Converting to ccfx input format')

        converter.convert(proj, self.pb, callback)

        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        return ("Converting diffs to ccFinder compatible format is done", True)

    def processDiff(self):
        """Run ``processDiffs`` for every registered input.

        Failures are printed and do not stop the remaining inputs.
        """
        for proj, path in self.path.items():
            message, succeeded = self.processDiffs(proj, path)
            if not succeeded:
                print(message)

    def runCCFinderSelf(self, proj, path):
        """Run ccFinderX on the old and new versions of one project.

        Args:
            proj: project key whose converted input is analysed.
            path: unused; kept for call compatibility.

        Returns:
            ``('ccFinderX execution failed', False)`` when a run failed,
            otherwise the result tuple of ``runRep``.
        """
        print('Chong Tang: In runCCFinderSelf() function ----')
        clone_path = self.pb.getCCFXOutputPath()
        # Third, call ccfx for each directory
        worked = True
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                self.progress('ccFinderX executing')
                continue
            old_path = self.pb.getCCFXInputPath(proj, lang, False)
            new_path = self.pb.getCCFXInputPath(proj, lang, True)

            tmp_old_out = clone_path + self.pb.getCCFXOutputFileName(
                    lang, is_new=False, is_tmp=True)
            tmp_new_out = clone_path + self.pb.getCCFXOutputFileName(
                    lang, is_new=True, is_tmp=True)
            old_out = clone_path + self.pb.getCCFXOutputFileName(
                    lang, is_new=False, is_tmp=False)
            new_out = clone_path + self.pb.getCCFXOutputFileName(
                    lang, is_new=True, is_tmp=False)

            # "and" short-circuits: once a run has failed, later runs are
            # skipped while the progress messages keep being printed.
            print('Chong Tang: before first processPairSelf calling')
            worked = worked and self.ccfx.processPairSelf(
                    old_path, tmp_old_out, old_out, lang)
            print('Chong Tang: after first processPairSelf calling')
            self.progress('ccFinderX executing')

            print('Chong Tang: before second processPairSelf calling')
            worked = worked and self.ccfx.processPairSelf(
                    new_path, tmp_new_out, new_out, lang)
            print('Chong Tang: after second processPairSelf calling')
            self.progress('ccFinderX executing')
        if not worked:
            return ('ccFinderX execution failed', False)

        # Hand the post-processing result back so callers can tell success
        # from failure on every path.
        return self.runRep(proj)

    def runRep(self, proj):
        """Post-process the ccFinderX output of ``proj`` for each language.

        Writes ``<lang>_new.txt`` and ``<lang>_old.txt`` for every language
        whose ``got_some`` flag is set.

        Returns:
            ``('Processing successful', True)``.
        """
        print("Repertoire filtering....")
        # new and old for 3 langs
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                self.progress('Repertoire filtering based on operation')
                continue
            for is_new in [True, False]:
                output = convert_ccfx_output(self.pb, proj, lang, is_new)
                rep_out_path = self.pb.getRepertoireOutputPath(lang, is_new)
                suffix = '_new.txt' if is_new else '_old.txt'
                output.writeToFile(rep_out_path + lang + suffix)
                self.progress('Repertoire filtering based on operation')

        print("Processing successful!!")
        return ('Processing successful', True)
Exemplo n.º 9
0
class RepertoireModel:
    """Driver that turns registered diffs into Repertoire clone reports.

    The ``set*`` methods configure paths, suffixes and ccFinderX options.
    ``processDiff`` filters and converts every registered diff (creating the
    ``PathBuilder`` stored in ``self.pb``); ``runCCFinderSelf`` runs ccFinderX
    on the converted input and then calls ``runRep`` to write the Repertoire
    output files.

    Most operations report their outcome as a ``(message, success)`` tuple.
    """

    def __init__(self):
        self.processDirectory = True
        self.num_operations = 0
        self.operations_so_far = IntegerWrapper(0)
        # Default ccFinderX binary; override it with setCcfxDirectory().
        self.ccfx = CCFXEntryPoint(
            '/if7/ct4ew/research/ray/project/Repertoire_new/ccFinderx/ccfx',
            40, False, True)
        # Maps the user-facing "No"/"Yes" answer to the separator flag value.
        self.flags = {"No": True, "Yes": False}
        # Project key ('proj0', 'proj1', ...) -> registered diff path.
        self.path = {}
        self.isProcessDiff = False
        self.tmpPath = None
        self.outPath = None
        # Filled in by setSuffixes() / processDiffs(); initialised here so the
        # attributes always exist.
        self.suffixes = {}
        self.filters = {}
        self.got_some = {'java': False, 'cxx': False, 'hxx': False}
        self.pb = None

    def setDiffPath(self, path=None):
        """Register a diff file or directory under the next ``projN`` key.

        Returns:
            True when the path was registered, False when no path was given.
        """
        if path is None:
            # str(None) would silently register the bogus path 'None'.
            return False
        self.isProcessDiff = True
        path = str(path)
        self.ccfx.isDirectory = os.path.isdir(path)

        proj = 'proj' + str(len(self.path))
        self.path[proj] = path
        return True

    def setOutDirectory(self, path):
        """Set (and create if needed) the directory for output files.

        ``~`` is expanded and relative paths are resolved against the current
        working directory; absolute paths are used as given.
        """
        path = os.path.expanduser(str(path))
        if not os.path.isabs(path):
            path = os.path.join(os.getcwd(), path)

        if not os.path.isdir(path):
            # makedirs also creates missing parent directories.
            os.makedirs(path)
        self.outPath = path
        print("output files will be stored at " + self.outPath)
        return True

    def setTmpDirectory(self, path):
        """Set (and create if needed) the directory for temporary files."""
        path = str(path)
        if not os.path.isdir(path):
            os.makedirs(path)
        self.tmpPath = path
        print("temporary files will be stored at " + self.tmpPath)
        return True

    def setSuffixes(self, jSuff='', cSuff='', hSuff=''):
        """Store the file suffix for each language and build its DiffFilter.

        A single leading '.' is dropped from every suffix.
        """
        self.suffixes = {}
        self.filters = {}
        for lang, raw in (('java', jSuff), ('cxx', cSuff), ('hxx', hSuff)):
            suffix = str(raw)
            if suffix.startswith('.'):
                suffix = suffix[1:]
            self.suffixes[lang] = suffix
            self.filters[lang] = DiffFilter(suffix)

    def setCcfxDirectory(self, path):
        """Point the ccFinderX entry point at ``<path>/ccfx``.

        Returns:
            True if ``path`` is a directory containing ``ccfx``, else False.
        """
        path = str(path)
        if not os.path.isdir(path):
            return False
        ccfx_binary = os.path.join(path, "ccfx")
        if not os.path.exists(ccfx_binary):
            return False
        self.ccfx.ccfxPath = ccfx_binary
        return True

    def setCcfxToken(self, token_size):
        """Set the ccFinderX token size (stored exactly as given)."""
        self.ccfx.tokenSize = token_size
        # str() so an integer token size does not raise on concatenation.
        print("setting ccFinder token size = " + str(token_size))
        return True

    def setCcfxFileSeparator(self, flag):
        """Set the file separator flag from "No"/"Yes".

        Raises:
            KeyError: if ``flag`` is not a key of ``self.flags``.
        """
        self.ccfx.fileSep = self.flags[str(flag)]
        print("setting ccFinder file separator flag to %d" % (
            self.ccfx.fileSep))
        return True

    def setCcfxGroupSeparator(self, flag):
        """Set the group separator flag from "No"/"Yes".

        Raises:
            KeyError: if ``flag`` is not a key of ``self.flags``.
        """
        self.ccfx.grpSep = self.flags[str(flag)]
        print("setting ccFinder group separator flag to %d" % (
            self.ccfx.grpSep))
        return True

    def filterDiffProj(self, proj):
        """Filter every file in the directory registered for ``proj``.

        Returns:
            None on success, or ``('Error processing: <file>', False)`` as
            soon as one file fails to filter.
        """
        path = self.path[proj]
        file_names = os.listdir(path)
        # NOTE(review): only 'cxx' is filtered here, which matches the
        # hard-coded got_some in processDiffs -- confirm before re-enabling
        # 'java' and 'hxx'.
        languages = ['cxx']

        # 3 * 2 covers the later steps; plus one filter run per file and
        # language actually processed.
        self.num_operations = 3 * 2
        self.num_operations += len(file_names) * len(languages)
        self.operations_so_far = IntegerWrapper(0)

        for lang in languages:
            the_filter = self.filters[lang]
            for file_name in file_names:
                self.progress('Filtering {0} files'.format(lang))
                input_path = path + os.sep + file_name
                print(file_name)
                print(input_path)
                out_path = (self.pb.getFilterOutputPath(proj, lang) +
                            file_name + '.' + self.suffixes[lang])
                print(out_path)
                (ok, gotsome) = the_filter.filterDiff(input_path, out_path)
                # NOTE(review): with 'and', a single file without content of
                # this language clears the flag for the whole project --
                # confirm that this is intended.
                self.got_some[lang] = self.got_some[lang] and gotsome
                if not ok:
                    return ('Error processing: ' + file_name, False)
                self.operations_so_far.incr()

    def filterDiffFile(self, diff_file, proj=None):
        """Filter a single diff file once per language.

        Args:
            diff_file: path of the diff file. For backward compatibility a
                registered project key is accepted as well and resolved
                through ``self.path``.
            proj: project key the file belongs to; looked up in ``self.path``
                when omitted.

        Returns:
            None on success, or a ``(message, False)`` tuple on failure.
        """
        if proj is None:
            if diff_file in self.path:
                # Old call style: the project key was passed as diff_file.
                proj = diff_file
                diff_file = self.path[proj]
            else:
                for key, registered in self.path.items():
                    if registered == diff_file:
                        proj = key
                        break
                else:
                    return ('Unknown diff file: ' + str(diff_file), False)

        # 3 different file formats, 2 operations each (filter/convert)
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        file_name = os.path.basename(diff_file)
        for lang in ['java', 'cxx', 'hxx']:
            the_filter = self.filters[lang]
            self.progress('Filtering {0} files'.format(lang))
            out_path = (self.pb.getFilterOutputPath(proj, lang) +
                        file_name + '.' + self.suffixes[lang])

            (ok, gotsome) = the_filter.filterDiff(diff_file, out_path)
            self.got_some[lang] = self.got_some[lang] and gotsome
            if not ok:
                return ('Error processing: ' + file_name, False)
            self.operations_so_far.incr()

    def progress(self, msg):
        """Print ``msg`` with the percentage of operations done so far."""
        if self.num_operations:
            progressSoFar = (self.operations_so_far.value /
                             float(self.num_operations)) * 100
        else:
            # No operations planned yet: report 0% instead of dividing by 0.
            progressSoFar = 0.0
        print("%s..: %f" % (msg, progressSoFar))

    def processDiffs(self, proj, path):
        """Filter the diffs of one project and convert them to ccFinderX input.

        Args:
            proj: project key as stored in ``self.path``.
            path: diff file or directory registered for ``proj``.

        Returns:
            A ``(message, success)`` tuple.
        """
        self.got_some = {'java': False, 'cxx': True, 'hxx': False}
        self.pb = PathBuilder(self.tmpPath, force_clean=True)

        # First, filter the raw diffs per language.
        if os.path.isdir(path):
            failure = self.filterDiffProj(proj)
        elif os.path.isfile(path):
            failure = self.filterDiffFile(path, proj)
        else:
            return ('Invalid path : ' + path, False)
        if failure is not None:
            # Report the filter error instead of silently carrying on.
            return failure

        # Second, change each diff into ccFinder input format
        converter = CCFXInputConverter()
        self.operations_so_far.incr()

        def callback():
            self.progress('Converting to ccfx input format')

        converter.convert(proj, self.pb, callback)

        # Reset the counters for the ccFinderX phase (3 languages, new + old).
        self.num_operations = 3 * 2
        self.operations_so_far = IntegerWrapper(0)

        return ("Converting diffs to ccFinder compatible format is done", True)

    def processDiff(self):
        """Run ``processDiffs`` for every registered project.

        All projects are processed even if one fails.

        Returns:
            The first failing ``(message, False)`` result if any project
            failed, otherwise the result of the last project processed, or
            None when no project is registered.
        """
        first_failure = None
        result = None
        for proj, path in self.path.items():
            result = self.processDiffs(proj, path)
            if first_failure is None and not result[1]:
                first_failure = result
        if first_failure is not None:
            return first_failure
        return result

    def runCCFinderSelf(self, proj, path):
        """Run ccFinderX on the old and new input of ``proj``, then ``runRep``.

        Args:
            proj: project key used to look up the ccFinderX input paths.
            path: unused; kept for interface compatibility.

        Returns:
            ``('ccFinderX execution failed', False)`` if any ccFinderX run
            failed, otherwise the result of ``runRep``.
        """
        print('Chong Tang: In runCCFinderSelf() function ----')
        clone_path = self.pb.getCCFXOutputPath()
        # Third, call ccfx for each directory
        worked = True
        for lang in ['java', 'cxx', 'hxx']:
            if not self.got_some[lang]:
                # Both runs (old + new) of this language are skipped.
                self.operations_so_far.incr()
                self.operations_so_far.incr()
                self.progress('ccFinderX executing')
                continue
            old_path = self.pb.getCCFXInputPath(proj, lang, False)
            new_path = self.pb.getCCFXInputPath(proj, lang, True)

            tmp_old_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=False, is_tmp=True)
            tmp_new_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=True, is_tmp=True)
            old_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=False, is_tmp=False)
            new_out = clone_path + self.pb.getCCFXOutputFileName(
                lang, is_new=True, is_tmp=False)

            # 'and' short-circuits: once a run has failed, no further
            # ccFinderX runs are started.
            print('Chong Tang: before first processPairSelf calling')
            worked = worked and self.ccfx.processPairSelf(
                old_path, tmp_old_out, old_out, lang)
            print('Chong Tang: after first processPairSelf calling')
            self.operations_so_far.incr()
            self.progress('ccFinderX executing')

            print('Chong Tang: before second processPairSelf calling')
            worked = worked and self.ccfx.processPairSelf(
                new_path, tmp_new_out, new_out, lang)
            print('Chong Tang: after second processPairSelf calling')
            self.operations_so_far.incr()
            self.progress('ccFinderX executing')
        if not worked:
            return ('ccFinderX execution failed', False)

        # Hand the final status back instead of returning None on success.
        return self.runRep(proj)

    def runRep(self, proj):
        """Convert the ccFinderX clone output of ``proj`` into Repertoire files.

        For every language flagged in ``self.got_some`` the new and the old
        clone results are converted and written to
        ``<repertoire output path><lang>_new.txt`` / ``..._old.txt``.

        Returns:
            The tuple ``('Processing successful', True)``.
        """
        print("Repertoire filtering....")
        languages = ['java', 'cxx', 'hxx']
        # new and old for 3 langs
        self.num_operations = len(languages) * 2
        self.operations_so_far = IntegerWrapper(0)
        progress_msg = 'Repertoire filtering based on operation'

        for lang in languages:
            if not self.got_some[lang]:
                # Count both skipped conversions so progress keeps advancing.
                self.operations_so_far.incr()
                self.operations_so_far.incr()
                self.progress(progress_msg)
                continue
            for is_new, suffix in ((True, '_new.txt'), (False, '_old.txt')):
                output = convert_ccfx_output(self.pb, proj, lang, is_new)
                rep_out_path = self.pb.getRepertoireOutputPath(lang, is_new)
                output.writeToFile(rep_out_path + lang + suffix)
                self.operations_so_far.incr()
                self.progress(progress_msg)

        print("Processing successful!!")
        return ('Processing successful', True)
Exemplo n.º 10
0
    def processImpl(self, model):
        """Run the whole Repertoire pipeline for the two projects of ``model``.

        Steps, in order: load both version histories, dump their commits,
        convert the diffs to ccFinderX input, run ccFinderX for old/new C,
        header and Java files, write the Repertoire output for every run that
        reported results, and finally pickle the model and the generated
        database to the paths given by the path builder.

        Args:
            model: object providing ``getProj``, ``getPathBuilder``,
                ``getCcfxPath`` and ``getCcfxTokenSize``.

        Returns:
            The tuple ``("Success!", True)``. Failures surface as exceptions
            raised by the individual steps.
        """
        proj0 = model.getProj(PathBuilder.Proj0)
        proj1 = model.getProj(PathBuilder.Proj1)
        path_builder = model.getPathBuilder()
        converter = CCFXInputConverter()
        ccfx = CCFXEntryPoint(path_builder, model.getCcfxPath(), model.getCcfxTokenSize())

        # NOTE(review): only 18 steps are announced below, so the printed
        # fraction never reaches 1.0 -- kept as in the original output.
        total_steps = 20.0
        counter = {'step': 0}  # mutable cell: Python 2 has no ``nonlocal``

        def announce(message):
            # Prints the message directly followed by the completed fraction.
            print(message + str(counter['step'] / total_steps))
            counter['step'] += 1

        announce("Loading version histories for first project")
        proj0.load()

        announce("Loading version histories for second project")
        proj1.load()

        announce("Dumping commits for first project")
        proj0.dumpCommits()

        announce("Dumping commits for second project")
        proj1.dumpCommits()

        announce("Converting diffs to ccfx compatible format")
        converter.convert(path_builder)

        # (language, is_new, label in the "Running" message,
        #  label in the "Filtering" message)
        passes = [
            (LangDecider.CXX, False, "old C", "old C"),
            (LangDecider.CXX, True, "new C", "new C"),
            (LangDecider.HXX, False, "old headers", "old header"),
            (LangDecider.HXX, True, "new headers", "new header"),
            (LangDecider.JAVA, False, "old Java", "old java"),
            (LangDecider.JAVA, True, "new Java", "new java"),
        ]

        have_output = []
        for lang, is_new, run_label, _unused in passes:
            announce("Running ccFinder for " + run_label +
                     ", this will take quite some time...")
            have_output.append(ccfx.processPairs(lang, is_new))

        for (lang, is_new, _unused, filter_label), found in zip(passes, have_output):
            announce("Filtering ccFinder " + filter_label +
                     " output based on operation...")
            # Convert only when ccFinderX produced output. The Java passes
            # used to test the inverse condition, unlike C and headers.
            if not found:
                continue
            output = convert_ccfx_output(path_builder, lang, is_new)
            rep_out_path = path_builder.getRepertoireOutputPath(lang, is_new)
            rep_out_file = path_builder.getRepertoireOutputFileName(lang, is_new)
            output.writeToFile(rep_out_path + rep_out_file)

        announce("Combining ccFinder output into a unified database...")
        # Binary mode is what pickle expects; ``with`` closes the handles
        # even if dumping fails (the model file used to be left open).
        with open(path_builder.getModelPathAndName(), "wb") as model_file:
            pickle.dump(model, model_file)
        rep_populator = RepDBPopulator(path_builder)
        db = rep_populator.generateDB(proj0, proj1)
        with open(path_builder.getDBPathAndName(), "wb") as db_file:
            pickle.dump(db, db_file)

        return "Success!", True
Exemplo n.º 11
0
 def __init__(self, model):
     """Collect the collaborators needed for a run from ``model``.

     Stores both projects, the path builder, a fresh
     ``CCFXInputConverter`` and a ``CCFXEntryPoint`` built from the
     model's ccFinderX path and token size.
     """
     first_project = model.getProj(PathBuilder.Proj0)
     second_project = model.getProj(PathBuilder.Proj1)
     builder = model.getPathBuilder()
     input_converter = CCFXInputConverter()
     entry_point = CCFXEntryPoint(
         builder,
         model.getCcfxPath(),
         model.getCcfxTokenSize(),
     )

     self.proj0 = first_project
     self.proj1 = second_project
     self.path_builder = builder
     self.converter = input_converter
     self.ccfx = entry_point
Exemplo n.º 12
0
class RepDriver:
    """Runs the Repertoire pipeline for the two projects of a model.

    The instance keeps the projects, path builder, input converter and
    ccFinderX entry point taken from the model passed to the constructor.
    ``processImpl`` builds its own collaborators from the model it is given.
    """

    def __init__(self, model):
        self.proj0 = model.getProj(PathBuilder.Proj0)
        self.proj1 = model.getProj(PathBuilder.Proj1)
        self.path_builder = model.getPathBuilder()
        self.converter = CCFXInputConverter()
        self.ccfx = CCFXEntryPoint(self.path_builder, model.getCcfxPath(), model.getCcfxTokenSize())

    def _writeRepertoireOutput(self, path_builder, lang, is_new):
        """Convert the ccFinderX output for one pass and write it to disk."""
        output = convert_ccfx_output(path_builder, lang, is_new)
        rep_out_path = path_builder.getRepertoireOutputPath(lang, is_new)
        rep_out_file = path_builder.getRepertoireOutputFileName(lang, is_new)
        output.writeToFile(rep_out_path + rep_out_file)

    def _runCCFX(self, lang, is_new):
        """Run ccFinderX for one language/version and write its output."""
        age = "new" if is_new else "old"
        print("Running ccFinder for " + age +
              " files, this will take quite some time...")
        have_lang = self.ccfx.processPairs(lang, is_new)

        print("Filtering ccFinder " + age + "  output based on operation...")
        if have_lang:
            self._writeRepertoireOutput(self.path_builder, lang, is_new)

    def ccfxConvert(self):
        """Register each project's repository root and convert its diffs."""
        print("Converting diffs to ccfx compatible format")
        for index, project in ((0, self.proj0), (1, self.proj1)):
            if project:
                repo_root = project.getRepoRoot() + os.sep
                print(repo_root)
                self.path_builder.setExtDiffPath(index, repo_root)

        self.converter.convertExtDiffs(self.path_builder)

    def runCCFX_old(self, lang):
        """Run ccFinderX on the old files of ``lang`` and write the output."""
        self._runCCFX(lang, False)

    def runCCFX_new(self, lang):
        """Run ccFinderX on the new files of ``lang`` and write the output."""
        self._runCCFX(lang, True)

    def runCCFX(self):
        """Run ccFinderX for C, header and Java files (new first, then old)."""
        for lang in [LangDecider.CXX, LangDecider.HXX, LangDecider.JAVA]:
            self.runCCFX_new(lang)
            self.runCCFX_old(lang)

    def process(self, rep_model):
        """Run ``processImpl`` and return its ``(message, success)`` result."""
        # The result used to be computed and then thrown away.
        return self.processImpl(rep_model)

    def processImpl(self, model):
        """Run the whole Repertoire pipeline for the two projects of ``model``.

        Steps, in order: load both version histories, dump their commits,
        convert the diffs to ccFinderX input, run ccFinderX for old/new C,
        header and Java files, write the Repertoire output for every run that
        reported results, and finally pickle the model and the generated
        database to the paths given by the path builder.

        Args:
            model: object providing ``getProj``, ``getPathBuilder``,
                ``getCcfxPath`` and ``getCcfxTokenSize``.

        Returns:
            The tuple ``("Success!", True)``. Failures surface as exceptions
            raised by the individual steps.
        """
        proj0 = model.getProj(PathBuilder.Proj0)
        proj1 = model.getProj(PathBuilder.Proj1)
        path_builder = model.getPathBuilder()
        converter = CCFXInputConverter()
        ccfx = CCFXEntryPoint(path_builder, model.getCcfxPath(), model.getCcfxTokenSize())

        # NOTE(review): only 18 steps are announced below, so the printed
        # fraction never reaches 1.0 -- kept as in the original output.
        total_steps = 20.0
        counter = {'step': 0}  # mutable cell: Python 2 has no ``nonlocal``

        def announce(message):
            # Prints the message directly followed by the completed fraction.
            print(message + str(counter['step'] / total_steps))
            counter['step'] += 1

        announce("Loading version histories for first project")
        proj0.load()

        announce("Loading version histories for second project")
        proj1.load()

        announce("Dumping commits for first project")
        proj0.dumpCommits()

        announce("Dumping commits for second project")
        proj1.dumpCommits()

        announce("Converting diffs to ccfx compatible format")
        converter.convert(path_builder)

        # (language, is_new, label in the "Running" message,
        #  label in the "Filtering" message)
        passes = [
            (LangDecider.CXX, False, "old C", "old C"),
            (LangDecider.CXX, True, "new C", "new C"),
            (LangDecider.HXX, False, "old headers", "old header"),
            (LangDecider.HXX, True, "new headers", "new header"),
            (LangDecider.JAVA, False, "old Java", "old java"),
            (LangDecider.JAVA, True, "new Java", "new java"),
        ]

        have_output = []
        for lang, is_new, run_label, _unused in passes:
            announce("Running ccFinder for " + run_label +
                     ", this will take quite some time...")
            have_output.append(ccfx.processPairs(lang, is_new))

        for (lang, is_new, _unused, filter_label), found in zip(passes, have_output):
            announce("Filtering ccFinder " + filter_label +
                     " output based on operation...")
            # Convert only when ccFinderX produced output. The Java passes
            # used to test the inverse condition, unlike C, headers and
            # runCCFX_old/runCCFX_new.
            if found:
                self._writeRepertoireOutput(path_builder, lang, is_new)

        announce("Combining ccFinder output into a unified database...")
        # Binary mode is what pickle expects; ``with`` closes the handles
        # even if dumping fails (the model file used to be left open).
        with open(path_builder.getModelPathAndName(), "wb") as model_file:
            pickle.dump(model, model_file)
        rep_populator = RepDBPopulator(path_builder)
        db = rep_populator.generateDB(proj0, proj1)
        with open(path_builder.getDBPathAndName(), "wb") as db_file:
            pickle.dump(db, db_file)

        return "Success!", True