def testCodeExtractFileNotExistFailure(self):
    """Expect FileNotFoundError from the load/extract sequence.

    Uses the bug data supplied by getIncorrectBugDataFileNotExist().
    """
    brokenBugData = self.getIncorrectBugDataFileNotExist()
    with self.assertRaises(FileNotFoundError):
        codeExtractor = CodeExtractor(brokenBugData)
        codeExtractor.loadCodeFromFile()
        codeExtractor.extractBugCode()
        # The fragment itself is irrelevant here; only the raised error matters.
        codeExtractor.getBugCodeFragment()
 def testCodeExtractLineBoundariesSwapFailure(self):
     """Expect ValueError from the load/extract sequence.

     Uses the bug data supplied by getIncorrectBugDataLineBoundariesSwap().
     """
     brokenBugData = self.getIncorrectBugDataLineBoundariesSwap()
     with self.assertRaises(ValueError):
         codeExtractor = CodeExtractor(brokenBugData)
         codeExtractor.loadCodeFromFile()
         codeExtractor.extractBugCode()
         # The fragment itself is irrelevant here; only the raised error matters.
         codeExtractor.getBugCodeFragment()
    def testExtractCodeWithEmptyDiffBetweenTwoCommits(self):
        gp = gitprovider.GitProvider(config.getRepoDir())
        commits = gp.getAllVersions('trainDbScriptTest')
        commit1 = commits[-2]
        commit2 = commits[-2]
        file1 = gp.getFileContents('bugcode2.cpp', commit1)
        file2 = gp.getFileContents('bugcode2.cpp', commit2)
        diff = LinuxDiffer().diff(file1, file2)
        usedDiffs = []
        bugData = self.getBugData()
        extractor = CodeExtractor(bugData)
        extractor.loadCodeFromText(file1, '\r\n', '\n')
        extractor.extractBugCode()
        extractor.loadDiff(diff)
        with self.assertRaises(ValueError):
            extractor.extractFixCode()
        bugCode = extractor.getBugCodeFragment()
        fixCode = extractor.getFixCodeFragment()
        usedDiffs = extractor.getUsedDiffs()
        expectedOutputFix = ''
        expectedOutputBug = """int main(void)
{
    int a;
    a = 3;
    a = 0;
    if (a == 0)
    {
"""
        self.assertEqual(expectedOutputBug, bugCode)
        self.assertEqual(expectedOutputFix, fixCode)
        self.assertEqual(0, len(usedDiffs))
 def testCodeExtractSuccess(self):
     """Extracted bug fragment must equal what getBugCodeFileMiddle() returns."""
     codeExtractor = CodeExtractor(self.getCorrectBugDataFileMiddle())
     codeExtractor.loadCodeFromFile()
     codeExtractor.extractBugCode()
     actualFragment = codeExtractor.getBugCodeFragment()
     wantedFragment = self.getBugCodeFileMiddle()
     self.assertEqual(actualFragment, wantedFragment)
    def testCodeExtractFileEndSuccess(self):
        bugData = self.getCorrectBugDataFileEnd()
        extractor = CodeExtractor(bugData)
        extractor.loadCodeFromFile()
        extractor.extractBugCode()
        extractedCode = extractor.getBugCodeFragment()
        correctCode = """        cout << b << endl;
    }
    return 0;
}"""
        self.assertEqual(extractedCode, correctCode)
    def testCodeExtractFileBeginSuccess(self):
        bugData = self.getCorrectBugDataFileBegin()
        extractor = CodeExtractor(bugData)
        extractor.loadCodeFromFile()
        extractor.extractBugCode()
        extractedCode = extractor.getBugCodeFragment()
        correctCode = """#include <iostream>

using namespace std;

"""
        self.assertEqual(extractedCode, correctCode)
    def testCodeExtractMultiLineSuccess(self):
        bugData = self.getCorrectBugDataFileMultiLine()
        extractor = CodeExtractor(bugData)
        extractor.loadCodeFromFile()
        extractor.extractBugCode()
        extractedCode = extractor.getBugCodeFragment()
        correctCode = """
int main(void)
{
    int a;
    a = 3;
    a = 0;
    if (a == 0)
    {
        int b = 1 / a;
"""
        self.assertEqual(extractedCode, correctCode)
    def testExtractCodeWithDiffBetweenTwoCommitsMultiDiffInFragment(self):
        gp = gitprovider.GitProvider(config.getRepoDir())
        commits = gp.getAllVersions('trainDbScriptTest')
        commit1 = commits[-6]
        commit2 = commits[-7]
        file1 = gp.getFileContents('bugcode3.cpp', commit1)
        file2 = gp.getFileContents('bugcode3.cpp', commit2)
        diff = LinuxDiffer().diff(file1, file2)
        usedDiffs = []
        bugData = self.getBugData3()
        extractor = CodeExtractor(bugData)
        extractor.loadCodeFromText(file1, '\r\n', '\n')
        extractor.extractBugCode()
        extractor.loadDiff(diff)
        extractor.extractFixCode()
        bugCode = extractor.getBugCodeFragment()
        fixCode = extractor.getFixCodeFragment()
        usedDiffs = extractor.getUsedDiffs()
        expectedOutputFix = """{;
    int a;
    a = 0;
    a = 2;
    if (a != 0)
    {;
        int b = 1 / a;
"""
        expectedOutputBug = """{
    int a;
    a = 0;
    a = 2;
    if (a != 0)
    {
        int b = 1 / a;
"""
        self.assertEqual(expectedOutputBug, bugCode)
        self.assertEqual(expectedOutputFix, fixCode)
        self.assertEqual(2, len(usedDiffs))
示例#9
0
    def extractCode(self, id):
        """Build a FixData entry for one not-yet-resolved bug.

        Args:
            id: Identifier passed to ``self.ccdb.getNotResolvedBugData``.

        Returns:
            ``entities.FixData`` holding the bug fragment, the fix fragment,
            the checker, the message and the bug line relative to the
            fragment start line; ``None`` when the bug is unknown, a file
            lookup raises KeyError, extraction raises ValueError, or the
            number of used diffs is not exactly one.
        """
        unresolvedBug = self.ccdb.getNotResolvedBugData(id)
        #TODO: Possible improvement for bugData
        if unresolvedBug is None:
            #TODO: Implement custom errors
            return None

        repoPath = self.convertFilePathToRepoRelativePath(
            unresolvedBug.getFile())
        try:
            # The commit after the current one is read as the buggy version,
            # the current commit as the version without the bug.
            sourceWithBug = self.vcs.getFileContents(
                repoPath, self.commits[self.currentCommitIndex + 1])
            sourceWithoutBug = self.vcs.getFileContents(
                repoPath, self.commits[self.currentCommitIndex])
        except KeyError:
            return None

        sourceDiff = POSIXDiffer().diff(sourceWithBug, sourceWithoutBug)

        codeExtractor = CodeExtractor(unresolvedBug)
        try:
            codeExtractor.extractAll(sourceWithBug, sourceDiff)
        except ValueError:
            return None

        bugFragment = codeExtractor.getBugCodeFragment()
        fixFragment = codeExtractor.getFixCodeFragment()

        #Easy version - ignore bug if none or more than one diff used to fix
        #TODO: Possible improvement here
        if len(codeExtractor.getUsedDiffs()) != 1:
            return None

        return entities.FixData(
            bugFragment,
            fixFragment,
            unresolvedBug.getChecker(),
            unresolvedBug.getMessage(),
            unresolvedBug.getLine() - unresolvedBug.getStartLine())
示例#10
0
    def predict(self, id, checker):
        """Interactively suggest model-generated fixes for bugs of one checker.

        Args:
            id: Bug id to process. With -1 every bug returned by
                ``self.ccdb.getAllBugsForChecker(checker)`` is visited, but
                each one is only reported as too big or "Good to go"; no
                prediction is made in that mode.
            checker: Checker name; selects the model file and the dictionary.

        Side effects: sets ``self.dictionary``, ``self.coder`` and
        ``self.totalDictionaryLength``; may rewrite the bug's source file,
        run CodeChecker and check out the current commit, depending on
        ``config.cfVerifyPrediction`` and the user's answer.
        """
        # Load all bugs
        print("Loading bug data...")
        if id == -1:
            bugIds = [row[0] for row in self.ccdb.getAllBugsForChecker(checker)]
        else:
            bugIds = [id]

        # Loading model
        print("Loading model...")
        model = load_model(config.cfModelFilenameFormat.format(checker))
        model.summary()
        # Indexed by the verification status: 0 = failed, 1 = passed, 2 = skipped
        statusLabels = ['NOT OK', 'OK', 'Skipped']

        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()

        # Predicting
        print("Starting predictions...")
        for bugId in bugIds:
            bugRecord = self.ccdb.getBugData(bugId)
            # Only bugs of an available checker that equals the requested one.
            if not (bugRecord.getChecker() in globals.availableCheckers
                    and bugRecord.getChecker() == checker):
                print("Bug #{0} - checker not supported".format(bugId))
                continue

            # Load extra tokens from checker message
            checkerTokens = self.checkers.extractTokensForChecker(
                bugRecord.getChecker(), bugRecord.getMessage())
            # Retrieve code fragment with bug
            repoPath = self.convertFilePathToRepoRelativePath(
                bugRecord.getFile())
            sourceWithBug = self.vcs.getFileContents(
                repoPath, self.commits[self.currentCommitIndex])
            codeExtractor = CodeExtractor(bugRecord)
            codeExtractor.loadCodeFromText(sourceWithBug)
            codeExtractor.extractBugCode()
            bugFragment = codeExtractor.getBugCodeFragment()
            # Encode it
            encoded, unknownTokens = self.coder.encode(
                bugFragment, checkerData=checkerTokens)
            # Convert to one-hot
            maxInputLen = model.get_layer(index=0).input_shape[1]
            if len(encoded) > maxInputLen:
                print(
                    "Bug #{0} - Code too big for model, ignored".format(bugId))
                continue
            if id == -1:
                print("Bug #{0} - Good to go".format(bugId))
                continue
            padCount = maxInputLen - len(encoded)
            if padCount > 0:
                encoded = self.coder.applyPadding(encoded, padCount)
            oneHotShape = (maxInputLen, self.totalDictionaryLength)
            X = np.zeros((1,) + oneHotShape)
            X[0] = self.coder.convertToOneHot(encoded, np.zeros(oneHotShape))
            # Predict and convert from one-hot
            Y = self.coder.convertFromOneHot(model.predict(X)[0])
            print(Y)
            # Decode
            Y = self.coder.removePadding(Y)
            fixFragment = self.coder.decode(Y, unknownTokens)

            #Verify?
            verification = 2
            if config.cfVerifyPrediction:
                # Apply fix in source code file
                codeExtractor.applyFix(fixFragment)
                codeExtractor.saveToFile(bugRecord.getFile())
                # Run CodeChecker and analyze code
                self.codeChecker.check(True)
                # Check if ID is resolved in tmp folder
                verification = 1 if bugId in self.getDiffResolvedIds() else 0

            #Print
            print("Bug #{0} - summary".format(bugId))
            print("== Code fragment with bug ==")
            print(bugFragment)
            print("== Suggested fix ==")
            print(fixFragment)
            print("Verification: {0}".format(statusLabels[verification]))

            answer = ' '
            while answer not in ('y', 'n'):
                answer = input("Apply fix? (y/n): ")

            if answer == 'y' and not config.cfVerifyPrediction:
                # Apply fix in source code file (verification did not already)
                codeExtractor.applyFix(fixFragment)
                codeExtractor.saveToFile(bugRecord.getFile())
            elif answer != 'y' and config.cfVerifyPrediction:
                # Revert file contents
                self.vcs.checkout(self.commits[self.currentCommitIndex])
            print('Done')
        print("All done, exiting...")
示例#11
0
    def main(self):
        """Analyse the code and interactively repair newly introduced bugs.

        Steps: clear the tmp directory and run CodeChecker, collect the new
        bugs from ``getDiffNew()``, ask the per-checker model for a fix for
        each bug, optionally verify each fix by re-running the analysis
        (``config.cfVerifyPrediction``), then let the user apply, display or
        ignore the suggestions, or abort.

        Bugs whose checker has no loaded model are skipped with a message,
        like bugs whose encoded fragment is too long for the model.

        Exits the process with status 1 when the user answers 'q', or when
        standard input reaches end-of-file while an answer is awaited.
        """
        # Do analysis
        shutil.rmtree(config.getTmpDir())
        self.codeChecker.check(True)

        # Diff new
        newBugs = self.getDiffNew()

        if len(newBugs) < 1:
            print('No new bugs introduced, commit is accepted!')
            return

        print("New bugs found! Count: {0}. Attempting repairs...".format(len(newBugs)))

        # Load models
        models = {
            checker: load_model(config.cfModelFilenameFormat.format(checker))
            for checker in globals.availableCheckers
        }

        # Load all content from files having new
        files = {self.convertFilePathToRepoRelativePath(x.getFile()) for x in newBugs}
        fileContents = {}
        for f in files:
            fn = config.getRepoDir() + f
            with open(fn, 'r') as fh:
                fileContents[f] = fh.read()

        # For each file sort by bug line desc
        suggestions = []
        validSuggestions = 0
        for f in files:
            bugs = sorted(
                (x for x in newBugs
                 if self.convertFilePathToRepoRelativePath(x.getFile()) == f),
                key=lambda x: x.getLine(),
                reverse=True)
            print("=== File: {0} ===".format(f))
            # For each bug get a suggestion and test it
            for b in bugs:
                bugChecker = b.getChecker()
                print("L{0}, Type: {1}".format(b.getLine(), bugChecker))
                # Models exist only for globals.availableCheckers; skip the
                # bug instead of failing with KeyError on the lookup below.
                if bugChecker not in models:
                    print("Ignored: Checker not supported")
                    continue
                model = models[bugChecker]
                # Prepare useful data
                dictionary = Dictionary(bugChecker)
                coder = Coder(dictionary)
                totalDictionaryLength = dictionary.length()
                # Prepare and extract bug fragment
                checkerInfo = self.checkers.extractTokensForChecker(bugChecker, b.getMessage())
                extractor = CodeExtractor(b)
                extractor.loadCodeFromText(fileContents[f])
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                # Encode it
                encodedBugData, initialUnkList = coder.encode(bugCodeFragment, checkerData=checkerInfo)
                # Convert to one-hot
                MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]

                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print("Ignored: Code too big for model")
                    continue

                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
                X[0] = coder.convertToOneHot(encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = coder.convertFromOneHot(model.predict(X)[0])
                Y = coder.removePadding(Y)
                # Decode (the last character of the decoded text is dropped)
                fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]

                #Verify?
                # vStatus: 0 = verification failed, 1 = passed, 2 = skipped
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(b.getFile())
                    # Run CodeChecker and analyze code
                    shutil.rmtree(config.getTmpDir())
                    compilationLog = self.codeChecker.check(True)
                    newBugsAfterFix = self.getDiffNew()
                    # Fixed only if the build succeeded and the same bug is
                    # no longer reported as new
                    isFixed = 'Build failed' not in compilationLog
                    for nb in newBugsAfterFix:
                        if self.isBugDataEqual(b, nb):
                            isFixed = False
                    # Set vStatus accordingly
                    vStatus = 1 if isFixed else 0
                    # Revert file
                    extractor.loadCodeFromText(fileContents[f])
                    extractor.saveToFile(b.getFile())
                if vStatus == 0:
                    print("Verification: Negative, cannot be applied")
                elif vStatus == 1:
                    print("Verification: Positive, can be applied")
                    validSuggestions += 1
                elif vStatus == 2:
                    print("Verification: Skipped")
                    validSuggestions += 1
                sugg = SuggestionData(f, b, bugCodeFragment, fixCodeFragment, vStatus)
                suggestions.append(sugg)
        print("Valid suggestions prepared for {0} / {1} bugs.".format(validSuggestions, len(newBugs)))

        if validSuggestions > 0:
            print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
            apply = False
            choice = True
            while choice:
                c = sys.stdin.read(1)
                if c == '':
                    # read() returns '' at end-of-file; without this branch
                    # the loop would spin forever on a closed stdin.
                    print("No more input available, aborting commit...")
                    sys.exit(1)
                elif c == 'a':
                    apply = True
                    choice = False
                    print("Applying fixes...")
                elif c == 'i':
                    choice = False
                    print("Fixes ignored...")
                elif c == 'd':
                    self.displaySuggestions(suggestions)
                    print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
            if apply:
                self.applyValidFixes(suggestions, files)
                print("Fixes applied!")
        if validSuggestions != len(newBugs):
            print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
            choice = True
            while choice:
                c = sys.stdin.read(1)
                if c == '':
                    # End-of-file: nobody can answer, so abort rather than hang.
                    print("No more input available, aborting commit...")
                    sys.exit(1)
                elif c == 'c':
                    choice = False
                    print("Continuing...")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
        else:
            print("Bugs corrected, commit is good to go!")
示例#12
0
def ProcessBugsInFile(fileName):
    """Evaluate the model's fix for every bug recorded for one file.

    For each bug id in ``bugsPerFile[fileName]`` that has no result file yet,
    the function extracts the bug fragment, builds the expected fix, predicts
    a fix with the checker's model, writes the predicted fix into the
    repository file, runs CodeChecker on it, and stores the fragments and
    statistics in ``../Results/Analysis/<file>_<bug>_<checker>.txt``.

    The repository file is always rewritten with ``fileContents[fileName]``
    afterwards, even when the evaluation raises. Bugs with a checker other
    than the four handled below, and bugs for which no expected fix can be
    generated, are skipped with a message.

    Relies on module-level state (``bugsPerFile``, ``bugDataList``,
    ``fileContents``, ``model1``..``model4``, ``coder1``..``coder4``,
    ``totalDictionaryLength1``..``4``, ``checkers``, ``codechecker``,
    ``ccdb``, ``config``, ``fix``).

    Args:
        fileName: Key into ``bugsPerFile``/``fileContents``; also the file
            name relative to the repository directory.

    Returns:
        ``fileName`` unchanged.
    """
    # 5.1.
    # 5.2.
    for bug in bugsPerFile[fileName]:
        bugData = bugDataList[bug]
        # Drops the last four characters (presumably the ".cpp" extension;
        # confirm against the caller's file list).
        cleanFn = fileName[:-4]
        fn = '../Results/Analysis/{0}_{1}_{2}.txt'.format(
            cleanFn, bug, bugData.getChecker())
        repoFn = os.fsdecode(
            os.path.join(os.fsencode(config.getRepoDir()),
                         os.fsencode(fileName)))
        if os.path.isfile(fn):
            # Result already present, nothing to do for this bug.
            continue

        checkerName = bugData.getChecker()
        if checkerName == 'deadcode.DeadStores':
            model = model1
            coder = coder1
            totalDictionaryLength = totalDictionaryLength1
        elif checkerName == 'clang-diagnostic-tautological-constant-out-of-range-compare':
            model = model2
            coder = coder2
            totalDictionaryLength = totalDictionaryLength2
        elif checkerName == 'clang-diagnostic-unused-parameter':
            model = model3
            coder = coder3
            totalDictionaryLength = totalDictionaryLength3
        elif checkerName == 'clang-diagnostic-constant-conversion':
            model = model4
            coder = coder4
            totalDictionaryLength = totalDictionaryLength4
        else:
            # No model for this checker: skip instead of failing on a None
            # model (or silently reusing the previous bug's coder).
            print("Unsupported checker for bug #{0} ({1}), checker = {2}"
                  .format(bug, fileName, checkerName))
            continue
        MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]

        # 5.2.1.
        fullCodeWithBug = fileContents[fileName]
        extractor = CodeExtractor(bugData)
        extractor.loadCodeFromText(fullCodeWithBug)
        extractor.extractBugCode()
        bugCodeFragment = extractor.getBugCodeFragment()

        # 5.2.2.
        fullCodeWithoutBug = ''.join(fix(extractor.code, bugData))
        diff = POSIXDiffer().diff(fullCodeWithBug, fullCodeWithoutBug)
        extractor.loadDiff(diff)
        try:
            extractor.extractFixCode()
        except ValueError:
            print(
                "Unable to generate expected fix for bug #{0} ({1}), checker = {2}"
                .format(bug, fileName, bugData.getChecker()))
            continue
        expectedFixCodeFragment = extractor.getFixCodeFragment()

        # 5.2.3.
        checkerInfo = checkers.extractTokensForChecker(bugData.getChecker(),
                                                       bugData.getMessage())
        encodedBugData, initialUnkList = coder.encode(bugCodeFragment,
                                                      checkerData=checkerInfo)
        noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
        if noZerosToPad > 0:
            encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
        X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
        X[0] = coder.convertToOneHot(
            encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
        Y = coder.convertFromOneHot(model.predict(X)[0])
        # Strip trailing zeros; the length check keeps an all-zero prediction
        # from raising IndexError once the list is empty.
        while len(Y) > 0 and Y[-1] == 0:
            Y.pop()
        fixCodeFragment = coder.decode(Y, initialUnkList)

        # 5.2.4.
        f1 = os.fsdecode(
            os.path.join(os.fsencode(config.getRepoDir()),
                         os.fsencode(cleanFn)))
        f2 = os.fsdecode(
            os.path.join(os.fsencode(config.getRepoDir()),
                         os.fsencode('{0}.o'.format(cleanFn))))
        # Remove stale build outputs so that f1 existing afterwards means the
        # fixed file was built.
        if os.path.isfile(f1):
            os.remove(f1)
        if os.path.isfile(f2):
            os.remove(f2)
        extractor.loadCodeFromText(fullCodeWithBug)
        extractor.applyFix(fixCodeFragment)
        try:
            with open(repoFn, 'wt') as f:
                f.writelines(extractor.code)
            log = codechecker.runCmd(
                'CodeChecker check -e all -b "cd {0} && make {1}" -o /tmp/codefixer_{1}'
                .format(config.getRepoDir(), cleanFn))
            isCompiling = os.path.isfile(f1)

            # 5.2.5.
            isFixed = False
            ids = []
            if isCompiling:
                resolved = codechecker.diffResolved(
                    config.getCcRunName(), '/tmp/codefixer_{0}'.format(cleanFn),
                    ccdb)
                ids = [bugInfo['reportId'] for bugInfo in resolved]
                isFixed = bug in ids

            # 5.2.6.
            isExpected = fixCodeFragment == expectedFixCodeFragment

            # 5.2.7.
            encodedExpFix, finalUnkList = coder.encode(expectedFixCodeFragment,
                                                       unkList=initialUnkList,
                                                       reverse=False)
            noXTokens = len(encodedExpFix)
            noYTokens = len(Y)
            noAllTokens = max(noXTokens, noYTokens)
            # Position-wise matches over the common prefix length.
            noCorrectTokens = sum(
                1 for expected, predicted in zip(encodedExpFix, Y)
                if expected == predicted)

            # 5.2.8.
            with open(fn, 'wt') as f:
                f.write(
                    '#BUG#\n{0}\n#EXP#\n{1}\n#FIX#\n{2}\n#STATS#\n{3},{4},{5},{6},{7},{8},{9}\n{10}\n{11}'
                    .format(bugCodeFragment, expectedFixCodeFragment,
                            fixCodeFragment, isCompiling, isFixed, isExpected,
                            noXTokens, noYTokens, noAllTokens, noCorrectTokens,
                            log, ids))
        finally:
            # 5.2.9.
            # Always put the original contents back so that a failure above
            # does not leave the predicted fix in the repository file.
            with open(repoFn, 'wt') as f:
                f.write(fileContents[fileName])

        # 5.2.10.
        # Not used due to multithreading issues, will be done on file-by-file basis after closing pool
    # 5.3.
    return fileName