示例#1
0
def run(df, predAtK):
    '''Repair every program in ``df`` and tabulate the per-program outcome.

    For each row, the source and target texts are parsed into ``Code``
    objects, the source's error set and both program abstractions are
    fetched, and ``errLoc`` supplies the line numbers to repair.  Only rows
    whose source code reports at least one error (``getNumErrors() > 0``)
    are repaired through ``runPerLine``, scored through ``calcAccuracy`` and
    recorded; any other row contributes no result row.

    Reads the module-level names ``AllErrs``, ``activeLocalization`` and
    ``useTracers_errLoc``.

    Args:
        df: DataFrame providing the columns 'id', 'sourceText', 'targetText',
            'targetLineText', 'targetLineAbs', 'lineNums_Text',
            'sourceLineText' and 'sourceLineAbs'.
        predAtK: Forwarded unchanged to ``runPerLine``.

    Returns:
        A ``pd.DataFrame`` with one row per repaired program, using the
        column names listed in ``columns`` below.
    '''
    startTime = timer()
    columns = [
        'id', 'sourceText', 'targetText', 'predText', 'actLineNums', 'predLineNums',
        'actSourceLine', 'localSourceLine', 'targetLine', 'predLine',
        'actSourceAbsLine', 'localSourceAbsLine', 'targetAbsLine', 'predAbsLine',
        'errSet', 'isLocated', 'isRelevant', 'isConcretized', 'isExactMatch', 'isCompiled',
    ]
    records = []

    # One iteration per erroneous program
    for i, row in df.iterrows():
        progID = str(row['id'])
        srcText = str(row['sourceText'])
        trgtText = str(row['targetText'])
        trgtErrLines = str(row['targetLineText']).strip()
        trgtErrAbsLines = str(row['targetLineAbs']).strip()
        actLinesStr = str(row['lineNums_Text'])

        # Parse both versions of the program
        srcCodeObj = Code(srcText, codeID=progID + '_source')
        trgtCodeObj = Code(trgtText, codeID=progID + '_target')
        srcLines = srcText.splitlines()
        trgtLines = trgtText.splitlines()
        errSet = ClusterError.getErrSetStr(AllErrs, srcCodeObj)

        # Abstractions of both programs
        srcAbsLines = AbstractWrapper.getProgAbstraction(srcCodeObj)
        trgtAbsLines = AbstractWrapper.getProgAbstraction(trgtCodeObj)

        # Line numbers that should be repaired
        lineNums = errLoc(activeLocalization, srcCodeObj, actLinesStr,
                          useTracers_errLoc)

        if srcCodeObj.getNumErrors() > 0:
            # Predict a repair for every located line
            perLineOutcome = runPerLine(srcCodeObj, srcLines, trgtLines,
                                        srcAbsLines, trgtAbsLines, errSet,
                                        lineNums, predAtK)
            (predText, srcErrLines, predErrLines, srcErrAbsLines,
             predErrAbsLines, isConcretized, isExactMatch) = perLineOutcome

            # Score the prediction
            isLocated, isRelevant, isCompiled = calcAccuracy(
                actLinesStr, lineNums, trgtText, trgtErrAbsLines,
                predErrAbsLines, predErrLines, predText)

            record = (
                row['id'], srcText, trgtText, predText,
                actLinesStr, H.joinList(lineNums),
                row['sourceLineText'], H.joinList(srcErrLines),
                trgtErrLines, H.joinLL(predErrLines),
                row['sourceLineAbs'], H.joinLL(srcErrAbsLines),
                trgtErrAbsLines, H.joinLL(predErrAbsLines),
                errSet,
                H.toInt(isLocated), H.toInt(isRelevant),
                H.toInt(isConcretized), H.toInt(isExactMatch),
                H.toInt(isCompiled),
            )
            records.append(record)

        # Progress report whenever the row label is a non-zero multiple of 100
        if i != 0 and i % 100 == 0:
            print('\t...', i, '/', len(df), 'Completed')

    endTime = timer()
    print('\n#Programs=', len(df), 'Time Taken=',
          round(endTime - startTime, 2), '(s)')
    return pd.DataFrame(records, columns=columns)
示例#2
0
def repairErrLine(srcCodeObj, repairLines, repairAbsLines, srcAbsLine,
                  trgtLine, trgtAbsLine, errSetLine, lineNum, predErrAbsLines,
                  predErrLines, predAtK):
    '''Pred@K and concretize the best line (with least errors).

    Every abstract candidate yielded by ``Predict.predictAbs`` is substituted
    at position ``lineNum - 1`` of a deep copy of the current repair state,
    concretized with ``ConcreteWrapper.attemptConcretization``, and the
    resulting program is parsed with ``Code``.  A candidate is kept only if
    its program reports strictly fewer errors than the best seen so far; the
    starting point is the error count of ``repairLines`` as passed in.

    Args:
        srcCodeObj: ``Code`` object of the original source, passed to the
            concretizer.
        repairLines: Current concrete program lines (not modified).
        repairAbsLines: Current abstract program lines (not modified).
        srcAbsLine: Abstract form of the line being repaired.
        trgtLine: Unused here; kept for interface compatibility.
        trgtAbsLine: Abstract target line, passed to the predictor and to
            ``checkRelevant2``.
        errSetLine: Error-set string passed to the predictor.
        lineNum: 1-based number of the line being repaired.
        predErrAbsLines: Unused here; kept for interface compatibility.
        predErrLines: Unused here; kept for interface compatibility.
        predAtK: Passed through to ``Predict.predictAbs``.

    Returns:
        Tuple ``(bestPredAbsLine, bestPredLine, bestPredAbsLines,
        bestPredLines, isConcretized, isExactMatch)``.  If no candidate
        lowers the error count, the first two entries are ``None`` and the
        next two are the very ``repairAbsLines`` / ``repairLines`` objects
        that were passed in.  ``isConcretized`` folds the per-candidate
        flags with ``H.NoneAnd`` and ``isExactMatch`` with ``H.NoneOr``;
        both stay ``None`` when the predictor yields nothing.
    '''
    isConcretized, isExactMatch = None, None
    bestPredAbsLine, bestPredLine = None, None
    bestPredAbsLines, bestPredLines = repairAbsLines, repairLines

    # Baseline: number of errors before trying any candidate
    prePredCodeObj = Code(H.joinList(repairLines))
    minNumErrs = prePredCodeObj.getNumErrors()

    for predAbsLine in Predict.predictAbs(srcAbsLine, errSetLine, trgtAbsLine,
                                          predAtK):
        # Work on copies so a rejected candidate leaves the repair state intact
        predLines, predAbsLines = copy.deepcopy(repairLines), copy.deepcopy(
            repairAbsLines)
        predAbsLines[lineNum - 1] = H.joinList(predAbsLine, joinStr=' ')

        # Concretize the predicted abstract fix
        predLine, tempIsConcretized = ConcreteWrapper.attemptConcretization(
            srcCodeObj, lineNum, predAbsLine)
        predLines[lineNum - 1] = H.joinList(predLine, joinStr=' ')

        # Fold this candidate's flags into the running results
        isConcretized = H.NoneAnd(isConcretized, tempIsConcretized)
        tempIsExactMatch = checkRelevant2(predAbsLine, trgtAbsLine)
        isExactMatch = H.NoneOr(isExactMatch, tempIsExactMatch)

        # Query the error count once per candidate and reuse the value,
        # instead of asking the Code object again after the comparison.
        numErrs = Code(H.joinList(predLines)).getNumErrors()
        if minNumErrs is None or numErrs < minNumErrs:
            minNumErrs = numErrs
            bestPredAbsLines, bestPredLines = predAbsLines, predLines
            bestPredAbsLine, bestPredLine = predAbsLine, predLine

    return bestPredAbsLine, bestPredLine, bestPredAbsLines, bestPredLines, isConcretized, isExactMatch
示例#3
0
def runPerLine(srcCodeObj, srcLines, trgtLines, srcAbsLines, trgtAbsLines,
               errSet, lineNums, predAtK):
    '''For each flagged line number, call repairErrLine and accumulate the repair.

    Repairs are applied cumulatively: the lines returned by one
    ``repairErrLine`` call are the starting point for the next line number.
    Line numbers are 1-based; any number outside
    ``1..min(len(srcLines), len(srcAbsLines))`` is skipped.  Zero and
    negative numbers are rejected explicitly because ``lineNum - 1`` would
    otherwise index from the end of the list and repair an unrelated line.

    Reads the module-level names ``flagErrSet_Line`` and ``AllErrs``.

    Args:
        srcCodeObj: ``Code`` object of the erroneous source program.
        srcLines: Concrete source lines.
        trgtLines: Concrete target lines.
        srcAbsLines: Abstract source lines.
        trgtAbsLines: Abstract target lines.
        errSet: Program-level error-set string, used unless
            ``flagErrSet_Line`` requests a per-line one.
        lineNums: Iterable of line numbers (anything ``int()`` accepts).
        predAtK: Passed through to ``repairErrLine``.

    Returns:
        Tuple ``(predText, srcErrLines, predErrLines, srcErrAbsLines,
        predErrAbsLines, isConcretized, isExactMatch)``.  ``predText`` is
        ``H.joinList`` of the final repaired lines; the two flags fold the
        per-line flags with ``H.NoneAnd`` and stay ``None`` when no line was
        processed.
    '''
    srcErrLines, srcErrAbsLines = [], []
    predErrLines, predErrAbsLines = [], []
    repairLines, repairAbsLines = copy.deepcopy(srcLines), copy.deepcopy(
        srcAbsLines)
    isConcretized, isExactMatch = None, None

    # For each compiler flagged lineNums
    for lineNum in lineNums:
        lineNum = int(lineNum)

        # Skip line numbers that do not address an existing source line.
        if not 0 < lineNum <= min(len(srcLines), len(srcAbsLines)):
            continue

        # lineNum-1 since line numbers are 1-based
        srcLine, srcAbsLine = srcLines[lineNum - 1], srcAbsLines[lineNum - 1]

        # The target may be shorter than the source; then there is no
        # counterpart line to compare against.
        trgtLine, trgtAbsLine = None, None
        if lineNum <= min(len(trgtLines), len(trgtAbsLines)):
            trgtLine, trgtAbsLine = trgtLines[lineNum - 1], trgtAbsLines[
                lineNum - 1]

        srcErrLines.append(srcLine)
        srcErrAbsLines.append(srcAbsLine)

        # Use ErrSet at line=lineNum? Or at program-level
        errSetLine = errSet
        if flagErrSet_Line:
            errSetLine = ClusterError.getErrSetStr(AllErrs,
                                                   srcCodeObj,
                                                   lineNum=lineNum)

        # Predict@K the concrete repair line
        predAbsLine, predLine, repairAbsLines, repairLines, tempIsConcretized, tempIsExactMatch = repairErrLine(
            srcCodeObj, repairLines, repairAbsLines, srcAbsLine, trgtLine,
            trgtAbsLine, errSetLine, lineNum, predErrAbsLines, predErrLines,
            predAtK)

        # Fold this line's flags into the running results
        isConcretized = H.NoneAnd(isConcretized, tempIsConcretized)
        isExactMatch = H.NoneAnd(isExactMatch, tempIsExactMatch)

        # Record the predicted abstract and concrete line
        if predAbsLine is not None:
            predErrAbsLines.append(predAbsLine)
            predErrLines.append(predLine)

    predText = H.joinList(repairLines)
    return predText, srcErrLines, predErrLines, srcErrAbsLines, predErrAbsLines, isConcretized, isExactMatch
示例#4
0
def createClass(fnameDataset):
    '''Recompute the error set of every row in a dataset CSV and rewrite the file.

    The CSV at ``fnameDataset`` is read with ISO-8859-1 encoding.  For each
    row, ``sourceText`` is parsed with ``Code`` and ``getErrSetStr`` yields a
    new error-set string.  The new class label is that string, a newline, and
    ``H.joinList`` of every line of the existing ``errSet_diffs`` value except
    its first.  The labels are stored in a ``class`` column, the raw error-set
    strings in ``newErrSet``, and the frame is written back to the same path
    without the index.

    A percentage label is printed (at most one per row) whenever the row
    label reaches the next 10% step of ``len(df)``.
    '''
    df = pd.read_csv(fnameDataset, encoding="ISO-8859-1")
    allErrs = getAllErrs(CF.fname_newErrIDs)
    classLabels = []
    errSetStrs = []
    mult = 10

    for i, row in df.iterrows():
        previousClass = row['errSet_diffs']
        errSetStr = getErrSetStr(allErrs, Code(row['sourceText']))

        # Keep everything after the first line of the old label
        oldTail = H.joinList(previousClass.splitlines()[1:])

        errSetStrs.append(errSetStr)
        classLabels.append(errSetStr + '\n' + oldTail)

        if i >= len(df) * mult / 100:
            print(str(mult) + '%', end=' ', flush=True)
            mult += 10

    df['class'] = classLabels
    df['newErrSet'] = errSetStrs
    df.to_csv(fnameDataset, index=False)
示例#5
0
def getErrSetStr(allErrs, codeObj, lineNum=None):
    '''Return the error set of ``codeObj`` as one string.

    The result of ``getErrSet(allErrs, codeObj, lineNum)`` is passed to
    ``H.joinList`` with ``';'`` as the join string, and a trailing ``';'`` is
    appended.
    '''
    joined = H.joinList(getErrSet(allErrs, codeObj, lineNum), ';')
    return joined + ';'
示例#6
0
 def __str__(self):
     '''Return ``self.abstractTokens`` combined by ``H.joinList`` with ``' '`` as the join string.'''
     tokens = self.abstractTokens
     return H.joinList(tokens, ' ')
示例#7
0
def printProgAbstraction():
    '''Print the abstraction of the program stored in ``CF.inputPath + 'temp.c'``.

    The file is read in full, parsed with ``Code`` and abstracted with
    ``getProgAbstraction``.  Each abstract line is printed on its own output
    line after being combined by ``H.joinList`` with ``' '`` as the join
    string.
    '''
    # Context manager so the file handle is closed even if reading fails
    with open(CF.inputPath + 'temp.c') as srcFile:
        codeText = srcFile.read()

    codeObj = Code(codeText)
    for line in getProgAbstraction(codeObj):
        print(H.joinList(line, ' '))