print("TESTING {}".format(projectName.upper())) pName = projectName.split("/")[-1] outputFile = f"{outDir}/{pName}___t{threshold}.csv" with open(outputFile, "w") as fileOut: fileOut.write( "fold,testSetSize,numFlakyTrainSet,numNonFlakyTrainSet,numFlakyTestSet,numNonFlakyTestSet,vecTime,trainTime,testTime,avgPredTime,f-measure,precision,recall,accuracy,tp,fp,fn,tn\n" ) # data points vectorization v0 = time.perf_counter() dataPointsFlaky, dataPointsNonFlaky = flast.getDataPointsInfo( projectBasePath, projectName) dataPoints = dataPointsFlaky + dataPointsNonFlaky Z = flast.flastVectorization(dataPoints, reduceDim=reduceDim, dim=dim, eps=eps) dataPointsList = np.array( [Z[i].toarray() for i in range(Z.shape[0])]) dataLabelsList = np.array([1] * len(dataPointsFlaky) + [0] * len(dataPointsNonFlaky)) v1 = time.perf_counter() vecTime = v1 - v0 for testSetSize in testSetSizes: t0 = time.time() print() print() print("TESTING {}".format(projectName.upper())) print("TESTSET SIZE:", testSetSize)
def flastKNN(outDir, projectBasePath, projectName, kf, dim, eps, k, sigma, params):
    """Cross-validate the flast kNN classifier on one project and average the results.

    The project's data points are vectorized once, the size of the pickled
    (vectors, labels) pair is measured, and then every fold produced by
    ``kf.split`` is classified with ``flast.flastClassification``. Folds that
    contain no flaky sample in either the training or the test split are
    skipped.

    Args:
        outDir: Existing directory in which the temporary pickle used to
            measure storage is written (it is removed again before returning).
        projectBasePath: Forwarded to ``flast.getDataPointsInfo``.
        projectName: Forwarded to ``flast.getDataPointsInfo``.
        kf: Splitter exposing ``split(X, y)`` that yields
            ``(trainIndices, testIndices)`` pairs (presumably a scikit-learn
            cross-validator -- confirm against the caller).
        dim: Forwarded to ``flast.flastVectorization``.
        eps: Forwarded to ``flast.flastVectorization``.
        k: Forwarded to ``flast.flastClassification``; also part of the
            temporary pickle's file name.
        sigma: Forwarded to ``flast.flastClassification``; also part of the
            temporary pickle's file name.
        params: Forwarded to ``flast.flastClassification``.

    Returns:
        A tuple ``(avgP, avgR, storage, avgTPrep, avgTPred)``:

        * ``avgP`` -- precision averaged over the folds where precision was
          defined, or ``"-"`` if there was no such fold.
        * ``avgR`` -- recall averaged over the non-skipped folds.
        * ``storage`` -- size in bytes of the pickled (vectors, labels) pair.
        * ``avgTPrep`` -- average preparation time per fold (share of the
          vectorization time attributed to the training samples plus the
          training time).
        * ``avgTPred`` -- average prediction time per test sample
          (per-sample vectorization time plus per-sample test time).

        If every fold was skipped, ``avgR``, ``avgTPrep`` and ``avgTPred`` are
        ``"-"`` as well, instead of raising ``ZeroDivisionError``.
    """
    # Data points vectorization. It is timed because its cost is later
    # apportioned to the preparation and prediction times of each fold.
    v0 = time.perf_counter()
    dataPointsFlaky, dataPointsNonFlaky = flast.getDataPointsInfo(
        projectBasePath, projectName)
    dataPoints = dataPointsFlaky + dataPointsNonFlaky
    Z = flast.flastVectorization(dataPoints, dim=dim, eps=eps)
    dataPointsList = np.array([Z[i].toarray() for i in range(Z.shape[0])])
    # Labels follow the concatenation order above: flaky (1) first, then
    # non-flaky (0).
    dataLabelsList = np.array([1] * len(dataPointsFlaky) +
                              [0] * len(dataPointsNonFlaky))
    vecTime = time.perf_counter() - v0

    # Storage: dump the kNN "model" (vectors + labels) to disk, read the file
    # size back, and always clean the temporary file up, even on failure.
    kNN = (dataPointsList, dataLabelsList)
    pickleDumpKNN = os.path.join(outDir, f"flast-k{k}-sigma{sigma}.pickle")
    try:
        with open(pickleDumpKNN, "wb") as pickleFile:
            pickle.dump(kNN, pickleFile)
        storage = os.path.getsize(pickleDumpKNN)
    finally:
        if os.path.exists(pickleDumpKNN):
            os.remove(pickleDumpKNN)

    avgP, avgR = 0, 0
    avgTPrep, avgTPred = 0, 0
    # successFold counts folds that were actually evaluated; precisionFold
    # counts the subset of those for which precision was defined.
    successFold, precisionFold = 0, 0
    for trnIdx, tstIdx in kf.split(dataPointsList, dataLabelsList):
        trainData, testData = dataPointsList[trnIdx], dataPointsList[tstIdx]
        trainLabels, testLabels = dataLabelsList[trnIdx], dataLabelsList[tstIdx]

        # Labels are 0/1, so the sum is the number of flaky samples.
        numFlakyTrain = trainLabels.sum()
        numFlakyTest = testLabels.sum()
        if numFlakyTrain == 0 or numFlakyTest == 0:
            print("Skipping fold...")
            print(" Flaky Train Tests", numFlakyTrain)
            print(" Flaky Test Tests", numFlakyTest)
            continue

        successFold += 1

        # Prepare the data in the right format for kNN: each sample is a 2-D
        # array (from ``toarray()``), so flatten it into a single row.
        nSamplesTrainData, nxTrain, nyTrain = trainData.shape
        trainData = trainData.reshape((nSamplesTrainData, nxTrain * nyTrain))
        nSamplesTestData, nxTest, nyTest = testData.shape
        testData = testData.reshape((nSamplesTestData, nxTest * nyTest))

        trainTime, testTime, predictLabels = flast.flastClassification(
            trainData, trainLabels, testData, sigma, k, params)
        # Vectorization time is split proportionally: the training samples'
        # share goes to preparation, one sample's share goes to prediction.
        preparationTime = (vecTime * len(trainData) / len(dataPoints)) + trainTime
        predictionTime = (vecTime / len(dataPoints)) + (testTime / len(testData))
        precision, recall = flast.computeResults(testLabels, predictLabels)
        print(precision, recall)
        # "-" marks an undefined precision; such folds are left out of the
        # precision average only.
        if precision != "-":
            precisionFold += 1
            avgP += precision
        avgR += recall
        avgTPrep += preparationTime
        avgTPred += predictionTime

    if successFold == 0:
        # Every fold was skipped: no average is defined. Report the same "-"
        # sentinel used for undefined precision rather than dividing by zero.
        return ("-", "-", storage, "-", "-")

    avgP = avgP / precisionFold if precisionFold else "-"
    avgR /= successFold
    avgTPrep /= successFold
    avgTPred /= successFold

    return (avgP, avgR, storage, avgTPrep, avgTPred)