예제 #1
0
def getBestClassifierForEachProgram(possibleTargetColumns, possiblePrograms, possibleClassifiers, bestProgram_Classifier_Parameters, writeFile = False):
    """
    Determine, for each program and target column, the classifier with the best F1 score.

    Args:
        possibleTargetColumns (list): Target columns to analyze (e.g. 'MINIMAL', 'EQUIVALENT').
        possiblePrograms (list): Program names whose classification results are inspected.
        possibleClassifiers (list): Classifier identifiers whose result files are compared.
        bestProgram_Classifier_Parameters (DataFrame): Best parameter per
            (Column, Program, Classifier) triple.
        writeFile (bool): When True, recompute everything from the per-classifier CSVs
            and write 'bestClassifierForEachProgram.csv'; when False, just read that
            file back for every target column.

    Returns:
        DataFrame: Columns ['Column', 'Program', 'Classifier', 'Parameter'], sorted by
        column and case-insensitive program name.
    """
    bestProgram_Classifier = pd.DataFrame()

    for targetColumn in possibleTargetColumns:
        baseFolder = '{}/ML/Results/{}/Classification'.format(os.getcwd(), targetColumn)
        fileName = '{}/bestClassifierForEachProgram.csv'.format(baseFolder)

        if writeFile:
            # Walk through every program
            for programName in possiblePrograms:
                program_ClassifierMetrics = pd.DataFrame()

                # Collect the metrics of every classifier of this program
                for classifier in possibleClassifiers:
                    programFileName = '{}/{}_{}.csv'.format(baseFolder, programName, classifier)
                    if util.pathExists(programFileName):
                        # Fetch the metrics of this program / classifier pair
                        accuracy, precision, recall, f1 = getMLMetricsFromClassificationFile(programFileName, targetColumn, programName)
                        newMutantsMetrics = pd.DataFrame(data=[[programName, accuracy * 100, precision * 100, recall * 100, f1 * 100, classifier]], columns=['ProgramName', 'Accuracy', 'Precision', 'Recall', 'F1', 'Classifier'])
                        # pd.concat replaces the deprecated DataFrame.append (removed in pandas 2.0)
                        program_ClassifierMetrics = pd.concat([program_ClassifierMetrics, newMutantsMetrics])

                # Pick the best classifier for this program.
                # BUG FIX: the original tested the stale `fileExists` flag left over from
                # the LAST classifier iteration (and unbound when the classifier list is
                # empty), so a program whose last classifier file was missing got skipped
                # even when earlier classifier files existed. Checking the accumulated
                # metrics frame is what was intended.
                if not program_ClassifierMetrics.empty:
                    bestClassifier = program_ClassifierMetrics.sort_values('F1', ascending=False).head(n=1)['Classifier'].values[0]
                    bestFile = '{}/{}_{}.csv'.format(baseFolder, programName, bestClassifier)
                    newFile = '{}/{}.csv'.format(baseFolder, programName)

                    bestParameter = bestProgram_Classifier_Parameters.query('Column == \'{}\' and Program == \'{}\' and Classifier == \'{}\''.format(targetColumn, programName, bestClassifier))
                    # Fall back to an empty parameter when no record exists
                    if not bestParameter.empty:
                        bestParameter = bestParameter['Parameter'].values[0]
                    else:
                        bestParameter = ''
                    newBestProgram_Classifier = pd.DataFrame(data=[[targetColumn, programName, bestClassifier, bestParameter]], columns=['Column', 'Program', 'Classifier', 'Parameter'])
                    bestProgram_Classifier = pd.concat([bestProgram_Classifier, newBestProgram_Classifier])

                    # Promote the best classifier's result file to '<program>.csv'
                    copyfile(bestFile, newFile)

            # Sort case-insensitively by program name, then write the file
            bestProgram_Classifier['Program.UPPER'] = bestProgram_Classifier['Program'].str.upper()
            bestProgram_Classifier = bestProgram_Classifier.sort_values(by=['Column', 'Program.UPPER'])
            del bestProgram_Classifier['Program.UPPER']

            util.writeDataFrameInCsvFile(fileName, bestProgram_Classifier.query('Column == \'{}\''.format(targetColumn)))
        else:
            # Read the previously generated file instead of recomputing
            newBestProgram_Classifier = util.createDataFrameFromCSV(fileName, hasHeader=True, columnIndex=0)
            bestProgram_Classifier = pd.concat([bestProgram_Classifier, newBestProgram_Classifier])

            bestProgram_Classifier['Program.UPPER'] = bestProgram_Classifier['Program'].str.upper()
            bestProgram_Classifier = bestProgram_Classifier.sort_values(by=['Column', 'Program.UPPER'])
            del bestProgram_Classifier['Program.UPPER']

    return bestProgram_Classifier
예제 #2
0
def summarizeClassifications(targetColumn,
                             possiblePrograms,
                             df_Programs_BestClassifiers,
                             overwrite=False):
    '''
    Summarize the mutant-classification results of every program into a single CSV.

    For each program, the classification file produced by its best classifier is
    read and concatenated into one DataFrame, which is written to
    'ClassificationSummary.csv'. When that summary already exists (and overwrite
    is False) it is simply read back.

    Args:
        targetColumn (str): Column analyzed ('MINIMAL' or 'EQUIVALENT').
        possiblePrograms (list): Names of the programs to summarize.
        df_Programs_BestClassifiers (DataFrame): Best classifier per program;
            must contain the columns 'Column', 'ProgramName' and 'Classifier'.
        overwrite (bool): When True, rebuild the summary even if the file exists.

    Returns:
        DataFrame: Concatenated classification rows of every program.
    '''
    baseFolder = '{}/ML/Results/{}/Classification'.format(
        os.getcwd(), targetColumn)
    fileName = 'ClassificationSummary'
    summaryFile = '{}/ML/Results/{}/Classification/{}.csv'.format(
        os.getcwd(), targetColumn, fileName)

    # Only the rows of the requested target column are relevant
    df_Programs_BestClassifiers = df_Programs_BestClassifiers.query(
        'Column == \'{}\''.format(targetColumn))

    if overwrite or not util.pathExists(summaryFile):
        collectedFrames = []

        for programName in possiblePrograms:
            bestRows = df_Programs_BestClassifiers.query(
                'ProgramName == \'{}\''.format(programName))

            # ROBUSTNESS FIX: the original indexed .values[0] unconditionally and
            # raised IndexError when a program had no best-classifier record.
            if bestRows.empty:
                continue

            bestClassifier = bestRows.loc[:, 'Classifier'].values[0]
            programFileBestClassifier = '{}/{}_{}.csv'.format(
                baseFolder, programName, bestClassifier)

            if util.pathExists(programFileBestClassifier):
                collectedFrames.append(
                    util.createDataFrameFromCSV(programFileBestClassifier,
                                                hasHeader=True))

        # pd.concat replaces the deprecated DataFrame.append (removed in pandas 2.0)
        mutantsData = (pd.concat(collectedFrames, ignore_index=True)
                       if collectedFrames else pd.DataFrame())

        util.writeDataFrameInCsvFile(summaryFile, mutantsData)

        return mutantsData

    # File exists and must not be overwritten: read it back.
    # (This branch is only reached when overwrite is False and the file exists,
    # so the original 'elif util.pathExists(...)' test was always true here.)
    return util.createDataFrameFromCSV(summaryFile, True)
예제 #3
0
def _writeMutantsDataset(datasetBaseFolder, datasetName, sessionName, essentialInfo):
    '''
    Write one program's essential mutant information into the ML dataset folder.

    Two files are produced under '<datasetBaseFolder>/<datasetName>':
      - 'mutants.csv': file accumulating the mutants of every program (appended;
        the header is only written when the file does not exist yet);
      - 'Programs/<sessionName>.csv': file with only this program's mutants
        (always rewritten, always with a header).
    '''
    # Single accumulated file with all programs' mutants
    allMutantsFileName = '{}/{}/mutants.csv'.format(datasetBaseFolder, datasetName)
    util.writeDataFrameInCsvFile(allMutantsFileName,
                                 essentialInfo,
                                 sep=',',
                                 mode='a+',
                                 # header only on first write (idiomatic form of the
                                 # original 'True if pathExists(...) == False else False')
                                 header=not util.pathExists(allMutantsFileName),
                                 index=False)

    # One file per program with all of its mutants
    programFileName = '{}/{}/Programs/{}.csv'.format(datasetBaseFolder, datasetName,
                                                     sessionName)
    util.writeDataFrameInCsvFile(programFileName,
                                 essentialInfo,
                                 sep=',',
                                 mode='w+',
                                 header=True,
                                 index=False)


def main(_baseExperimentFolder, _baseFolder, executionMode):
    '''
    Find minimal-mutant properties for one program and feed the ML dataset.

    Args:
        _baseExperimentFolder (str): Root folder of the experiment.
        _baseFolder (str): Folder of the program under analysis; its basename is
            used as source file, session name and executable name.
        executionMode (int|str): 1 - Run and analyze | 2 - Run | 3 - Analyze.
            Any other value makes the function a no-op.
    '''
    # Hoisted: the original converted executionMode on every comparison
    mode = int(executionMode)

    if 1 <= mode <= 3:
        print(
            '####################################################################'
        )
        print(
            '#\t   Executing script to find minimal mutants properties\t   #')
        print('#\t\t      ' + util.formatNow() + '\t\t\t   #')
        print(
            '####################################################################'
        )

        ####################
        # Set main variables
        ####################
        baseExperimentFolder = _baseExperimentFolder
        baseFolder = _baseFolder
        sourceFile = baseFolder[baseFolder.rfind("/") + 1:]
        sessionName = sourceFile
        executableFile = sessionName
        executionType = "research"
        directory = baseFolder
        units = util.getContentFromFile(
            "{baseFolder}/unit.txt".format(baseFolder=baseFolder))

        if mode in (1, 2):
            #################
            # Execute proteum
            #################
            executeProteum(baseFolder, sourceFile, sessionName, executableFile,
                           executionType, directory, units)

        if mode in (1, 3):
            #####################
            # Get minimal mutants
            #####################
            print('\n##### \tBuscando mutantes minimais ' + util.formatNow() +
                  '\t#####')
            minimalMutants = getMinimalMutants(baseFolder, sourceFile)

            # NOTE: GFC simplification (prot2PokeMain on '__<sourceFile>.gfc') is
            # disabled because already-processed GFC files are being used.

            ################################
            # Get basic mutants information
            ################################
            print('\n##### \tBuscando e calculando informações dos mutantes ' +
                  util.formatNow() + '\t#####')
            mutantsHeader, mutantsInfo = getMutantsInfo(
                baseFolder, minimalMutants, sessionName, units)

            ################################################
            # Write csv file with basic mutants information
            ################################################
            print('\n##### \tGerando arquivo com informações dos mutantes ' +
                  util.formatNow() + '\t#####')
            fileNameResults = "{baseFolder}/log/{sessionName}_result.csv".format(
                sessionName=sessionName, baseFolder=baseFolder)
            util.writeInCsvFile(fileNameResults, mutantsInfo, mutantsHeader)

            ###########################################################
            # Write mutants info consumed by the machine-learning step
            ###########################################################
            datasetBaseFolder = '{}/ML/Dataset'.format(
                util.getPreviousFolder(baseExperimentFolder))

            # --- Minimals: the duplicated write logic of the original is now in
            # _writeMutantsDataset (one accumulated file + one per-program file)
            essentialInfo = computeEssencialInfo(mutantsInfo,
                                                 minimal_Equivalent=0)
            _writeMutantsDataset(datasetBaseFolder, 'MINIMAL', sessionName,
                                 essentialInfo)

            # --- Equivalents
            essentialInfo = computeEssencialInfo(mutantsInfo,
                                                 minimal_Equivalent=1)
            _writeMutantsDataset(datasetBaseFolder, 'EQUIVALENT', sessionName,
                                 essentialInfo)
예제 #4
0
def classify(newDataSetFileName, resultDataSetFileName, targetColumn,
             classifier, algorithmParameter, programToClassify):
    """ Classify a new data set as equivalent or minimal using existing predictive models.

    Args:
        newDataSetFileName (str): File containing the new mutants to be classified.
        resultDataSetFileName (str): File to be generated with the classification
            result. Contains the same number of rows as 'newDataSetFileName'.
        targetColumn (str): Column to be classified. Must be 'MINIMAL' or 'EQUIVALENT'.
        classifier (str): Classifier algorithm used to predict the new data.
            Must be 'KNN', 'DT', 'RF', 'SVM', 'LDA', 'LR' or 'GNB'.
        algorithmParameter (int): Parameter used by the classifier: K (number of
            neighbors) for KNN, or min samples split for Decision Tree and Random Forest.
        programToClassify (str): Program whose mutants are being classified; its rows
            are removed from the training set.

    Raises:
        ValueError: If targetColumn is not 'MINIMAL' or 'EQUIVALENT'.
    """

    ######################
    # --- Setting datasets
    targetColumnName = targetColumn
    targetColumn = '_IM_{}'.format(targetColumn)
    trainDataSetFileName = 'ML/Dataset/{}/mutants.csv'.format(targetColumnName)

    #####################
    # --- Setting columns
    if targetColumn == '_IM_MINIMAL':
        columnNames = getColumnNames_lastMinimal()
    elif targetColumn == '_IM_EQUIVALENT':
        columnNames = getColumnNames_lastEquivalent()
    else:
        # BUG FIX: the original left 'columnNames' unbound for any other value,
        # causing a confusing NameError further down; fail fast instead.
        raise ValueError(
            "targetColumn must be 'MINIMAL' or 'EQUIVALENT', got '{}'".format(
                targetColumnName))

    ###################
    # --- PreProcessing

    # --- Import
    trainDataSet = importDataSet(trainDataSetFileName)
    # Never train on the program whose mutants are being classified
    trainDataSet = trainDataSet.query(
        '_IM_PROGRAM != \'{}\''.format(programToClassify))
    newDataSetFrame = importDataSet(newDataSetFileName)

    # --- PreProcess: collect the union of operators and statement types of both
    # sets, presumably so the categorical encodings of train and test align
    # (the union is handed to preProcessing) — confirm against preProcessing.
    operatorsToTrain = list(set(trainDataSet['_IM_OPERATOR'].values))
    typeStatementsToTrain = list(set(
        trainDataSet['_IM_TYPE_STATEMENT'].values))
    operatorsToTest = list(set(newDataSetFrame['_IM_OPERATOR'].values))
    typeStatementsToTest = list(
        set(newDataSetFrame['_IM_TYPE_STATEMENT'].values))

    allOperators = list(set(operatorsToTrain + operatorsToTest))
    allTypeStatement = list(set(typeStatementsToTrain + typeStatementsToTest))

    trainDataSetFrame, numProperties, numColumnsToDelete_train, _, _, groupedDataSetFrame = preProcessing(
        trainDataSet, targetColumn, columnNames, [], [], allOperators,
        allTypeStatement)
    newDataSetFrame, numProperties, numColumnsToDelete_test, _, _, groupedDataSetFrame = preProcessing(
        newDataSetFrame, targetColumn, columnNames, [], [], allOperators,
        allTypeStatement, False)

    # Separate the data into X (values) and y (target value)
    X_train = trainDataSetFrame.iloc[:, :-1].values
    X_test = newDataSetFrame.iloc[:, :-1].values
    y_train = trainDataSetFrame.iloc[:, numProperties +
                                     numColumnsToDelete_train].values

    ##############################################################################
    # --- Classify and write new CSV with information about the prediction result
    y_test = trainingAndPredictions(classifier, algorithmParameter, X_train,
                                    y_train, X_test)

    # 1 where the prediction matches the known value, 0 otherwise
    # (enumerate replaces the original zip(range(len(...)), ...) idiom)
    result = [
        1 if predicted == groupedDataSetFrame[targetColumn][iCount] else 0
        for iCount, predicted in enumerate(y_test)
    ]

    predictedDF = pd.DataFrame(groupedDataSetFrame)
    predictedDF['PREDICTED'] = y_test
    predictedDF['RESULT'] = result

    # '<name>_result.csv' holds only the raw predicted labels
    onlyResultDataSetFileName = str(resultDataSetFileName).replace(
        '.csv', '_result.csv')
    util.writeInCsvFile(onlyResultDataSetFileName,
                        [str(value) for value in y_test])
    util.writeDataFrameInCsvFile(resultDataSetFileName, predictedDF)
예제 #5
0
def analyzeClassificationsFromEachProgram(targetColumn,
                                          possiblePrograms,
                                          bestProgram_Classifier,
                                          overwrite=False):
    '''
    Analyze the classification results of every program and compute its ML metrics.

    For each result file in the column's Classification folder that belongs to a
    known program, the accuracy/precision/recall/F1 metrics are computed (as
    percentages) together with the program's best classifier and parameter, and
    the whole table is written to 'ML_Metrics.csv'. When that file already
    exists (and overwrite is False) it is read back instead.

    Args:
        targetColumn (str): Column analyzed ('MINIMAL' or 'EQUIVALENT').
        possiblePrograms (list): Program names whose files should be analyzed.
        bestProgram_Classifier (DataFrame): Best classifier/parameter per
            (Column, Program); must contain 'Column', 'Program', 'Classifier'
            and 'Parameter'.
        overwrite (bool): When True, rebuild the metrics file even if it exists.

    Returns:
        DataFrame: One row per program, sorted by column and case-insensitive
        program name.
    '''
    baseFolder = '{}/ML/Results/{}/Classification'.format(
        os.getcwd(), targetColumn)
    fileName = 'ML_Metrics'
    # Still missing: create the file '{}/Metrics_AllClassifiers.csv'.format(baseFolder) here
    metricsFile = '{}/ML/Results/{}/Classification/{}.csv'.format(
        os.getcwd(), targetColumn, fileName)

    mutantsMetrics = pd.DataFrame()
    mustWrite = overwrite or not util.pathExists(metricsFile)

    if mustWrite:
        collectedMetrics = []

        for file in util.getFilesInFolder(baseFolder):
            # Strip the extension from the file name to get the program name
            programName = util.getPathName(file)
            programName = programName[:programName.find('.')]

            if programName in possiblePrograms:
                programInfo_ClassifierParameter = bestProgram_Classifier.query(
                    'Column == \'{}\' and Program == \'{}\''.format(
                        targetColumn, programName))

                # ROBUSTNESS FIX: the original indexed .values[0] unconditionally
                # and raised IndexError when no best-classifier record existed.
                if programInfo_ClassifierParameter.empty:
                    continue

                classifier = programInfo_ClassifierParameter[
                    'Classifier'].values[0]
                parameter = programInfo_ClassifierParameter[
                    'Parameter'].values[0]

                accuracy, precision, recall, f1 = getMLMetricsFromClassificationFile(
                    file, targetColumn, programName)
                collectedMetrics.append(
                    pd.DataFrame(data=[[
                        programName, targetColumn, classifier, parameter,
                        accuracy * 100, precision * 100, recall * 100, f1 * 100
                    ]],
                                 columns=[
                                     'ProgramName', 'Column', 'Classifier',
                                     'Parameter', 'Accuracy', 'Precision',
                                     'Recall', 'F1'
                                 ]))

        # pd.concat replaces the deprecated DataFrame.append (removed in pandas 2.0)
        if collectedMetrics:
            mutantsMetrics = pd.concat(collectedMetrics)
    elif util.pathExists(metricsFile):
        mutantsMetrics = util.createDataFrameFromCSV(metricsFile, True)

    # Sort case-insensitively by program name (deduplicated from the original's
    # two copies of this logic); guard against an empty frame, on which the
    # original raised KeyError.
    if not mutantsMetrics.empty:
        mutantsMetrics['ProgramName.UPPER'] = mutantsMetrics[
            'ProgramName'].str.upper()
        mutantsMetrics = mutantsMetrics.sort_values(
            by=['Column', 'ProgramName.UPPER'])
        del mutantsMetrics['ProgramName.UPPER']

    if mustWrite:
        util.writeDataFrameInCsvFile(metricsFile, mutantsMetrics)

    return mutantsMetrics
예제 #6
0
def analyzeResults(possibleTargetColumns,
                   possibleClassifiers,
                   overwriteFullFile=False):
    """
    Analyze the 30 runs of each classifier/target column and pick the best parameter.

    Parameters
    ----------
    possibleTargetColumns (list)
        All columns that can be sorted (MINIMAL and EQUIVALENT).
    possibleClassifiers (list)
        All classifiers that can be used (KNN, DT, RF, SVM, LDA, LR and GNB).
    overwriteFullFile (bool)
        Whether the file 'Summary_All30Runs' should be rebuilt even if it exists.

    Returns
    -------
    (DataFrame, DataFrame, DataFrame)
        experimentResults - all values of all predictive models across the 30
            executions ('Summary_All30Runs.csv').
        bestParameterResults - the runs of the best predictive models only
            ('Summary_BestClassifiers_All30Runs.csv').
        summaryClassifiersBestParameter - average values of the classifiers with
            the best results ('Summary_Classifiers.csv').
    """

    # Dataframe with all execution results
    experimentResults = pd.DataFrame()

    # Dataframe with the indication of the best parameter for each classifier
    summaryClassifiersBestParameter = pd.DataFrame()

    # Base folder
    baseResultsFolderName = '{}/ML/Results'.format(os.getcwd())

    # FullFile
    fullFileName = '{}/Summary/Summary_All30Runs.csv'.format(
        baseResultsFolderName)

    # FullFile for the best classifiers
    fullBestClassifiersFileName = '{}/Summary/Summary_BestClassifiers_All30Runs.csv'.format(
        baseResultsFolderName)

    # SummaryFile
    summaryFileName = '{}/Summary/Summary_Classifiers.csv'.format(
        baseResultsFolderName)

    # CustomParameterResults
    summaryCustomParametersFileName = '{}/Summary/Summary_CustomParameters.csv'.format(
        baseResultsFolderName)

    if overwriteFullFile or not util.pathExists(fullFileName):
        # Rebuild everything from the per-run CSVs.
        # Frames are accumulated in lists and concatenated once at the end —
        # replaces the original's quadratic cumulative DataFrame.append
        # (deprecated and removed in pandas 2.0).
        allRunFrames = []        # every run of every classifier, with run metadata
        bestParameterFrames = [] # best-parameter row per (column, classifier)

        # Cycle through all columns (Minimal and Equivalent)
        for targetColumn in possibleTargetColumns:
            # Cycle through all classifiers
            for classifier in possibleClassifiers:
                # Results of the 30 executions, WITHOUT the run metadata columns
                runFrames = []

                # Cycle through all executions (from 1 to 30)
                for iCount in range(1, 31):
                    # Fetch this execution's results
                    classifierRunResultFileName = '{}/{}_{}/{}.csv'.format(
                        baseResultsFolderName, targetColumn, iCount,
                        classifier)
                    classifierRunResult = util.createDataFrameFromCSV(
                        classifierRunResultFileName,
                        hasHeader=True,
                        separator=';',
                        initialLine=5)

                    # Keep a copy before tagging, so the per-parameter analysis
                    # below sees the frame without the metadata columns (matches
                    # the original's append-before-insert ordering).
                    runFrames.append(classifierRunResult.copy())

                    # Tag the run with its metadata.
                    # NOTE: the original wrapped the frame in pd.DataFrame(...)
                    # before insert(), which only mutated classifierRunResult
                    # because pandas 1.x shares the data manager; inserting
                    # directly is explicit and equivalent.
                    classifierRunResult.insert(0, 'TargetColumn', targetColumn,
                                               True)
                    classifierRunResult.insert(1, 'Classifier', classifier,
                                               True)
                    classifierRunResult.insert(2, 'Run', iCount, True)

                    allRunFrames.append(classifierRunResult)

                classifierResults = pd.concat(runFrames)

                # Compute the average metrics of the 30 runs per parameter value
                parameterFrames = []
                for parameter in classifierResults['SampleSplit'].unique():
                    # Only the results of this parameter
                    resultsFromThisParameter = classifierResults.query(
                        'SampleSplit == \'{}\''.format(parameter))

                    meanAccuracy = np.mean(
                        resultsFromThisParameter['Accuracy'])
                    meanPrecision = np.mean(
                        resultsFromThisParameter['Precision'])
                    meanRecall = np.mean(resultsFromThisParameter['Recall'])
                    meanF1 = np.mean(resultsFromThisParameter['F1'])
                    stdF1 = np.std(resultsFromThisParameter['F1'])

                    parameterFrames.append(
                        pd.DataFrame(data=[[
                            targetColumn, classifier, parameter, meanAccuracy,
                            meanPrecision, meanRecall, meanF1, stdF1
                        ]],
                                     columns=[
                                         'TargetColumn', 'Classifier',
                                         'Parameter', 'Accuracy', 'Precision',
                                         'Recall', 'F1', 'StdDevF1'
                                     ]))

                # The parameter with the highest mean F1 wins
                if parameterFrames:
                    bestParameter = pd.concat(parameterFrames).sort_values(
                        by=['F1'], ascending=False).head(n=1)
                    bestParameterFrames.append(bestParameter)

        if allRunFrames:
            experimentResults = pd.concat(allRunFrames)
        if bestParameterFrames:
            summaryClassifiersBestParameter = pd.concat(bestParameterFrames)

        # Write a file with all the results
        util.writeDataFrameInCsvFile(fullFileName,
                                     experimentResults,
                                     index=False)

        # Write a file with only the best parameters for each classifier
        util.writeDataFrameInCsvFile(summaryFileName,
                                     summaryClassifiersBestParameter,
                                     index=False)

        # Write a file with all the results but only for the best parameters
        bestParameterResults = getRunsOnlyFromBestParameter(
            experimentResults, summaryClassifiersBestParameter,
            possibleTargetColumns)
        util.writeDataFrameInCsvFile(fullBestClassifiersFileName,
                                     bestParameterResults,
                                     index=False)

    else:
        # Read back the previously generated files
        experimentResults = util.createDataFrameFromCSV(
            fullFileName, True, ',')
        summaryClassifiersBestParameter = util.createDataFrameFromCSV(
            summaryFileName, True, ',')
        bestParameterResults = util.createDataFrameFromCSV(
            fullBestClassifiersFileName, True, ',')

    # Get the results from custom parameters
    customParameterResults = summarizeRunsFromCustomParameter(
        getRunsFromCustomParameters(experimentResults))
    util.writeDataFrameInCsvFile(summaryCustomParametersFileName,
                                 customParameterResults,
                                 index=False)

    return experimentResults, bestParameterResults, summaryClassifiersBestParameter
예제 #7
0
def getBestClassifierForPrograms(program=None,
                                 targetColumn=None,
                                 write=True,
                                 overwrite=False):
    '''
    Analyze the classification results of the programs with all classifiers.

    When every program and every column is analyzed (program and targetColumn are
    None) and 'write' is True, two files are written per column: 'ML_Metrics.csv'
    (best classifier per program) and 'Metrics_AllClassifiers.csv' (every
    classifier per program). Otherwise the previously generated 'ML_Metrics.csv'
    files are read back.

    Args:
        program (str|None): Restrict the analysis to one program, or None for all.
        targetColumn (str|None): Restrict to one column, or None for all.
        write (bool): Allow writing the result files.
        overwrite (bool): Overwrite existing result files.

    Returns:
        DataFrame: Best classifier per program with columns ['ProgramName',
        'Column', 'Classifier', 'Parameter', 'Accuracy', 'Precision', 'Recall', 'F1'].
    '''

    # SetUp
    possibleTargetColumns, possibleClassifiers, possiblePrograms = setUp()

    # Best classifier for each program - ProgramName, Column, Classifier, metrics
    df_Programs_BestClassifiers = pd.DataFrame()

    # Default the optional filters to "all"
    programs = [program] if program is not None else possiblePrograms
    columns = [targetColumn] if targetColumn is not None else possibleTargetColumns

    # Files are only written when the full set of programs and columns is analyzed
    mustWrite = write and program is None and targetColumn is None

    if mustWrite:
        for _column in columns:
            baseFolder = '{}/ML/Results/{}/Classification'.format(
                os.getcwd(), _column)

            # Best classifier (and metrics) per program
            df_Program_Metrics_BestClassifier = pd.DataFrame()

            # All classifiers (and metrics) per program
            df_Program_Metrics_AllClassifiers = pd.DataFrame()

            for programName in programs:
                df_Program_Classifiers_Metrics = pd.DataFrame()

                for classifier in possibleClassifiers:
                    fileName = '{}/{}_{}.csv'.format(baseFolder, programName,
                                                     classifier)

                    if util.pathExists(fileName):
                        classificationResult = util.createDataFrameFromCSV(
                            fileName, hasHeader=True)

                        y_correct = classificationResult.loc[:, '_IM_{}'.format(
                            _column)].values
                        y_predicted = classificationResult.loc[:,
                                                               'PREDICTED'].values

                        accuracy, precision, recall, f1, _, _, _, _, _, _ = evaluatingClassification(
                            y_correct, y_predicted)

                        # BUG FIX: the original passed 7 data values for 8 column
                        # names, which raises ValueError in pandas. The missing
                        # 'Parameter' value is filled with '' — the same fallback
                        # used by getBestClassifierForEachProgram — since
                        # downstream readers of ML_Metrics.csv expect the column.
                        newDataFrame = pd.DataFrame(data=[[
                            programName, _column, classifier, '',
                            accuracy * 100, precision * 100, recall * 100,
                            f1 * 100
                        ]],
                                                    columns=[
                                                        'ProgramName',
                                                        'Column', 'Classifier',
                                                        'Parameter',
                                                        'Accuracy',
                                                        'Precision', 'Recall',
                                                        'F1'
                                                    ])
                        # pd.concat replaces the deprecated DataFrame.append
                        df_Program_Classifiers_Metrics = pd.concat(
                            [df_Program_Classifiers_Metrics, newDataFrame])

                # ROBUSTNESS FIX: the original sorted an empty frame by 'F1'
                # (KeyError) when no classifier file existed for a program.
                if not df_Program_Classifiers_Metrics.empty:
                    df_Program_Metrics_AllClassifiers = pd.concat([
                        df_Program_Metrics_AllClassifiers,
                        df_Program_Classifiers_Metrics.sort_values(
                            'F1', ascending=False)
                    ])

                    # The classifier with the highest F1 wins for this program
                    df_Program_Classifiers_Metrics = df_Program_Classifiers_Metrics.sort_values(
                        'F1', ascending=False).head(n=1)
                    df_Program_Metrics_BestClassifier = pd.concat([
                        df_Program_Metrics_BestClassifier,
                        df_Program_Classifiers_Metrics
                    ])
                    df_Programs_BestClassifiers = pd.concat([
                        df_Programs_BestClassifiers,
                        df_Program_Classifiers_Metrics
                    ])

            # Save the dataframes (the original re-tested mustWrite here, which
            # is always True in this branch)
            fileName = '{}/ML_Metrics.csv'.format(baseFolder)
            fileNameAllClassifiers = '{}/Metrics_AllClassifiers.csv'.format(
                baseFolder)
            if not util.pathExists(fileName) or overwrite:
                util.writeDataFrameInCsvFile(
                    fileName, df_Program_Metrics_BestClassifier)
                util.writeDataFrameInCsvFile(
                    fileNameAllClassifiers,
                    df_Program_Metrics_AllClassifiers)
    else:
        # Read back the previously generated metrics files
        for _column in columns:
            baseFolder = '{}/ML/Results/{}/Classification'.format(
                os.getcwd(), _column)
            fileName = '{}/ML_Metrics.csv'.format(baseFolder)

            df_Programs_BestClassifiers = pd.concat([
                df_Programs_BestClassifiers,
                util.createDataFrameFromCSV(fileName, True)
            ])

    return df_Programs_BestClassifiers