Exemplo n.º 1
0
def splitDataFirstMode():
    """Split the full dataset once, then score on several training-set sizes.

    Steps performed:
      1. Load the data with ``dp.returnData(1, randomSeed=10)`` and save it
         as 'AD_dataset'.
      2. Split it into X / y and then into training (80%) and testing (20%)
         sets; both sets are saved with the labels attached as a
         ``normality`` column ('AD_set_train', 'AD_set_test').
      3. Store the data characteristics of the split under ``dataInfo[1]``.
      4. For every size in ``selectedSizes`` (or the default list of
         fractions when it is empty/None) call ``da.getScores`` with
         ``trainSize`` set to that size and hand the result to
         ``savePredictionScores``.

    Relies on module-level names defined elsewhere in the file: ``dp``,
    ``DataAnalysis``, ``dirName``, ``saveDataset``, ``dataInfo``,
    ``selectedSizes``, ``randomSeeds``, ``selectedAlgorithms`` and
    ``savePredictionScores``.

    Returns:
        None. Results are persisted through the helper functions above.
    """
    # use all available data
    sampleData = dp.returnData(1, randomSeed=10)
    saveDataset(sampleData, 'AD_dataset')

    # split data into X and y
    da = DataAnalysis(dirName=dirName)
    X, y = da.splitXY(sampleData)

    # split data into training (80%) and testing (20%) set
    xTrain, xTest, yTrain, yTest = da.splitTrainTest(X, y, trainSize=0.8)
    saveDataset(xTrain.assign(normality=yTrain.values), 'AD_set_train')
    saveDataset(xTest.assign(normality=yTest.values), 'AD_set_test')

    # get data characteristics for current dataset size
    dataInfo[1] = da.getDataCharacteristics(yTrain, yTest)

    # Only forward selectedAlgorithms when it is set, so that getScores falls
    # back to its own default otherwise. Building the keyword arguments once
    # guarantees both cases use the same positional order for the data.
    scoreOptions = {'randomSeeds': randomSeeds}
    if selectedAlgorithms:
        scoreOptions['selectedAlgorithms'] = selectedAlgorithms

    # Run predictions
    for datasetSize in (selectedSizes or [0.2, 0.4, 0.6, 0.8, 1]):
        # get no. of samples and prediction accuracy
        # NOTE: positional order follows the splitTrainTest unpacking above
        # (xTrain, xTest, yTrain, yTest); the previous fallback call passed
        # yTrain and xTest swapped.
        noOfSamples, predictions = da.getScores(
            xTrain,
            xTest,
            yTrain,
            yTest,
            trainSize=datasetSize,
            **scoreOptions
        )

        # save results for graphs and .csv files
        savePredictionScores(noOfSamples, predictions, datasetSize)
Exemplo n.º 2
0
def normalMode():
    """Sample the dataset at each requested size, then split and score it.

    For every size in ``selectedSizes`` (or the default list of fractions
    when it is empty/None):
      1. Load that fraction of the data with
         ``dp.returnData(datasetSize, randomSeed=10)`` and save it as
         'AD_dataset' (the same name is used on every iteration).
      2. Split it into X / y and then into training (80%) and testing (20%)
         sets.
      3. Store the data characteristics under ``dataInfo[datasetSize]``.
      4. Call ``da.getScores`` on the whole training set (``trainSize=1``)
         with a single fixed seed and hand the result to
         ``savePredictionScores``.

    Relies on module-level names defined elsewhere in the file: ``dp``,
    ``DataAnalysis``, ``dirName``, ``saveDataset``, ``dataInfo``,
    ``selectedSizes``, ``selectedAlgorithms`` and ``savePredictionScores``.

    Returns:
        None. Results are persisted through the helper functions above.
    """
    da = DataAnalysis(dirName=dirName)

    # trainSize is 1 because the data was already reduced by returnData, so
    # no further splitting should happen; a single seed is enough because the
    # whole training set is always used. The previous fallback call (used
    # when selectedAlgorithms was empty) passed trainSize=datasetSize and the
    # global randomSeeds instead, which sub-sampled the data a second time.
    scoreOptions = {'trainSize': 1, 'randomSeeds': [10]}
    # Only forward selectedAlgorithms when it is set, so that getScores falls
    # back to its own default otherwise.
    if selectedAlgorithms:
        scoreOptions['selectedAlgorithms'] = selectedAlgorithms

    # Run predictions
    for datasetSize in (selectedSizes or [0.2, 0.4, 0.6, 0.8, 1]):
        # get the percentage of all data
        sampleData = dp.returnData(datasetSize, randomSeed=10)
        saveDataset(sampleData, 'AD_dataset')

        # split data into X and y
        X, y = da.splitXY(sampleData)

        # split data into training (80%) and testing (20%) set
        xTrain, xTest, yTrain, yTest = da.splitTrainTest(X, y, trainSize=0.8)

        # get data characteristics for current dataset size
        dataInfo[datasetSize] = da.getDataCharacteristics(yTrain, yTest)

        # get no. of samples and prediction accuracy
        # NOTE: positional order follows the splitTrainTest unpacking above
        # (xTrain, xTest, yTrain, yTest); the previous fallback call passed
        # yTrain and xTest swapped.
        noOfSamples, predictions = da.getScores(
            xTrain,
            xTest,
            yTrain,
            yTest,
            **scoreOptions
        )

        # save results for graphs and .csv files
        savePredictionScores(noOfSamples, predictions, datasetSize)