Example #1
def test_incremental_basic(scheduler, xy_classification):
    X, y = xy_classification
    with scheduler() as (s, [a, b]):
        est1 = SGDClassifier(random_state=0, tol=1e-3)
        est2 = clone(est1)

        clf = Incremental(est1)
        result = clf.fit(X, y, classes=[0, 1])
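        # Fit the plain scikit-learn clone chunk-by-chunk as a reference for comparison.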
        for slice_ in da.core.slices_from_chunks(X.chunks):
            est2.partial_fit(X[slice_], y[slice_[0]], classes=[0, 1])

        assert result is clf

        assert isinstance(result.estimator.coef_, np.ndarray)
        np.testing.assert_array_almost_equal(result.estimator.coef_,
                                             est2.coef_)

        assert_estimator_equal(clf.estimator, est2, exclude=['loss_function_'])

        #  Predict
        result = clf.predict(X)
        expected = est2.predict(X)
        assert isinstance(result, da.Array)
        assert_eq(result, expected)

        # score
        result = clf.score(X, y)
        expected = est2.score(X, y)
        # assert isinstance(result, da.Array)
        assert_eq(result, expected)

        clf = Incremental(SGDClassifier(random_state=0, tol=1e-3))
        clf.partial_fit(X, y, classes=[0, 1])
        assert_estimator_equal(clf.estimator, est2, exclude=['loss_function_'])
Example #2
def test_incremental_basic(scheduler, dataframes):
    # Create observations that we know linear models can recover
    n, d = 100, 3
    rng = da.random.RandomState(42)
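    # 100 samples in blocks of 30 rows; Incremental calls partial_fit once per block.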
    X = rng.normal(size=(n, d), chunks=30)
    coef_star = rng.uniform(size=d, chunks=d)
    y = da.sign(X.dot(coef_star))
    y = (y + 1) / 2
    if dataframes:
        X = dd.from_array(X)
        y = dd.from_array(y)

    with scheduler() as (s, [_, _]):
        est1 = SGDClassifier(random_state=0, tol=1e-3, average=True)
        est2 = clone(est1)

        clf = Incremental(est1, random_state=0)
        result = clf.fit(X, y, classes=[0, 1])
        assert result is clf

        # est2 is a plain sklearn estimator; it is fit chunk-by-chunk below as a benchmark
        if dataframes:
            X = X.to_dask_array(lengths=True)
            y = y.to_dask_array(lengths=True)

        for slice_ in da.core.slices_from_chunks(X.chunks):
            est2.partial_fit(X[slice_].compute(),
                             y[slice_[0]].compute(),
                             classes=[0, 1])

        assert isinstance(result.estimator_.coef_, np.ndarray)
        rel_error = np.linalg.norm(clf.coef_ - est2.coef_)
        rel_error /= np.linalg.norm(clf.coef_)
        assert rel_error < 0.9

        assert set(dir(clf.estimator_)) == set(dir(est2))

        #  Predict
        result = clf.predict(X)
        expected = est2.predict(X)
        assert isinstance(result, da.Array)
        if dataframes:
            # Compute is needed because chunk sizes of this array are unknown
            result = result.compute()
        rel_error = np.linalg.norm(result - expected)
        rel_error /= np.linalg.norm(expected)
        assert rel_error < 0.3

        # score
        result = clf.score(X, y)
        expected = est2.score(*dask.compute(X, y))
        assert abs(result - expected) < 0.1

        clf = Incremental(SGDClassifier(random_state=0, tol=1e-3,
                                        average=True))
        clf.partial_fit(X, y, classes=[0, 1])
        assert set(dir(clf.estimator_)) == set(dir(est2))
Example #3
def test_incremental_basic(scheduler):
    # Create observations that we know linear models can recover
    n, d = 100, 3
    rng = da.random.RandomState(42)
    X = rng.normal(size=(n, d), chunks=30)
    coef_star = rng.uniform(size=d, chunks=d)
    y = da.sign(X.dot(coef_star))
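    # Map the {-1, +1} labels from da.sign to {0, 1}.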
    y = (y + 1) / 2

    with scheduler() as (s, [_, _]):
        est1 = SGDClassifier(random_state=0, tol=1e-3, average=True)
        est2 = clone(est1)

        clf = Incremental(est1, random_state=0)
        result = clf.fit(X, y, classes=[0, 1])
        for slice_ in da.core.slices_from_chunks(X.chunks):
            est2.partial_fit(X[slice_], y[slice_[0]], classes=[0, 1])

        assert result is clf

        assert isinstance(result.estimator_.coef_, np.ndarray)
        rel_error = np.linalg.norm(clf.coef_ - est2.coef_)
        rel_error /= np.linalg.norm(clf.coef_)
        assert rel_error < 0.9

        assert set(dir(clf.estimator_)) == set(dir(est2))

        #  Predict
        result = clf.predict(X)
        expected = est2.predict(X)
        assert isinstance(result, da.Array)
        rel_error = np.linalg.norm(result - expected)
        rel_error /= np.linalg.norm(expected)
        assert rel_error < 0.2

        # score
        result = clf.score(X, y)
        expected = est2.score(X, y)
        assert abs(result - expected) < 0.1

        clf = Incremental(SGDClassifier(random_state=0, tol=1e-3,
                                        average=True))
        clf.partial_fit(X, y, classes=[0, 1])
        assert set(dir(clf.estimator_)) == set(dir(est2))
Example #4
x_testPDscaled = scalerPD.transform(x_testPD)

##### MLP Model
learnermlpPD.fit(x_trainPDscaled, y_trainPD, classes=numpy.unique(y_trainPD))
print('PD done training model')
#result = mlpPD.predict([[1,1,1,1]])
#prob_results = mlpPD.predict_proba([[1,1,1,1]])

##### Random Forest Model
##learnerrfPD.fit(x_trainPDscaled,y_trainPD, classes=numpy.unique(y_trainPD))
##
##forest_result = model.predict([[1,1,1,1]])
##forest_prob_result = model.predict_proba([[1,1,1,1]])

##### Testing
predictions_mlpPD = learnermlpPD.predict(x_testPDscaled)
##predictions_forestPD = learnerrfPD.predict(x_testPDscaled)
print('PD done predicting')

print('NN PD Confusion Matrix\n' +
      str(confusion_matrix(y_testPD, predictions_mlpPD)) + '\n')
print('NN PD Classification Report \n' +
      classification_report(y_testPD, predictions_mlpPD))
##print('RF Confusion Matrix\n' + str(confusion_matrix(y_testPD,predictions_forestPD)) + '\n')
##print('RF Classification Report \n'+ classification_report(y_testPD,predictions_forestPD))
##
############### Delay Amount

# Create a single learner instance to be used throughout this code block
mlpDA = MLPClassifier(hidden_layer_sizes=(4, 4),
                      max_iter=300,
Example #5
def main():
    t0 = time.time()

    basepath = "/home/eline/OneDrive/__NiiFormat1"  # Path to the patient folders.
    patientPaths, patientIDs = getData.GetPatients(basepath)
    patientIDs = np.array(patientIDs)

    # Choose which scans to include.
    t2 = ["T2"]
    dwi = [
        "DWI_b00", "DWI_b01", "DWI_b02", "DWI_b03", "DWI_b04", "DWI_b05",
        "DWI_b06"
    ]
    ffe = []
    t1t2sense = []

    scantypes = [t2, dwi, ffe, t1t2sense]
    scans = []
    for scantype in scantypes:
        if scantype:
            scans.append(scantype)

    # Choose the mask/ground truth.
    maskchoice = "union"  # an, shh, intersection or union

    # Creating dictionaries to store patient image data and the masks.
    dataDict, groundTruthDict, imsizes = buildData.buildDataset(
        patientPaths, patientIDs, scans, maskchoice)

    # Choose cross-validator.
    crossvalidator = options.select_cross_validator(
        "leave-One-Out")  # K-fold or leave-One-Out

    loadtime = time.time()

    zeroIndex = {}

    dice = []

    # Train model.
    for train_index, test_index in crossvalidator.split(patientIDs):
        # First splitting the data and building dask arrays.
        trainingX, trainingY = buildData.get_data_for_training(
            dataDict, groundTruthDict, patientIDs[train_index])
        testX, testY = buildData.get_data_for_test(dataDict, groundTruthDict,
                                                   patientIDs[test_index],
                                                   zeroIndex)

        # Using incremental learning (out of core learning) because of the large amount of data.
        # The estimator has to implement the partial_fit API.
        estimator = sklearn.linear_model.SGDClassifier()
        clf = Incremental(estimator, scoring='accuracy')
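        # classes= must list every label up front, since each partial_fit call only sees one block of data.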
        clf.fit(trainingX, trainingY, classes=[True, False])
        data = clf.predict(testX)

        # Per patient predictions.
        index = 0
        for patientID in patientIDs[test_index]:
            # Get the voxels belonging to the patient.
            size = len(groundTruthDict[patientID])
            # compute() is needed to access the values in a Dask array.
            pred = data[index:index + size].compute()
            truth = testY[index:index + size].compute()

            # Set rows which contained at least one zero as background (0).
            for element in zeroIndex[patientID]:
                pred[element] = 0

            # Remove small areas/volumes from the predicted mask.
            pred = processResults.remove_small_areas2D(pred,
                                                       imsizes[patientID])
            #pred = processResults.remove_small_areas3D(pred, imsizes[patientID])

            # Calculate the confusion matrix.
            confusionMatrix = confusion_matrix(truth, pred)

            # Calculate the DICE score.
            diceScore = processResults.calculate_dice(confusionMatrix)
            dice.append([patientID, diceScore])

            # Save prediction as nifti file.
            filename = 'predict' + patientID + '.nii'
            predimage = processResults.array_to_image(pred, imsizes[patientID])
            sitk.WriteImage(predimage, filename)

            # Increase index to the starting index of the next patient.
            index += size

    t1 = time.time()
    print('loadtime: ' + str(loadtime - t0))
    print('traintime: ' + str(t1 - loadtime))
    print('runtime: ' + str(t1 - t0))

    # Save the DICE scores in a text file.
    processResults.save_dice_scores(dice, "diceScores")

    # Calculate the mean DSC value.
    scores = [score for _, score in dice]
    print(sum(scores) / len(scores))