Пример #1
0
def test_knn_n_neighbors():
    """Check KNNImputer output for ``n_neighbors=1`` and ``n_neighbors=6``.

    Each scenario also asserts that the fitted ``statistics_`` attribute
    equals the column-wise nan-mean of the training data.
    """
    nan = np.nan

    def build_data():
        # Return a fresh array on every call so each scenario fits on its
        # own copy of the same seven-row fixture.
        return np.array([
            [0, 0],
            [nan, 2],
            [4, 3],
            [5, nan],
            [7, 7],
            [nan, 8],
            [14, 13],
        ])

    X = build_data()
    col_means = np.nanmean(X, axis=0)

    # ---- Scenario 1: a single neighbor --------------------------------
    k = 1
    expected_1nn = np.array([
        [0, 0],
        [4, 2],
        [4, 3],
        [5, 3],
        [7, 7],
        [7, 8],
        [14, 13],
    ])
    knn = KNNImputer(n_neighbors=k)

    assert_array_equal(knn.fit_transform(X), expected_1nn)
    assert_array_equal(knn.statistics_, col_means)

    # ---- Scenario 2: six neighbors ------------------------------------
    X = build_data()
    k = 6
    expected_6nn = np.array([
        [0, 0],
        [6, 2],
        [4, 3],
        [5, 5.5],
        [7, 7],
        [6, 8],
        [14, 13],
    ])
    knn = KNNImputer(n_neighbors=k)
    knn_extra = KNNImputer(n_neighbors=k + 1)

    assert_array_equal(knn.fit_transform(X), expected_6nn)
    assert_array_equal(knn.statistics_, col_means)

    # Requesting one more neighbor than six must yield the same imputation
    # (presumably because a seven-row dataset offers at most six donors).
    out_k = knn.fit_transform(X)
    out_k_plus_one = knn_extra.fit(X).transform(X)
    assert_array_equal(out_k, out_k_plus_one)
Пример #2
0
def test_knn_imputation_default():
    """Exercise KNNImputer with its default parameters on four fixtures.

    Every scenario checks both the imputed matrix and that ``statistics_``
    matches the column-wise nan-mean of the data passed to ``fit``.
    """
    nan = np.nan

    # The first six rows (and their expected imputations) are shared by the
    # first two scenarios; only the trailing rows differ.
    shared_rows = [
        [1, 0, 0, 1],
        [2, 1, 2, nan],
        [3, 2, 3, nan],
        [nan, 4, 5, 5],
        [6, nan, 6, 7],
        [8, 8, 8, 8],
    ]
    shared_rows_imputed = [
        [1, 0, 0, 1],
        [2, 1, 2, 8],
        [3, 2, 3, 8],
        [4, 4, 5, 5],
        [6, 3, 6, 7],
        [8, 8, 8, 8],
    ]

    # ---- Scenario 1: an imputable matrix ------------------------------
    X = np.array(shared_rows + [[16, 15, 18, 19]])
    col_means = np.nanmean(X, axis=0)
    expected = np.array(shared_rows_imputed + [[16, 15, 18, 19]])

    knn = KNNImputer()
    assert_array_equal(knn.fit_transform(X), expected)
    assert_array_equal(knn.statistics_, col_means)

    # ---- Scenario 2: % missing in a row > row_max_missing -------------
    # The last row lacks three of its four values; the expected fill for
    # those cells is the corresponding column mean.
    X = np.array(shared_rows + [
        [19, 19, 19, 19],
        [nan, nan, nan, 19],
    ])
    col_means = np.nanmean(X, axis=0)
    mean_filled_row = list(col_means[:3]) + [19]
    expected = np.array(shared_rows_imputed + [
        [19, 19, 19, 19],
        mean_filled_row,
    ])

    knn = KNNImputer()
    assert_array_almost_equal(knn.fit_transform(X), expected, decimal=6)
    assert_array_almost_equal(knn.statistics_, col_means, decimal=6)

    # ---- Scenario 3: all neighboring donors also miss the feature -----
    leading_features = [
        [1, 0, 0],
        [2, 1, 2],
        [3, 2, 3],
        [4, 4, 5],
        [6, 7, 6],
        [8, 8, 8],
    ]
    complete_rows = [[20, 20, 20, 20], [22, 22, 22, 22]]
    X = np.array([row + [nan] for row in leading_features] + complete_rows)
    col_means = np.nanmean(X, axis=0)
    expected = np.array(
        [row + [21] for row in leading_features] + complete_rows
    )

    knn = KNNImputer()
    assert_array_equal(knn.fit_transform(X), expected)
    assert_array_equal(knn.statistics_, col_means)

    # ---- Scenario 4: fit() and transform() receive different data -----
    X = np.array([
        [0, 0],
        [nan, 2],
        [4, 3],
        [5, 6],
        [7, 7],
        [9, 8],
        [11, 16],
    ])
    col_means = np.nanmean(X, axis=0)

    Y = np.array([[1, 0], [3, 2], [4, nan]])
    expected_Y = np.array([[1, 0], [3, 2], [4, 4.8]])

    knn = KNNImputer()
    assert_array_equal(knn.fit(X).transform(Y), expected_Y)
    assert_array_equal(knn.statistics_, col_means)