Пример #1
0
def test_missing_indicator_sparse_param(arr_type, missing_values,
                                        param_sparse):
    # check the format of the output with different sparse parameter
    X_fit = np.array([[missing_values, missing_values, 1],
                      [4, missing_values, 2]])
    X_trans = np.array([[missing_values, missing_values, 1], [4, 12, 10]])
    X_fit = arr_type(X_fit).astype(np.float64)
    X_trans = arr_type(X_trans).astype(np.float64)

    indicator = MissingIndicator(missing_values=missing_values,
                                 sparse=param_sparse)
    X_fit_mask = indicator.fit_transform(X_fit)
    X_trans_mask = indicator.transform(X_trans)

    if param_sparse is True:
        assert X_fit_mask.format == 'csc'
        assert X_trans_mask.format == 'csc'
    elif param_sparse == 'auto' and missing_values == 0:
        assert isinstance(X_fit_mask, np.ndarray)
        assert isinstance(X_trans_mask, np.ndarray)
    elif param_sparse is False:
        assert isinstance(X_fit_mask, np.ndarray)
        assert isinstance(X_trans_mask, np.ndarray)
    else:
        if sparse.issparse(X_fit):
            assert X_fit_mask.format == 'csc'
            assert X_trans_mask.format == 'csc'
        else:
            assert isinstance(X_fit_mask, np.ndarray)
            assert isinstance(X_trans_mask, np.ndarray)
Пример #2
0
def test_missing_indicator_sparse_no_explicit_zeros():
    # Check that non missing values don't become explicit zeros in the mask
    # generated by missing indicator when X is sparse. (#13491)
    X = sparse.csr_matrix([[0, 1, 2], [1, 2, 0], [2, 0, 1]])

    mi = MissingIndicator(features='all', missing_values=1)
    Xt = mi.fit_transform(X)

    assert Xt.getnnz() == Xt.sum()
Пример #3
0
def test_missing_indicator_no_missing():
    # check that all features are dropped if there are no missing values when
    # features='missing-only' (#13491)
    X = np.array([[1, 1], [1, 1]])

    mi = MissingIndicator(features='missing-only', missing_values=-1)
    Xt = mi.fit_transform(X)

    assert Xt.shape[1] == 0
Пример #4
0
def test_missing_indicator_new(missing_values, arr_type, dtype, param_features,
                               n_features, features_indices):
    X_fit = np.array([[missing_values, missing_values, 1],
                      [4, 2, missing_values]])
    X_trans = np.array([[missing_values, missing_values, 1], [4, 12, 10]])
    X_fit_expected = np.array([[1, 1, 0], [0, 0, 1]])
    X_trans_expected = np.array([[1, 1, 0], [0, 0, 0]])

    # convert the input to the right array format and right dtype
    X_fit = arr_type(X_fit).astype(dtype)
    X_trans = arr_type(X_trans).astype(dtype)
    X_fit_expected = X_fit_expected.astype(dtype)
    X_trans_expected = X_trans_expected.astype(dtype)

    indicator = MissingIndicator(missing_values=missing_values,
                                 features=param_features,
                                 sparse=False)
    X_fit_mask = indicator.fit_transform(X_fit)
    X_trans_mask = indicator.transform(X_trans)

    assert X_fit_mask.shape[1] == n_features
    assert X_trans_mask.shape[1] == n_features

    assert_array_equal(indicator.features_, features_indices)
    assert_allclose(X_fit_mask, X_fit_expected[:, features_indices])
    assert_allclose(X_trans_mask, X_trans_expected[:, features_indices])

    assert X_fit_mask.dtype == bool
    assert X_trans_mask.dtype == bool
    assert isinstance(X_fit_mask, np.ndarray)
    assert isinstance(X_trans_mask, np.ndarray)

    indicator.set_params(sparse=True)
    X_fit_mask_sparse = indicator.fit_transform(X_fit)
    X_trans_mask_sparse = indicator.transform(X_trans)

    assert X_fit_mask_sparse.dtype == bool
    assert X_trans_mask_sparse.dtype == bool
    assert X_fit_mask_sparse.format == 'csc'
    assert X_trans_mask_sparse.format == 'csc'
    assert_allclose(X_fit_mask_sparse.toarray(), X_fit_mask)
    assert_allclose(X_trans_mask_sparse.toarray(), X_trans_mask)
Пример #5
0
def test_missing_indicator_raise_on_sparse_with_missing_0(arr_type):
    # test for sparse input and missing_value == 0

    missing_values = 0
    X_fit = np.array([[missing_values, missing_values, 1],
                      [4, missing_values, 2]])
    X_trans = np.array([[missing_values, missing_values, 1], [4, 12, 10]])

    # convert the input to the right array format
    X_fit_sparse = arr_type(X_fit)
    X_trans_sparse = arr_type(X_trans)

    indicator = MissingIndicator(missing_values=missing_values)

    with pytest.raises(ValueError, match="Sparse input with missing_values=0"):
        indicator.fit_transform(X_fit_sparse)

    indicator.fit_transform(X_fit)
    with pytest.raises(ValueError, match="Sparse input with missing_values=0"):
        indicator.transform(X_trans_sparse)
Пример #6
0
def test_missing_indicator_string():
    X = np.array([['a', 'b', 'c'], ['b', 'c', 'a']], dtype=object)
    indicator = MissingIndicator(missing_values='a', features='all')
    X_trans = indicator.fit_transform(X)
    assert_array_equal(X_trans,
                       np.array([[True, False, False], [False, False, True]]))