def test_iterative_imputer_early_stopping():
    """Exercise the ``tol``-driven stopping behaviour of ``IterativeImputer``.

    Three fits are run on the same rank-one matrix whose entries are hidden
    wherever a uniform random draw falls below 0.5:

    1. ``tol=1e-2`` with a budget of 100 iterations -- the recorded
       imputation sequence must contain ``n_features`` entries per
       iteration actually performed.
    2. A refit whose budget equals the iteration count of the first fit --
       its output must match the first fit's output (``atol=1e-7``).
    3. ``tol=0`` with a budget of 100 iterations -- the whole budget must be
       used.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 50, 5

    # Rank-one data: product of a column vector and a row vector.
    column = rng.rand(n_samples, 1)
    row = rng.rand(1, n_features)
    X = column.dot(row)

    # Replace the entries selected by the random mask with NaN.
    hidden = rng.rand(n_samples, n_features) < 0.5
    X_missing = np.where(hidden, np.nan, X)

    # Settings common to every imputer built below.
    shared = dict(sample_posterior=False, verbose=1, random_state=rng)

    # Fit 1: generous budget, loose tolerance.
    tolerant = IterativeImputer(max_iter=100, tol=1e-2, **shared)
    X_filled_100 = tolerant.fit_transform(X_missing)
    n_steps = len(tolerant.imputation_sequence_)
    assert n_steps == n_features * tolerant.n_iter_

    # Fit 2: budget capped at the number of iterations fit 1 performed.
    capped = IterativeImputer(max_iter=tolerant.n_iter_, **shared)
    X_filled_early = capped.fit_transform(X_missing)
    assert_allclose(X_filled_100, X_filled_early, atol=1e-7)

    # Fit 3: zero tolerance, so the full budget is expected to be consumed.
    exhaustive = IterativeImputer(max_iter=100, tol=0, **shared)
    exhaustive.fit(X_missing)
    assert exhaustive.n_iter_ == exhaustive.max_iter
def test_iterative_imputer_verbose():
    """Smoke-test ``fit`` and ``transform`` at verbosity levels 1 and 2.

    The test contains no assertions; it only requires that a single
    imputation round runs to completion at each verbosity level, with zeros
    treated as the missing-value marker.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 3
    sparse_X = sparse_random_matrix(
        n_samples, n_features, density=0.10, random_state=rng
    )
    X = sparse_X.toarray()

    for level in (1, 2):
        imputer = IterativeImputer(missing_values=0, max_iter=1, verbose=level)
        imputer.fit(X)
        imputer.transform(X)
def test_iterative_imputer_no_missing():
    """Check the handling of a column that is NaN in every row.

    The first column of ``X`` is set to NaN throughout while all other
    entries are observed.  The imputed output is expected to equal ``X``
    without that first column, and ``fit(X).transform(X)`` is expected to
    agree with ``fit_transform(X)``.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(100, 100)
    X[:, 0] = np.nan

    # Both imputers are built up front and share the same random state.
    two_step = IterativeImputer(max_iter=10, random_state=rng)
    one_step = IterativeImputer(max_iter=10, random_state=rng)

    two_step.fit(X)
    out_two_step = two_step.transform(X)
    out_one_step = one_step.fit_transform(X)

    # The first column should be left out of the output entirely.
    observed_columns = X[:, 1:]
    assert_allclose(observed_columns, out_two_step)

    # Both ways of fitting and transforming should give the same result.
    assert_allclose(out_two_step, out_one_step)
def test_iterative_imputer_transform_stochasticity():
    """Check when repeated ``transform`` calls may or may not differ.

    * With ``sample_posterior=True``, two ``transform`` calls on the same
      fitted imputer are expected to produce outputs with different means.
    * With ``sample_posterior=False``, ``n_nearest_features=None`` and a
      non-random imputation order, ``transform`` is expected to give the
      same output on repeated calls and across imputers that were given
      different random states.

    The test is skipped when scipy older than 0.17.0 is installed.
    """
    pytest.importorskip("scipy", minversion="0.17.0")
    rng1 = np.random.RandomState(0)
    rng2 = np.random.RandomState(1)
    n_samples, n_features = 100, 10
    X = sparse_random_matrix(
        n_samples, n_features, density=0.10, random_state=rng1
    ).toarray()

    # Stochastic case: posterior sampling makes each transform a fresh draw.
    sampler = IterativeImputer(
        missing_values=0,
        max_iter=1,
        sample_posterior=True,
        random_state=rng1,
    )
    sampler.fit(X)
    first_draw = sampler.transform(X)
    second_draw = sampler.transform(X)

    # Comparing the means is enough to show that the draws differ.
    mean_first = np.mean(first_draw)
    mean_second = np.mean(second_draw)
    assert mean_first != pytest.approx(mean_second)

    # Deterministic case: no posterior sampling, every feature used as a
    # predictor, fixed imputation order.  The random state should not matter.
    deterministic = dict(
        missing_values=0,
        max_iter=1,
        sample_posterior=False,
        n_nearest_features=None,
        imputation_order='ascending',
    )
    imputer1 = IterativeImputer(random_state=rng1, **deterministic)
    imputer2 = IterativeImputer(random_state=rng2, **deterministic)
    imputer1.fit(X)
    imputer2.fit(X)

    X_fitted_1a = imputer1.transform(X)
    X_fitted_1b = imputer1.transform(X)
    X_fitted_2 = imputer2.transform(X)

    assert_allclose(X_fitted_1a, X_fitted_1b)
    assert_allclose(X_fitted_1a, X_fitted_2)