def test_imputation_pickle(self): """Test for pickling imputers.""" import pickle l = 100 X = sparse_random_matrix(l, l, density=0.10) for strategy in ["mean", "median", "most_frequent"]: imputer = Imputer(missing_values=0, strategy=strategy) imputer.fit(X) imputer_pickled = pickle.loads(pickle.dumps(imputer)) assert_array_equal(imputer.transform(X.copy()), imputer_pickled.transform(X.copy()), "Fail to transform the data after pickling " "(strategy = %s)" % (strategy))
def _check_statistics(self, X, X_true, strategy, statistics, missing_values): """Utility function for testing imputation for a given strategy. Test: - along the two axes - with dense and sparse arrays Check that: - the statistics (mean, median, mode) are correct - the missing values are imputed correctly""" err_msg = "Parameters: strategy = %s, missing_values = %s, " \ "axis = {0}, sparse = {1}" % (strategy, missing_values) # Normal matrix, axis = 0 imputer = Imputer(missing_values, strategy=strategy, axis=0) X_trans = imputer.fit(X).transform(X.copy()) assert_array_equal(imputer.statistics_, statistics, err_msg.format(0, False)) assert_array_equal(X_trans, X_true, err_msg.format(0, False)) # Normal matrix, axis = 1 imputer = Imputer(missing_values, strategy=strategy, axis=1) imputer.fit(X.transpose()) if np.isnan(statistics).any(): assert_raises(ValueError, imputer.transform, X.copy().transpose()) else: X_trans = imputer.transform(X.copy().transpose()) assert_array_equal(X_trans, X_true.transpose(), err_msg.format(1, False)) # Sparse matrix, axis = 0 imputer = Imputer(missing_values, strategy=strategy, axis=0) imputer.fit(sparse.csc_matrix(X)) X_trans = imputer.transform(sparse.csc_matrix(X.copy())) if sparse.issparse(X_trans): X_trans = X_trans.toarray() assert_array_equal(imputer.statistics_, statistics, err_msg.format(0, True)) assert_array_equal(X_trans, X_true, err_msg.format(0, True)) # Sparse matrix, axis = 1 imputer = Imputer(missing_values, strategy=strategy, axis=1) imputer.fit(sparse.csc_matrix(X.transpose())) if np.isnan(statistics).any(): assert_raises(ValueError, imputer.transform, sparse.csc_matrix(X.copy().transpose())) else: X_trans = imputer.transform(sparse.csc_matrix(X.copy().transpose())) if sparse.issparse(X_trans): X_trans = X_trans.toarray() assert_array_equal(X_trans, X_true.transpose(), err_msg.format(1, True))
def test_imputation_copy(self): """Test imputation with copy""" X_orig = sparse_random_matrix(5, 5, density=0.75, random_state=0) # copy=True, dense => copy X = X_orig.copy().toarray() imputer = Imputer(missing_values=0, strategy="mean", copy=True) Xt = imputer.fit(X).transform(X) Xt[0, 0] = -1 assert_false(np.all(X == Xt)) # copy=True, sparse csr => copy X = X_orig.copy() imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=True) Xt = imputer.fit(X).transform(X) Xt.data[0] = -1 assert_false(np.all(X.data == Xt.data)) # copy=False, dense => no copy X = X_orig.copy().toarray() imputer = Imputer(missing_values=0, strategy="mean", copy=False) Xt = imputer.fit(X).transform(X) Xt[0, 0] = -1 assert_true(np.all(X == Xt)) # copy=False, sparse csr, axis=1 => no copy X = X_orig.copy() imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=1) Xt = imputer.fit(X).transform(X) Xt.data[0] = -1 assert_true(np.all(X.data == Xt.data)) # copy=False, sparse csc, axis=0 => no copy X = X_orig.copy().tocsc() imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=0) Xt = imputer.fit(X).transform(X) Xt.data[0] = -1 assert_true(np.all(X.data == Xt.data)) # copy=False, sparse csr, axis=0 => copy X = X_orig.copy() imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=0) Xt = imputer.fit(X).transform(X) Xt.data[0] = -1 assert_false(np.all(X.data == Xt.data)) # copy=False, sparse csc, axis=1 => copy X = X_orig.copy().tocsc() imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=1) Xt = imputer.fit(X).transform(X) Xt.data[0] = -1 assert_false(np.all(X.data == Xt.data)) # copy=False, sparse csr, axis=1, missing_values=0 => copy X = X_orig.copy() imputer = Imputer(missing_values=0, strategy="mean", copy=False, axis=1) Xt = imputer.fit(X).transform(X) assert_false(sparse.issparse(Xt))