def test_imputation_error_invalid_strategy(strategy):
    """Check that imputing with ``strategy`` raises a matching ValueError.

    The raised message is expected to match ``str(strategy)``.
    """
    data = np.ones((3, 5))
    data[0, 0] = np.nan  # a single missing entry in an otherwise constant array

    # Construction and fitting both happen inside the context manager, so an
    # error raised at either step satisfies the expectation.
    with pytest.raises(ValueError, match=str(strategy)):
        SimpleImputer(strategy=strategy).fit_transform(data)
def test_imputation_deletion_warning(strategy):
    """Check that a fully-missing column triggers a "Deleting" UserWarning.

    The first column of the input is entirely NaN and the imputer is created
    with ``verbose=True``.
    """
    data = np.ones((3, 5))
    data[:, 0] = np.nan  # first column has no observed values at all

    with pytest.warns(UserWarning, match="Deleting"):
        SimpleImputer(strategy=strategy, verbose=True).fit_transform(data)
def test_imputation_mean_median_error_invalid_type(strategy, dtype):
    """Check that string-containing arrays are rejected with a ValueError.

    The input mixes strings and integers and is built with the requested
    ``dtype``; fitting with ``strategy`` must raise a ValueError whose
    message matches the "could not convert string to float" pattern.
    """
    rows = [
        ["a", "b", 3],
        [4, "e", 6],
        ["g", "h", 9],
    ]
    data = np.array(rows, dtype=dtype)
    expected_msg = "non-numeric data:\ncould not convert string to float: '"

    with pytest.raises(ValueError, match=expected_msg):
        SimpleImputer(strategy=strategy).fit_transform(data)
def test_imputation_constant_error_invalid_type(X_data, missing_value):
    """Check that a string ``fill_value`` is rejected for float input.

    A float array filled with ``X_data`` gets one ``missing_value`` entry and
    is imputed with the "constant" strategy and ``fill_value="x"``; a
    ValueError matching "imputing numerical" is expected.
    """
    data = np.full((3, 5), X_data, dtype=float)
    data[0, 0] = missing_value

    with pytest.raises(ValueError, match="imputing numerical"):
        SimpleImputer(
            strategy="constant",
            fill_value="x",
            missing_values=missing_value,
        ).fit_transform(data)
def test_imputation_mean_median_error_invalid_type_list_pandas(strategy, type):
    """Check the non-numeric error for list and pandas DataFrame inputs.

    The data is a nested list mixing strings and integers. When ``type`` is
    ``'dataframe'`` it is wrapped in a pandas DataFrame (the test is skipped
    if pandas is unavailable); otherwise the plain list is used.
    """
    data = [
        ["a", "b", 3],
        [4, "e", 6],
        ["g", "h", 9],
    ]
    if type == 'dataframe':
        pd = pytest.importorskip("pandas")
        data = pd.DataFrame(data)

    expected_msg = "non-numeric data:\ncould not convert string to float: '"
    with pytest.raises(ValueError, match=expected_msg):
        SimpleImputer(strategy=strategy).fit_transform(data)
def test_imputation_shape(strategy):
    """Check that imputation keeps the (10, 2) input shape.

    Covers SimpleImputer on CSR-sparse and dense input, and IterativeImputer
    (using ``strategy`` as its initial strategy) on dense input.
    """
    expected_shape = (10, 2)
    data = np.random.randn(10, 2)
    data[::2] = np.nan  # every other row is entirely missing

    simple = SimpleImputer(strategy=strategy)
    # Sparse input first, then the dense array, with the same imputer object.
    for candidate in (sparse.csr_matrix(data), data):
        assert simple.fit_transform(candidate).shape == expected_shape

    iterative = IterativeImputer(initial_strategy=strategy)
    assert iterative.fit_transform(data).shape == expected_shape
def test_simple_imputation_add_indicator_sparse_matrix(arr_type):
    """Check ``add_indicator=True`` on input built by ``arr_type``.

    The result must be sparse and equal the expected matrix: three imputed
    feature columns (each NaN replaced by the mean of the observed values in
    its column) followed by three indicator columns flagging where the
    original entries were missing.
    """
    sparse_input = arr_type([
        [np.nan, 1, 5],
        [2, np.nan, 1],
        [6, 3, np.nan],
        [1, 2, 9],
    ])
    expected = np.array([
        [3., 1., 5., 1., 0., 0.],
        [2., 2., 1., 0., 1., 0.],
        [6., 3., 5., 0., 0., 1.],
        [1., 2., 9., 0., 0., 0.],
    ])

    result = SimpleImputer(
        missing_values=np.nan, add_indicator=True
    ).fit_transform(sparse_input)

    assert sparse.issparse(result)
    assert result.shape == expected.shape
    assert_allclose(result.toarray(), expected)
def test_imputation_constant_object(marker):
    """Check the "constant" strategy on an object-dtype array.

    Every occurrence of ``marker`` must be replaced by the string
    ``"missing"``, including the last column, which holds only ``marker``.
    """
    raw = np.array([
        [marker, "a", "b", marker],
        ["c", marker, "d", marker],
        ["e", "f", marker, marker],
        ["g", "h", "i", marker],
    ], dtype=object)
    expected = np.array([
        ["missing", "a", "b", "missing"],
        ["c", "missing", "d", "missing"],
        ["e", "f", "missing", "missing"],
        ["g", "h", "i", "missing"],
    ], dtype=object)

    filler = SimpleImputer(
        strategy="constant", fill_value="missing", missing_values=marker
    )
    assert_array_equal(filler.fit_transform(raw), expected)
def test_imputation_constant_integer():
    """Check the "constant" strategy on an integer array.

    ``-1`` marks missing entries and must be replaced by ``0``; genuine
    zeros already present in the data are left untouched.
    """
    raw = np.array([
        [-1, 2, 3, -1],
        [4, -1, 5, -1],
        [6, 7, -1, -1],
        [8, 9, 0, -1],
    ])
    expected = np.array([
        [0, 2, 3, 0],
        [4, 0, 5, 0],
        [6, 7, 0, 0],
        [8, 9, 0, 0],
    ])

    filler = SimpleImputer(strategy="constant", fill_value=0, missing_values=-1)
    assert_array_equal(filler.fit_transform(raw), expected)
def test_imputation_constant_pandas(dtype):
    """Check the "constant" strategy on a pandas DataFrame read from CSV.

    No ``fill_value`` is passed; the empty CSV cells are expected to come
    back as the string ``"missing_value"``. Skipped if pandas is missing.
    """
    pd = pytest.importorskip("pandas")

    csv_buffer = io.StringIO("Cat1,Cat2,Cat3,Cat4\n"
                             ",i,x,\n"
                             "a,,y,\n"
                             "a,j,,\n"
                             "b,j,x,")
    frame = pd.read_csv(csv_buffer, dtype=dtype)

    expected = np.array([
        ["missing_value", "i", "x", "missing_value"],
        ["a", "missing_value", "y", "missing_value"],
        ["a", "j", "missing_value", "missing_value"],
        ["b", "j", "x", "missing_value"],
    ], dtype=object)

    result = SimpleImputer(strategy="constant").fit_transform(frame)
    assert_array_equal(result, expected)
def test_imputation_constant_float(array_constructor):
    """Check the "constant" strategy on float data.

    NaN entries must be replaced by ``-1`` while existing zeros are kept.
    Both the input and the expected output are passed through
    ``array_constructor`` before the comparison.
    """
    dense_input = np.array([
        [np.nan, 1.1, 0, np.nan],
        [1.2, np.nan, 1.3, np.nan],
        [0, 0, np.nan, np.nan],
        [1.4, 1.5, 0, np.nan],
    ])
    dense_expected = np.array([
        [-1, 1.1, 0, -1],
        [1.2, -1, 1.3, -1],
        [0, 0, -1, -1],
        [1.4, 1.5, 0, -1],
    ])

    # Convert input first, then the expected result, as separate steps.
    converted_input = array_constructor(dense_input)
    converted_expected = array_constructor(dense_expected)

    filler = SimpleImputer(strategy="constant", fill_value=-1)
    result = filler.fit_transform(converted_input)

    assert_allclose_dense_sparse(result, converted_expected)