def test_imputer(failure_logger, random_seed, int_dataset,  # noqa: F811
                 strategy, missing_values, add_indicator):
    """Check that cuSimpleImputer and skSimpleImputer agree on int_dataset.

    ``int_dataset`` unpacks into three variants (zero-, one- and NaN-filled).
    Each variant is a pair ``(X_np, X)``: ``X_np`` is given to
    ``skSimpleImputer`` and ``X`` to ``cuSimpleImputer``. Both imputers are
    built with identical arguments and their outputs are compared with
    ``assert_allclose``. The cu output must also have the same type as ``X``.
    """
    zero_filled, one_filled, nan_filled = int_dataset

    # Select the variant keyed by ``missing_values``; anything other than
    # 0 or 1 falls through to the NaN-filled one.
    X_np, X = (
        zero_filled if missing_values == 0
        else one_filled if missing_values == 1
        else nan_filled
    )

    # Seed first so the randomly drawn fill value is reproducible.
    np.random.seed(random_seed)
    fill_value = np.random.randint(10, size=1)[0]

    # One argument set shared by both implementations.
    imputer_kwargs = dict(
        copy=True,
        missing_values=missing_values,
        strategy=strategy,
        fill_value=fill_value,
        add_indicator=add_indicator,
    )

    transformed = cuSimpleImputer(**imputer_kwargs).fit_transform(X)
    assert type(transformed) == type(X)

    expected = skSimpleImputer(**imputer_kwargs).fit_transform(X_np)
    assert_allclose(transformed, expected)
def test_imputer_sparse(sparse_imputer_dataset,  # noqa: F811
                        strategy):
    """Check that cuSimpleImputer and skSimpleImputer agree on sparse input.

    ``sparse_imputer_dataset`` unpacks into ``(missing_values, X_sp, X)``:
    ``X`` is given to ``cuSimpleImputer`` and ``X_sp`` to
    ``skSimpleImputer``. Inputs whose ``format`` is ``'csr'`` are skipped.

    NOTE(review): a later function in this file uses the same name, so that
    definition replaces this one at import time and pytest would collect
    only the later one -- confirm whether this version is still needed.
    """
    missing_values, X_sp, X = sparse_imputer_dataset

    if X.format == 'csr':
        pytest.skip("Skipping CSR matrices")

    # No seed is set in this test, so the fill value depends on the global
    # NumPy random state at the time of the call.
    fill_value = np.random.randint(10, size=1)[0]

    # One argument set shared by both implementations.
    imputer_kwargs = dict(
        copy=True,
        missing_values=missing_values,
        strategy=strategy,
        fill_value=fill_value,
    )

    transformed = cuSimpleImputer(**imputer_kwargs).fit_transform(X)

    # Exact type equality is not asserted; the output only has to stay
    # sparse under whichever sparse library recognises the input.
    for issparse in (cpx.scipy.sparse.issparse, scipy.sparse.issparse):
        if issparse(X):
            assert issparse(transformed)

    expected = skSimpleImputer(**imputer_kwargs).fit_transform(X_sp)
    assert_allclose(transformed, expected)
def test_imputer_sparse(failure_logger, random_seed,
                        sparse_int_dataset, strategy,  # noqa: F811
                        missing_values):
    """Check that cuSimpleImputer and skSimpleImputer agree on sparse ints.

    ``sparse_int_dataset`` unpacks into ``(X_np, X)``. ``X`` is given to
    ``cuSimpleImputer`` and a CSC conversion of ``X_np`` is given to
    ``skSimpleImputer``. Inputs whose ``format`` is ``'csr'`` are skipped.
    When ``missing_values`` is NaN, NaNs are written into both matrices
    before imputing.
    """
    X_np, X = sparse_int_dataset

    if X.format == 'csr':
        pytest.skip("Skipping CSR matrices")

    X_sp = X_np.tocsc()

    # Seed before any draw so the NaN positions and the fill value are
    # reproducible for a given ``random_seed``.
    np.random.seed(random_seed)

    if np.isnan(missing_values):
        # NaN is the missing-value marker: overwrite a random 10% of the
        # stored entries with NaN, applying the same index set to the
        # ``.data`` array of both matrices.
        n_nan = int(X.nnz * 0.1)
        random_loc = np.random.choice(X.nnz, n_nan, replace=False)
        # NOTE(review): X_sp is written in place; this leaves X_np untouched
        # only if ``tocsc()`` returned a new matrix -- confirm X_np is never
        # already CSC.
        X_sp.data[random_loc] = np.nan
        # X is copied first so the object handed out by the fixture is not
        # the one being modified.
        X = X.copy()
        X.data[random_loc] = np.nan

    fill_value = np.random.randint(10, size=1)[0]

    # One argument set shared by both implementations.
    imputer_kwargs = dict(
        copy=True,
        missing_values=missing_values,
        strategy=strategy,
        fill_value=fill_value,
    )

    transformed = cuSimpleImputer(**imputer_kwargs).fit_transform(X)

    # Exact type equality is not asserted; the output only has to stay
    # sparse under whichever sparse library recognises the input.
    for issparse in (cp.sparse.issparse, scipy.sparse.issparse):
        if issparse(X):
            assert issparse(transformed)

    expected = skSimpleImputer(**imputer_kwargs).fit_transform(X_sp)
    assert_allclose(transformed, expected)
def test__repr__():
    """Check the ``__repr__`` of each listed class built with no arguments.

    Every class is instantiated with its defaults and its ``__repr__()``
    must equal the paired string.
    """
    expected_reprs = (
        (cuStandardScaler, 'StandardScaler()'),
        (cuMinMaxScaler, 'MinMaxScaler()'),
        (cuMaxAbsScaler, 'MaxAbsScaler()'),
        (cuNormalizer, 'Normalizer()'),
        (cuBinarizer, 'Binarizer()'),
        (cuPolynomialFeatures, 'PolynomialFeatures()'),
        (cuSimpleImputer, 'SimpleImputer()'),
        (cuRobustScaler, 'RobustScaler()'),
        (cuKBinsDiscretizer, 'KBinsDiscretizer()'),
    )

    for estimator_cls, expected in expected_reprs:
        assert estimator_cls().__repr__() == expected