示例#1
0
def test_imputer(
        failure_logger,
        random_seed,
        int_dataset,  # noqa: F811
        strategy,
        missing_values,
        add_indicator):
    """Check that cuSimpleImputer and skSimpleImputer impute identically.

    ``int_dataset`` unpacks into three ``(X_np, X)`` pairs named
    zero-filled, one-filled and nan-filled; the pair matching
    ``missing_values`` is used. ``X`` is fed to ``cuSimpleImputer`` and
    ``X_np`` to ``skSimpleImputer``, both built with the same arguments.
    The cu output must have the same type as its input and must match the
    sk output under ``assert_allclose``.
    """
    zero_filled, one_filled, nan_filled = int_dataset

    # Pick the variant whose marker matches the requested missing value;
    # anything other than 0 or 1 falls through to the nan-filled variant.
    selected_pair = (zero_filled if missing_values == 0
                     else one_filled if missing_values == 1
                     else nan_filled)
    X_np, X = selected_pair

    # Seed before drawing so the fill constant is reproducible per seed.
    np.random.seed(random_seed)
    fill_value = np.random.randint(10, size=1)[0]

    # One set of constructor arguments shared by both implementations.
    imputer_kwargs = dict(copy=True,
                          missing_values=missing_values,
                          strategy=strategy,
                          fill_value=fill_value,
                          add_indicator=add_indicator)

    cu_imputer = cuSimpleImputer(**imputer_kwargs)
    cu_result = cu_imputer.fit_transform(X)
    assert type(cu_result) == type(X)

    sk_imputer = skSimpleImputer(**imputer_kwargs)
    sk_result = sk_imputer.fit_transform(X_np)

    assert_allclose(cu_result, sk_result)
示例#2
0
def test_imputer_sparse(
        sparse_imputer_dataset,  # noqa: F811
        strategy):
    """Compare cuSimpleImputer with skSimpleImputer on sparse input.

    ``sparse_imputer_dataset`` unpacks into ``(missing_values, X_sp, X)``.
    ``X`` is given to ``cuSimpleImputer`` and ``X_sp`` to
    ``skSimpleImputer``; the two transformed outputs must agree under
    ``assert_allclose``. Inputs whose ``format`` is ``'csr'`` are skipped.
    """
    missing_values, X_sp, X = sparse_imputer_dataset

    if X.format == 'csr':
        pytest.skip("Skipping CSR matrices")

    # NOTE(review): no seed is set inside this test, so the fill constant
    # depends on whatever state the global NumPy RNG is in when it runs.
    fill_value = np.random.randint(10, size=1)[0]

    # One set of constructor arguments shared by both implementations.
    imputer_kwargs = dict(copy=True,
                          missing_values=missing_values,
                          strategy=strategy,
                          fill_value=fill_value)

    cu_result = cuSimpleImputer(**imputer_kwargs).fit_transform(X)

    # Exact type equality is not asserted; instead the output only has to
    # stay sparse according to whichever sparse module recognises the input.
    for sparse_module in (cpx.scipy.sparse, scipy.sparse):
        if sparse_module.issparse(X):
            assert sparse_module.issparse(cu_result)

    sk_result = skSimpleImputer(**imputer_kwargs).fit_transform(X_sp)
    assert_allclose(cu_result, sk_result)
示例#3
0
def test_imputer_sparse(failure_logger, random_seed,
                        sparse_int_dataset, strategy,  # noqa: F811
                        missing_values):
    """Compare cuSimpleImputer with skSimpleImputer on sparse input.

    ``sparse_int_dataset`` unpacks into ``(X_np, X)``. ``X`` is given to
    ``cuSimpleImputer`` and a CSC conversion of ``X_np`` to
    ``skSimpleImputer``; the transformed outputs must agree under
    ``assert_allclose``. Inputs whose ``format`` is ``'csr'`` are skipped.
    When ``missing_values`` is nan, nan is written into the same randomly
    chosen positions of both matrices' ``data`` arrays before imputing.
    """
    X_np, X = sparse_int_dataset

    if X.format == 'csr':
        pytest.skip("Skipping CSR matrices")

    X_sp = X_np.tocsc()

    np.random.seed(random_seed)
    if np.isnan(missing_values):
        # Mark 10% of the stored entries (rounded down) as nan, using the
        # same indices into the data arrays of both matrices.
        n_missing = int(X.nnz * 0.1)
        nan_positions = np.random.choice(X.nnz, n_missing, replace=False)
        X_sp.data[nan_positions] = np.nan
        # Work on a copy so the object unpacked from the fixture is not
        # mutated.
        X = X.copy()
        X.data[nan_positions] = np.nan

    fill_value = np.random.randint(10, size=1)[0]

    # One set of constructor arguments shared by both implementations.
    imputer_kwargs = dict(copy=True, missing_values=missing_values,
                          strategy=strategy, fill_value=fill_value)

    cu_result = cuSimpleImputer(**imputer_kwargs).fit_transform(X)

    # Exact type equality is not asserted; instead the output only has to
    # stay sparse according to whichever sparse module recognises the input.
    for sparse_module in (cp.sparse, scipy.sparse):
        if sparse_module.issparse(X):
            assert sparse_module.issparse(cu_result)

    sk_result = skSimpleImputer(**imputer_kwargs).fit_transform(X_sp)
    assert_allclose(cu_result, sk_result)
示例#4
0
def test__repr__():
    """Check the repr of each default-constructed cu* class.

    Every class listed below is instantiated with no arguments and its
    ``repr()`` must equal the paired string, i.e. the class name without
    the ``cu`` prefix followed by empty parentheses.
    """
    expected_reprs = (
        (cuStandardScaler, 'StandardScaler()'),
        (cuMinMaxScaler, 'MinMaxScaler()'),
        (cuMaxAbsScaler, 'MaxAbsScaler()'),
        (cuNormalizer, 'Normalizer()'),
        (cuBinarizer, 'Binarizer()'),
        (cuPolynomialFeatures, 'PolynomialFeatures()'),
        (cuSimpleImputer, 'SimpleImputer()'),
        (cuRobustScaler, 'RobustScaler()'),
        (cuKBinsDiscretizer, 'KBinsDiscretizer()'),
    )
    for estimator_cls, expected in expected_reprs:
        # Go through the built-in repr() rather than calling the dunder
        # method directly; this is the path interactive users actually hit.
        assert repr(estimator_cls()) == expected