Exemplo n.º 1
0
def test_imputer_sparse(
        sparse_int_dataset,
        strategy,  # noqa: F811
        missing_values):
    """Compare cuSimpleImputer with skSimpleImputer on sparse input.

    Both imputers are built with the same ``missing_values``, ``strategy``
    and randomly drawn ``fill_value``. The test passes when the cu output
    has exactly the same type as its input and matches the sk output.

    Parameters
    ----------
    sparse_int_dataset
        Pair ``(X_np, X)`` supplied by pytest. ``X_np`` is converted to CSC
        and given to the sk imputer; ``X`` is given to the cu imputer.
    strategy
        Imputation strategy forwarded unchanged to both imputers.
    missing_values
        Placeholder value treated as missing by both imputers; may be NaN.
    """
    X_np, X = sparse_int_dataset

    if X.format == 'csr':
        pytest.skip("Skipping CSR matrices")

    X_sp = X_np.tocsc()

    if np.isnan(missing_values):
        # Adding nan when missing value is nan: overwrite 10% of the stored
        # entries, using the same indices into both data arrays.
        # NOTE(review): assumes X and X_sp keep their stored entries in the
        # same order -- confirm against the fixture.
        random_loc = np.random.choice(X.nnz, int(X.nnz * 0.1), replace=False)
        X_sp.data[random_loc] = np.nan
        # Write into a copy rather than into the object the fixture gave us.
        X = X.copy()
        X.data[random_loc] = np.nan

    fill_value = np.random.randint(10, size=1)[0]

    cu_imputer = cuSimpleImputer(copy=True,
                                 missing_values=missing_values,
                                 strategy=strategy,
                                 fill_value=fill_value)
    t_X = cu_imputer.fit_transform(X)
    # Exact type match is intended, so compare type objects by identity.
    assert type(t_X) is type(X)

    sk_imputer = skSimpleImputer(copy=True,
                                 missing_values=missing_values,
                                 strategy=strategy,
                                 fill_value=fill_value)
    sk_t_X = sk_imputer.fit_transform(X_sp)
    assert_allclose(t_X, sk_t_X)
Exemplo n.º 2
0
def test__repr__():
    """Check the ``repr`` of each cu* class built with default arguments.

    Every class listed below is instantiated with no arguments and its
    ``repr`` must equal the class name without the ``cu`` prefix, followed
    by ``()``.
    """
    expected_reprs = (
        (cuStandardScaler, 'StandardScaler()'),
        (cuMinMaxScaler, 'MinMaxScaler()'),
        (cuMaxAbsScaler, 'MaxAbsScaler()'),
        (cuNormalizer, 'Normalizer()'),
        (cuBinarizer, 'Binarizer()'),
        (cuPolynomialFeatures, 'PolynomialFeatures()'),
        (cuSimpleImputer, 'SimpleImputer()'),
        (cuRobustScaler, 'RobustScaler()'),
        (cuKBinsDiscretizer, 'KBinsDiscretizer()'),
    )
    for estimator_cls, expected in expected_reprs:
        # Go through the repr() builtin instead of calling the dunder.
        assert repr(estimator_cls()) == expected
Exemplo n.º 3
0
def test_imputer(int_dataset, strategy, missing_values):  # noqa: F811
    """Compare cuSimpleImputer with skSimpleImputer on the same dataset.

    Both imputers are built with the same ``missing_values``, ``strategy``
    and randomly drawn ``fill_value``. The test passes when the cu output
    has exactly the same type as its input and matches the sk output.

    Parameters
    ----------
    int_dataset
        Pair ``(X_np, X)`` supplied by pytest. ``X_np`` is given to the sk
        imputer and ``X`` to the cu imputer.
    strategy
        Imputation strategy forwarded unchanged to both imputers.
    missing_values
        Placeholder value treated as missing by both imputers.
    """
    X_np, X = int_dataset
    fill_value = np.random.randint(10, size=1)[0]

    cu_imputer = cuSimpleImputer(copy=True, missing_values=missing_values,
                                 strategy=strategy, fill_value=fill_value)
    t_X = cu_imputer.fit_transform(X)
    # Exact type match is intended, so compare type objects by identity.
    assert type(t_X) is type(X)

    sk_imputer = skSimpleImputer(copy=True, missing_values=missing_values,
                                 strategy=strategy, fill_value=fill_value)
    sk_t_X = sk_imputer.fit_transform(X_np)

    assert_allclose(t_X, sk_t_X)
Exemplo n.º 4
0
def test_imputer(int_dataset, strategy, missing_values,  # noqa: F811
                 add_indicator):
    """Compare cuSimpleImputer with skSimpleImputer, with ``add_indicator``.

    The dataset variant is chosen from ``missing_values``. Both imputers are
    then built with the same ``missing_values``, ``strategy``,
    ``add_indicator`` and randomly drawn ``fill_value``. The test passes
    when the cu output has exactly the same type as its input and matches
    the sk output.

    Parameters
    ----------
    int_dataset
        Triple ``(zero_filled, one_filled, nan_filled)`` supplied by pytest;
        each element is a pair ``(X_np, X)``. ``X_np`` is given to the sk
        imputer and ``X`` to the cu imputer.
    strategy
        Imputation strategy forwarded unchanged to both imputers.
    missing_values
        Placeholder value treated as missing by both imputers.
    add_indicator
        Forwarded unchanged to both imputers.
    """
    zero_filled, one_filled, nan_filled = int_dataset
    # Pick the variant for the current placeholder. Anything other than
    # 0 or 1 falls through to the NaN variant.
    # NOTE(review): presumably each variant marks its missing entries with
    # the value in its name -- confirm against the fixture.
    if missing_values == 0:
        X_np, X = zero_filled
    elif missing_values == 1:
        X_np, X = one_filled
    else:
        X_np, X = nan_filled
    fill_value = np.random.randint(10, size=1)[0]

    cu_imputer = cuSimpleImputer(copy=True, missing_values=missing_values,
                                 strategy=strategy, fill_value=fill_value,
                                 add_indicator=add_indicator)
    t_X = cu_imputer.fit_transform(X)
    # Exact type match is intended, so compare type objects by identity.
    assert type(t_X) is type(X)

    sk_imputer = skSimpleImputer(copy=True, missing_values=missing_values,
                                 strategy=strategy, fill_value=fill_value,
                                 add_indicator=add_indicator)
    sk_t_X = sk_imputer.fit_transform(X_np)

    assert_allclose(t_X, sk_t_X)