Пример #1
0
def test_column_transformer(
        clf_dataset,
        remainder,  # noqa: F811
        transformer_weights):
    """Compare cuColumnTransformer with skColumnTransformer on one dataset.

    A standard scaler is routed to columns 0 and 2 and a normalizer to
    columns 1 and 3. The test asserts that:

    * the output of ``transform`` has the same type as the input ``X``;
    * both ``fit_transform`` and ``transform`` of the cu transformer are
      close to the ``fit_transform`` result of the sk transformer.

    ``remainder`` and ``transformer_weights`` are forwarded unchanged to
    both column transformers.
    """
    sk_X, cu_X = clf_dataset

    scaler_cols = [0, 2]
    normalizer_cols = [1, 3]

    # Dataframe inputs get 'c<i>' string selectors in place of the integer
    # ones (presumably the column labels used by the fixture -- confirm).
    if isinstance(cu_X, (pdDataFrame, cuDataFrame)):
        cu_scaler_cols = [f'c{idx}' for idx in scaler_cols]
        cu_normalizer_cols = [f'c{idx}' for idx in normalizer_cols]
    else:
        cu_scaler_cols = scaler_cols
        cu_normalizer_cols = normalizer_cols

    cu_steps = [
        ("scaler", cuStandardScaler(), cu_scaler_cols),
        ("normalizer", cuNormalizer(), cu_normalizer_cols),
    ]
    cu_ct = cuColumnTransformer(
        cu_steps,
        remainder=remainder,
        transformer_weights=transformer_weights,
    )
    cu_fit_out = cu_ct.fit_transform(cu_X)
    cu_out = cu_ct.transform(cu_X)
    assert type(cu_out) == type(cu_X)

    sk_steps = [
        ("scaler", skStandardScaler(), scaler_cols),
        ("normalizer", skNormalizer(), normalizer_cols),
    ]
    sk_ct = skColumnTransformer(
        sk_steps,
        remainder=remainder,
        transformer_weights=transformer_weights,
    )
    sk_out = sk_ct.fit_transform(sk_X)

    # Both code paths of the cu transformer are checked against the same
    # sk reference result.
    assert_allclose(cu_fit_out, sk_out)
    assert_allclose(cu_out, sk_out)
Пример #2
0
def test_make_column_selector():
    """Check cu_make_column_selector against sk_make_column_selector.

    A small pandas frame with a string, an integer and a float column is
    mirrored into cudf. The same three-step column transformer is built
    for each library, with columns chosen by selectors:

    * one-hot encoder   -> ``dtype_exclude=np.number``
    * standard scaler   -> ``dtype_include=np.integer``
    * normalizer        -> ``pattern="temp"``

    The cu result must be close to the sk result and must have the same
    type as the cudf input.
    """
    pd_frame = pdDataFrame({
        'city': ['London', 'London', 'Paris', 'Sallisaw'],
        'rating': [5, 3, 4, 5],
        'temperature': [21., 21., 24., 28.]
    })
    cudf_frame = cudf.from_pandas(pd_frame)

    def _fit_transform(column_transformer, one_hot, scaler, normalizer,
                       selector, data):
        # Build the selector-driven pipeline for one library and run it.
        steps = [
            ("ohe", one_hot(), selector(dtype_exclude=np.number)),
            ("scaler", scaler(), selector(dtype_include=np.integer)),
            ("normalizer", normalizer(), selector(pattern="temp")),
        ]
        pipeline = column_transformer(steps, remainder='drop')
        return pipeline.fit_transform(data)

    cu_out = _fit_transform(cuColumnTransformer, cuOneHotEncoder,
                            cuStandardScaler, cuNormalizer,
                            cu_make_column_selector, cudf_frame)
    sk_out = _fit_transform(skColumnTransformer, skOneHotEncoder,
                            skStandardScaler, skNormalizer,
                            sk_make_column_selector, pd_frame)

    assert_allclose(cu_out, sk_out)
    assert type(cu_out) == type(cudf_frame)
Пример #3
0
def test_column_transformer_get_feature_names(clf_dataset):  # noqa: F811
    """Check that get_feature_names() agrees between cu and sk.

    Each column transformer wraps a single polynomial-features step on
    columns 0 and 2, is fitted on its own half of ``clf_dataset``, and the
    resulting feature-name lists are compared for equality.
    """
    sk_X, cu_X = clf_dataset
    step_name = "PolynomialFeatures"
    columns = (0, 2)

    cu_ct = cuColumnTransformer(
        [(step_name, cuPolynomialFeatures(), list(columns))])
    cu_ct.fit_transform(cu_X)
    cu_names = cu_ct.get_feature_names()

    sk_ct = skColumnTransformer(
        [(step_name, skPolynomialFeatures(), list(columns))])
    sk_ct.fit_transform(sk_X)
    sk_names = sk_ct.get_feature_names()

    assert cu_names == sk_names
Пример #4
0
def test_column_transformer_named_transformers_(clf_dataset):  # noqa: F811
    """Check that named_transformers_ exposes the same keys in cu and sk.

    Each column transformer wraps a single polynomial-features step on
    columns 0 and 2 and is fitted on its own half of ``clf_dataset``. Only
    the key sets of ``named_transformers_`` are compared, not the fitted
    transformer objects themselves.
    """
    sk_X, cu_X = clf_dataset
    step_name = "PolynomialFeatures"
    columns = (0, 2)

    cu_ct = cuColumnTransformer(
        [(step_name, cuPolynomialFeatures(), list(columns))])
    cu_ct.fit_transform(cu_X)
    cu_named = cu_ct.named_transformers_

    sk_ct = skColumnTransformer(
        [(step_name, skPolynomialFeatures(), list(columns))])
    sk_ct.fit_transform(sk_X)
    sk_named = sk_ct.named_transformers_

    assert cu_named.keys() == sk_named.keys()
Пример #5
0
def test_column_transformer_sparse(sparse_clf_dataset, remainder,  # noqa: F811
                                   transformer_weights, sparse_threshold):
    """Compare cuColumnTransformer with skColumnTransformer on sparse input.

    A mean-free standard scaler is routed to columns 0 and 2 and a
    normalizer to columns 1 and 3. ``remainder``, ``transformer_weights``
    and ``sparse_threshold`` are forwarded unchanged to both column
    transformers. The test asserts that:

    * when the input density is below ``sparse_threshold``, the output of
      ``transform`` has the same type as the sparse input ``X``;
    * both ``fit_transform`` and ``transform`` of the cu transformer are
      close to the ``fit_transform`` result of the sk transformer.

    Inputs in ``'csc'`` format are marked as expected failures.
    """
    X_np, X = sparse_clf_dataset

    if X.format == 'csc':
        pytest.xfail()

    # Density = stored entries / total number of cells. ``X.size`` must not
    # be used as the denominator: on scipy-style sparse matrices it is the
    # number of stored entries, which makes the ratio always 1 and silently
    # disables the type check below.
    n_rows, n_cols = X.shape
    dataset_density = X.nnz / (n_rows * n_cols)

    cu_transformers = [
        ("scaler", cuStandardScaler(with_mean=False), [0, 2]),
        ("normalizer", cuNormalizer(), [1, 3])
    ]

    transformer = cuColumnTransformer(cu_transformers,
                                      remainder=remainder,
                                      transformer_weights=transformer_weights,
                                      sparse_threshold=sparse_threshold)
    ft_X = transformer.fit_transform(X)
    t_X = transformer.transform(X)
    if dataset_density < sparse_threshold:
        # Sparse input -> sparse output if density < sparse_threshold,
        # otherwise sparse input -> dense output.
        # NOTE(review): the input density is used as a stand-in for the
        # density of the stacked transformer outputs -- confirm that this
        # holds for every `remainder` setting.
        assert type(t_X) == type(X)

    sk_transformers = [
        ("scaler", skStandardScaler(with_mean=False), [0, 2]),
        ("normalizer", skNormalizer(), [1, 3])
    ]

    transformer = skColumnTransformer(sk_transformers,
                                      remainder=remainder,
                                      transformer_weights=transformer_weights,
                                      sparse_threshold=sparse_threshold)
    sk_t_X = transformer.fit_transform(X_np)

    assert_allclose(ft_X, sk_t_X)
    assert_allclose(t_X, sk_t_X)