Пример #1
0
def single_chunk_classification():
    """Build a seeded classification dataset meant to live in one chunk.

    Calls ``make_classification`` with ``chunks=100`` and
    ``random_state=0``. The chunk size of 100 is intended to keep `X` and
    `y` in a single block (this relies on the generator's default sample
    count, which is not visible here -- confirm), which is handy when
    testing `partial_fit` methods.

    Returns
    -------
    tuple
        The ``(X, y)`` pair produced by ``make_classification``.
    """
    features, labels = make_classification(random_state=0, chunks=100)
    return features, labels
Пример #2
0
def single_chunk_binary_classification():
    """Build a seeded classification dataset with integer 0/1 features.

    Calls ``make_classification`` with ``chunks=100`` and
    ``random_state=0``, then replaces `X` with an integer mask of its
    non-zero entries. The chunk size of 100 is intended to keep `X` and
    `y` in a single block (this relies on the generator's default sample
    count, which is not visible here -- confirm), which is handy when
    testing `partial_fit` methods.

    NOTE(review): ``abs(X) > 0`` is true for every entry that is not
    exactly zero, so the resulting features are 1 wherever `X` is
    non-zero. Confirm this is the intended binarisation rather than,
    e.g., ``X > 0``.

    Returns
    -------
    tuple
        The ``(X, y)`` pair, with `X` converted to 0/1 integers.
    """
    features, labels = make_classification(random_state=0, chunks=100)
    nonzero_mask = abs(features) > 0
    binary_features = nonzero_mask.astype(int)
    return binary_features, labels
Пример #3
0
from dask.array.utils import assert_eq
from daskml.datasets import make_classification
from daskml import naive_bayes as nb
from sklearn import naive_bayes as nb_

# Shared module-level test data. The generator is seeded so that every run
# works on the same dataset and any failure can be reproduced.
X, y = make_classification(chunks=50, random_state=0)
# In-memory copies of the same data, for the scikit-learn reference estimators.
X_ = X.compute()
y_ = y.compute()


def test_smoke():
    """Check that the dask GaussianNB agrees with scikit-learn's GaussianNB.

    Both estimators are fit on the same module-level data (`X`, `y`); the
    fitted attributes and the three prediction methods are then compared
    with ``assert_eq``.
    """
    dask_model = nb.GaussianNB()
    sklearn_model = nb_.GaussianNB()
    dask_model.fit(X, y)
    sklearn_model.fit(X.compute(), y.compute())

    # Fitted attributes: the dask-side value is computed before comparing.
    for attr in ("class_prior_", "class_count_", "theta_", "sigma_"):
        assert_eq(
            getattr(dask_model, attr).compute(),
            getattr(sklearn_model, attr),
        )

    # Predictions: the dask model receives the dask array, the scikit-learn
    # model receives the precomputed in-memory copy.
    for method in ("predict_proba", "predict", "predict_log_proba"):
        assert_eq(
            getattr(dask_model, method)(X).compute(),
            getattr(sklearn_model, method)(X_),
        )


class TestBigMultinomialNB(object):
    def test_basic(self, single_chunk_count_classification):
        X, y = single_chunk_count_classification
        a = nb.BigMultinomialNB(classes=[0, 1])
        b = nb_.MultinomialNB()