示例#1
0
def test_nmf_minibatchnmf_equivalence(beta_loss):
    # Test that MiniBatchNMF is equivalent to NMF when batch_size = n_samples and
    # forget_factor 0.0 (stopping criterion put aside)
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(48, 5))

    nmf = NMF(
        n_components=5,
        beta_loss=beta_loss,
        solver="mu",
        random_state=0,
        tol=0,
    )
    mbnmf = MiniBatchNMF(
        n_components=5,
        beta_loss=beta_loss,
        random_state=0,
        tol=0,
        max_no_improvement=None,
        batch_size=X.shape[0],
        forget_factor=0.0,
    )
    W = nmf.fit_transform(X)
    mbW = mbnmf.fit_transform(X)
    assert_allclose(W, mbW)
示例#2
0
def test_minibatch_nmf_partial_fit():
    # Check fit / partial_fit equivalence. Applicable only with fresh restarts.
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(100, 5))

    n_components = 5
    batch_size = 10
    max_iter = 2

    mbnmf1 = MiniBatchNMF(
        n_components=n_components,
        init="custom",
        random_state=0,
        max_iter=max_iter,
        batch_size=batch_size,
        tol=0,
        max_no_improvement=None,
        fresh_restarts=False,
    )
    mbnmf2 = MiniBatchNMF(n_components=n_components, init="custom", random_state=0)

    # Force the same init of H (W is recomputed anyway) to be able to compare results.
    W, H = nmf._initialize_nmf(
        X, n_components=n_components, init="random", random_state=0
    )

    mbnmf1.fit(X, W=W, H=H)
    for i in range(max_iter):
        for j in range(batch_size):
            mbnmf2.partial_fit(X[j : j + batch_size], W=W[:batch_size], H=H)

    assert mbnmf1.n_steps_ == mbnmf2.n_steps_
    assert_allclose(mbnmf1.components_, mbnmf2.components_)
示例#3
0
def test_minibatch_nmf_transform():
    # Test that fit_transform is equivalent to fit.transform for MiniBatchNMF
    # Only guaranteed with fresh restarts
    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(6, 5))
    m = MiniBatchNMF(
        n_components=3,
        random_state=0,
        tol=1e-3,
        fresh_restarts=True,
    )
    ft = m.fit_transform(A)
    t = m.transform(A)
    assert_allclose(ft, t)
示例#4
0
def test_parameter_checking():
    A = np.ones((2, 2))
    name = "spam"

    with ignore_warnings(category=FutureWarning):
        # TODO remove in 1.2
        msg = "Invalid regularization parameter: got 'spam' instead of one of"
        with pytest.raises(ValueError, match=msg):
            NMF(regularization=name).fit(A)

    msg = "Invalid beta_loss parameter: solver 'cd' does not handle beta_loss = 1.0"
    with pytest.raises(ValueError, match=msg):
        NMF(solver="cd", beta_loss=1.0).fit(A)
    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        NMF().fit(-A)
    clf = NMF(2, tol=0.1).fit(A)
    with pytest.raises(ValueError, match=msg):
        clf.transform(-A)
    with pytest.raises(ValueError, match=msg):
        nmf._initialize_nmf(-A, 2, "nndsvd")

    for init in ["nndsvd", "nndsvda", "nndsvdar"]:
        msg = re.escape(
            "init = '{}' can only be used when "
            "n_components <= min(n_samples, n_features)".format(init))
        with pytest.raises(ValueError, match=msg):
            NMF(3, init=init).fit(A)
        with pytest.raises(ValueError, match=msg):
            MiniBatchNMF(3, init=init).fit(A)
        with pytest.raises(ValueError, match=msg):
            nmf._initialize_nmf(A, 3, init)
示例#5
0
def test_parameter_checking():
    # Here we only check for invalid parameter values that are not already
    # automatically tested in the common tests.

    A = np.ones((2, 2))

    msg = "Invalid beta_loss parameter: solver 'cd' does not handle beta_loss = 1.0"
    with pytest.raises(ValueError, match=msg):
        NMF(solver="cd", beta_loss=1.0).fit(A)
    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        NMF().fit(-A)
    clf = NMF(2, tol=0.1).fit(A)
    with pytest.raises(ValueError, match=msg):
        clf.transform(-A)
    with pytest.raises(ValueError, match=msg):
        nmf._initialize_nmf(-A, 2, "nndsvd")

    for init in ["nndsvd", "nndsvda", "nndsvdar"]:
        msg = re.escape(
            "init = '{}' can only be used when "
            "n_components <= min(n_samples, n_features)".format(init)
        )
        with pytest.raises(ValueError, match=msg):
            NMF(3, init=init).fit(A)
        with pytest.raises(ValueError, match=msg):
            MiniBatchNMF(3, init=init).fit(A)
        with pytest.raises(ValueError, match=msg):
            nmf._initialize_nmf(A, 3, init)
示例#6
0
def test_nmf_true_reconstruction():
    # Test that the fit is not too far away from an exact solution
    # (by construction)
    n_samples = 15
    n_features = 10
    n_components = 5
    beta_loss = 1
    batch_size = 3
    max_iter = 1000

    rng = np.random.mtrand.RandomState(42)
    W_true = np.zeros([n_samples, n_components])
    W_array = np.abs(rng.randn(n_samples))
    for j in range(n_components):
        W_true[j % n_samples, j] = W_array[j % n_samples]
    H_true = np.zeros([n_components, n_features])
    H_array = np.abs(rng.randn(n_components))
    for j in range(n_features):
        H_true[j % n_components, j] = H_array[j % n_components]
    X = np.dot(W_true, H_true)

    model = NMF(
        n_components=n_components,
        solver="mu",
        beta_loss=beta_loss,
        max_iter=max_iter,
        random_state=0,
    )
    transf = model.fit_transform(X)
    X_calc = np.dot(transf, model.components_)

    assert model.reconstruction_err_ < 0.1
    assert_allclose(X, X_calc)

    mbmodel = MiniBatchNMF(
        n_components=n_components,
        beta_loss=beta_loss,
        batch_size=batch_size,
        random_state=0,
        max_iter=max_iter,
    )
    transf = mbmodel.fit_transform(X)
    X_calc = np.dot(transf, mbmodel.components_)

    assert mbmodel.reconstruction_err_ < 0.1
    assert_allclose(X, X_calc, atol=1)
示例#7
0
def test_minibatch_nmf_verbose():
    # Check verbose mode of MiniBatchNMF for better coverage.
    A = np.random.RandomState(0).random_sample((100, 10))
    nmf = MiniBatchNMF(tol=1e-2, random_state=0, verbose=1)
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        nmf.fit(A)
    finally:
        sys.stdout = old_stdout
示例#8
0
def test_minibatch_nmf_negative_beta_loss(beta_loss):
    """Check that an error is raised if beta_loss < 0 and X contains zeros."""
    rng = np.random.RandomState(0)
    X = rng.normal(size=(6, 5))
    X[X < 0] = 0

    nmf = MiniBatchNMF(beta_loss=beta_loss, random_state=0)

    msg = "When beta_loss <= 0 and X contains zeros, the solver may diverge."
    with pytest.raises(ValueError, match=msg):
        nmf.fit(X)
示例#9
0
def test_mbnmf_inverse_transform():
    # Test that MiniBatchNMF.transform followed by MiniBatchNMF.inverse_transform
    # is close to the identity
    rng = np.random.RandomState(0)
    A = np.abs(rng.randn(6, 4))
    nmf = MiniBatchNMF(
        random_state=rng,
        max_iter=500,
        init="nndsvdar",
        fresh_restarts=True,
    )
    ft = nmf.fit_transform(A)
    A_new = nmf.inverse_transform(ft)
    assert_allclose(A, A_new, rtol=1e-3, atol=1e-2)
示例#10
0
def test_minibatch_nmf_wrong_params(param, match):
    # Check that appropriate errors are raised for invalid values specific to
    # MiniBatchNMF parameters
    A = np.ones((2, 2))
    with pytest.raises(ValueError, match=match):
        MiniBatchNMF(**param).fit(A)
# accurate version of non-negative matrix factorization (:class:`decomposition.NMF`).
# :class:`MiniBatchNMF` divides the data into mini-batches and optimizes the NMF model
# in an online manner by cycling over the mini-batches, making it better suited for
# large datasets. In particular, it implements `partial_fit`, which can be used for
# online learning when the data is not readily available from the start, or when the
# data does not fit into memory.
import numpy as np
from sklearn.decomposition import MiniBatchNMF

rng = np.random.RandomState(0)
n_samples, n_features, n_components = 10, 10, 5
true_W = rng.uniform(size=(n_samples, n_components))
true_H = rng.uniform(size=(n_components, n_features))
X = true_W @ true_H

nmf = MiniBatchNMF(n_components=n_components, random_state=0)

for _ in range(10):
    nmf.partial_fit(X)

W = nmf.transform(X)
H = nmf.components_
X_reconstructed = W @ H

print(
    f"relative reconstruction error: ",
    f"{np.sum((X - X_reconstructed) ** 2) / np.sum(X**2):.5f}",
)

# %%
# BisectingKMeans: divide and cluster