def test_initialize_variants():
    # Test NNDSVD variants correctness
    # Test that the variants 'nndsvda' and 'nndsvdar' differ from basic
    # 'nndsvd' only where the basic version has zeros.
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(10, 10))
    W0, H0 = nmf._initialize_nmf(data, 10, init='nndsvd')
    Wa, Ha = nmf._initialize_nmf(data, 10, init='nndsvda')
    War, Har = nmf._initialize_nmf(data, 10, init='nndsvdar', random_state=0)

    for ref, evl in ((W0, Wa), (W0, War), (H0, Ha), (H0, Har)):
        assert_almost_equal(evl[ref != 0], ref[ref != 0])
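
# Illustration only, not part of the test suite: the property checked in
# test_initialize_variants follows from how the variants are typically built.
# 'nndsvda' fills the zeros of the plain NNDSVD factors with the data mean,
# and 'nndsvdar' fills them with small random values, leaving the non-zero
# entries untouched. A minimal sketch of that filling step, assuming W0/H0
# are plain 'nndsvd' factors (this helper is hypothetical, not a sklearn API):
def _fill_nndsvd_zeros_sketch(W0, H0, data, variant='nndsvda', random_state=0):
    W, H = W0.copy(), H0.copy()
    avg = data.mean()
    if variant == 'nndsvda':
        # replace zeros with the mean of the data matrix
        W[W == 0] = avg
        H[H == 0] = avg
    else:
        # 'nndsvdar': replace zeros with small positive random values
        rng = np.random.mtrand.RandomState(random_state)
        W[W == 0] = np.abs(avg * rng.randn((W == 0).sum()) / 100)
        H[H == 0] = np.abs(avg * rng.randn((H == 0).sum()) / 100)
    return W, H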
def test_initialize_nn_output():
    # Test that initialization does not return negative values
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(10, 10))
    for init in ('random', 'nndsvd', 'nndsvda', 'nndsvdar'):
        W, H = nmf._initialize_nmf(data, 10, init=init, random_state=0)
        assert not ((W < 0).any() or (H < 0).any())
def test_nmf_decreasing():
    # test that the objective function is decreasing at each iteration
    n_samples = 20
    n_features = 15
    n_components = 10
    alpha = 0.1
    l1_ratio = 0.5
    tol = 0.

    # initialization
    rng = np.random.mtrand.RandomState(42)
    X_full = rng.randn(n_samples, n_features)
    np.abs(X_full, X_full)

    # add missing values
    X_nan = X_full.copy()
    X_nan[rng.randint(2, size=(n_samples, n_features)) > 0] = np.nan

    W0, H0 = nmf._initialize_nmf(X_full, n_components, init='random',
                                 random_state=42)

    for X in [X_full, X_nan]:
        for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
            for solver in ('cd', 'mu'):
                if X is X_nan and solver != 'mu':
                    # not implemented
                    continue
                if solver != 'mu' and beta_loss != 2:
                    # not implemented
                    continue

                W, H = W0.copy(), H0.copy()
                previous_loss = None
                for _ in range(30):
                    # one more iteration starting from the previous results
                    W, H, _ = non_negative_factorization(
                        X, W, H, beta_loss=beta_loss, init='custom',
                        n_components=n_components, max_iter=1, alpha=alpha,
                        solver=solver, tol=tol, l1_ratio=l1_ratio, verbose=0,
                        regularization='both', random_state=0, update_H=True)

                    loss = nmf._beta_divergence(X, W, H, beta_loss)
                    if previous_loss is not None:
                        assert_greater(previous_loss, loss)
                    previous_loss = loss
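
# Illustration only: the loss tracked in test_nmf_decreasing is the
# element-wise beta divergence between X and np.dot(W, H). For a dense,
# fully observed X with strictly positive entries it reduces to the
# well-known special cases below; this naive helper is a hypothetical
# restatement of the formula, not the sklearn implementation:
def _beta_divergence_dense_sketch(X, W, H, beta):
    Y = np.dot(W, H)
    if beta == 2:
        # squared Frobenius norm (halved)
        return 0.5 * np.sum((X - Y) ** 2)
    if beta == 1:
        # generalized Kullback-Leibler (assumes X > 0, else use 0*log(0) = 0)
        return np.sum(X * np.log(X / Y) - X + Y)
    if beta == 0:
        # Itakura-Saito
        return np.sum(X / Y - np.log(X / Y) - 1)
    # generic case for any other beta
    return np.sum(X ** beta + (beta - 1) * Y ** beta
                  - beta * X * Y ** (beta - 1)) / (beta * (beta - 1))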
def test_initialize_close():
    # Test NNDSVD error
    # Test that _initialize_nmf error is less than the standard deviation of
    # the entries in the matrix.
    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    W, H = nmf._initialize_nmf(A, 10, init='nndsvd')
    error = linalg.norm(np.dot(W, H) - A)
    sdev = linalg.norm(A - A.mean())
    assert error <= sdev
def test_beta_divergence():
    # Compare _beta_divergence with the reference, with and without NaN
    n_samples = 20
    n_features = 10
    n_components = 5

    # initialization
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    W, H = nmf._initialize_nmf(X, n_components, init='random',
                               random_state=42)

    # with dense X
    _compare_beta_divergence_with_ref(X, W, H)
    # with sparse X
    _compare_beta_divergence_with_ref(sp.csr_matrix(X), W, H)
    # with dense X and missing values
    X[rng.randn(n_samples, n_features) > 0] = np.nan
    _compare_beta_divergence_with_ref(X, W, H)
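
# Illustration only: _compare_beta_divergence_with_ref is defined elsewhere in
# this test module. A plausible sketch of such a check for the squared-error
# case (beta_loss=2), assuming the reference simply ignores missing (NaN)
# entries when summing the element-wise divergence; names and behavior here
# are hypothetical, not the module's actual helper:
def _compare_with_naive_ref_sketch(X, W, H, decimal=7):
    X_dense = X.toarray() if sp.issparse(X) else np.asarray(X, dtype=float)
    mask = ~np.isnan(X_dense)
    Y = np.dot(W, H)
    ref = 0.5 * np.sum((X_dense[mask] - Y[mask]) ** 2)
    loss = nmf._beta_divergence(X, W, H, 2)
    assert_almost_equal(loss, ref, decimal=decimal)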
def test_nmf_multiplicative_update_sparse():
    # Compare sparse and dense input in multiplicative update NMF
    # Also test continuity of the results with respect to beta_loss parameter
    n_samples = 20
    n_features = 10
    n_components = 5
    alpha = 0.1
    l1_ratio = 0.5
    n_iter = 20

    # initialization
    rng = np.random.mtrand.RandomState(1337)
    X = rng.randn(n_samples, n_features)
    X = np.abs(X)
    X_csr = sp.csr_matrix(X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        # Reference with dense array X
        W, H = W0.copy(), H0.copy()
        W1, H1, _ = non_negative_factorization(
            X, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        # Compare with sparse X
        W, H = W0.copy(), H0.copy()
        W2, H2, _ = non_negative_factorization(
            X_csr, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        assert_array_almost_equal(W1, W2, decimal=7)
        assert_array_almost_equal(H1, H2, decimal=7)

        # Compare with almost same beta_loss, since some values have a
        # specific behavior, but the results should be continuous w.r.t
        # beta_loss
        beta_loss -= 1.e-5
        W, H = W0.copy(), H0.copy()
        W3, H3, _ = non_negative_factorization(
            X_csr, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        assert_array_almost_equal(W1, W3, decimal=4)
        assert_array_almost_equal(H1, H3, decimal=4)
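
# Illustration only: the continuity check above matters because multiplicative
# updates are typically special-cased at beta_loss in {1, 2} for speed, while
# other values go through a generic rule, and both paths should agree in the
# limit. A minimal sketch of one generic multiplicative update of W (no
# regularization), following the usual beta-divergence MU rule; this helper is
# hypothetical and not the sklearn 'mu' solver:
def _mu_update_W_sketch(X, W, H, beta_loss):
    WH = np.dot(W, H)
    numerator = np.dot(X * WH ** (beta_loss - 2), H.T)
    denominator = np.dot(WH ** (beta_loss - 1), H.T)
    # element-wise multiplicative step keeps W non-negative
    return W * numerator / denominator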