def compute_bench(samples_range, features_range, q=3, rank=50):
    """Time ``svd`` against ``fast_svd`` over a grid of matrix shapes.

    For every ``(n_samples, n_features)`` pair a matrix is generated with
    ``low_rank_fat_tail(..., effective_rank=rank, tail_strength=0.2)`` and
    three calls are timed on it, in this order:

    * ``svd(X, full_matrices=False)``
    * ``fast_svd(X, rank, q=0)``
    * ``fast_svd(X, rank, q=q)``

    Parameters
    ----------
    samples_range : sized iterable of int
        Values used as ``n_samples``; ``len()`` is taken to count iterations.
    features_range : sized iterable of int
        Values used as ``n_features``; ``len()`` is taken as well.
    q : int, default 3
        Value of the ``q`` argument for the third timed call.
    rank : int, default 50
        Used both as ``effective_rank`` of the generated matrix and as the
        second positional argument of ``fast_svd``.

    Returns
    -------
    results : defaultdict
        Maps a benchmark label to the list of elapsed wall-clock seconds,
        one entry per grid point in iteration order.

    Notes
    -----
    When ``q == 0`` the third label equals the second one, so that single
    key receives two timings per grid point.
    """
    results = defaultdict(list)
    max_it = len(samples_range) * len(features_range)
    it = 0

    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            # Single-argument, parenthesised print calls emit the same text
            # on Python 2 and Python 3.
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            X = low_rank_fat_tail(n_samples, n_features,
                                  effective_rank=rank, tail_strength=0.2)

            # Collect garbage before each timed section so a pending
            # collection is not charged to the call being measured.
            gc.collect()
            print("benching scipy svd: ")
            tstart = time()
            svd(X, full_matrices=False)
            results['scipy svd'].append(time() - tstart)

            gc.collect()
            print("benching scikit-learn fast_svd: q=0")
            tstart = time()
            fast_svd(X, rank, q=0)
            results['scikit-learn fast_svd (q=0)'].append(time() - tstart)

            gc.collect()
            print("benching scikit-learn fast_svd: q=%d " % q)
            tstart = time()
            fast_svd(X, rank, q=q)
            results['scikit-learn fast_svd (q=%d)' % q].append(
                time() - tstart)

    return results
def test_fast_svd_infinite_rank():
    """Check extmath.fast_svd on slowly decaying singular values"""
    n_samples, n_features = 100, 500
    rank = 5
    k = 10

    # No 'low_rank component' this time: the singular values only decrease
    # regularly but slowly, so the rank of the data matrix is infinite.
    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                          tail_strength=1.0, seed=0)
    expected_shape = (n_samples, n_features)
    assert_equal(X.shape, expected_shape)

    # Reference singular values from the slow exact method.
    _, s_exact, _ = linalg.svd(X, full_matrices=False)
    top_exact = s_exact[:k]

    # Fast approximate method without the iterated power method: the
    # approximation does not tolerate the noise.
    _, s_plain, _ = fast_svd(X, k, q=0)
    worst_gap = np.abs(top_exact - s_plain).max()
    assert worst_gap > 0.1

    # With the iterated power method most of the structure at the requested
    # rank is still recovered.
    _, s_power, _ = fast_svd(X, k, q=5)
    assert_almost_equal(top_exact, s_power, decimal=3)
def test_fast_svd_low_rank():
    """Check that extmath.fast_svd agrees with linalg.svd on low rank data"""
    n_samples, n_features = 100, 500
    rank = 5
    k = 10

    # Matrix of approximate effective rank `rank` with no noise component
    # (very structured signal).
    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                          tail_strength=0.0, seed=0)
    expected_shape = (n_samples, n_features)
    assert_equal(X.shape, expected_shape)

    # Slow exact decomposition used as the reference.
    U, s, V = linalg.svd(X, full_matrices=False)

    # Fast approximate decomposition truncated to k components.
    Ua, sa, Va = fast_svd(X, k)
    assert_equal(Ua.shape, (n_samples, k))
    assert_equal(sa.shape, (k,))
    assert_equal(Va.shape, (k, n_features))

    # Singular values of both methods must match up to the real rank of
    # the matrix.
    assert_almost_equal(s[:k], sa)

    # Compare the rank-k reconstructions rather than the vectors
    # themselves, so that sign flips do not matter.
    exact_product = np.dot(U[:, :k], V[:k, :])
    approx_product = np.dot(Ua, Va)
    assert_almost_equal(exact_product, approx_product)

    # Same check on the sparse matrix representation of X.
    X = sparse.csr_matrix(X)
    Ua, sa, Va = fast_svd(X, k)
    assert_almost_equal(s[:rank], sa[:rank])
def test_fast_svd_low_rank_with_noise():
    """Check that extmath.fast_svd can handle noisy matrices"""
    n_samples, n_features = 100, 500
    rank = 5
    k = 10

    # Matrix with a structure of approximate rank `rank` plus an important
    # noisy component.
    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                          tail_strength=0.5, seed=0)
    expected_shape = (n_samples, n_features)
    assert_equal(X.shape, expected_shape)

    # Reference singular values from the slow exact method.
    _, s_exact, _ = linalg.svd(X, full_matrices=False)
    top_exact = s_exact[:k]

    # Fast approximate method without the iterated power method: the
    # approximation does not tolerate the noise.
    _, s_plain, _ = fast_svd(X, k, q=0)
    worst_gap = np.abs(top_exact - s_plain).max()
    assert worst_gap > 0.05

    # The iterated power method helps getting rid of the noise.
    _, s_power, _ = fast_svd(X, k, q=5)
    assert_almost_equal(top_exact, s_power, decimal=3)
def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
    """Benchmark NMF initialisations and ``alt_nnmf`` over a shape grid.

    For every ``(n_samples, n_features)`` pair a matrix is built as
    ``np.abs(low_rank_fat_tail(..., effective_rank=rank,
    tail_strength=0.2))`` and five 30-component factorisations are timed,
    in this order:

    * ``NMF`` with ``init`` set to "nndsvd", "nndsvda" and "nndsvdar"
    * ``NMF`` with ``init=None`` and ``max_iter=1000``
    * ``alt_nnmf(X, r=30, R=None, tol=tolerance)``

    Parameters
    ----------
    samples_range : sized iterable of int
        Values used as ``n_samples``; ``len()`` is taken to count iterations.
    features_range : sized iterable of int
        Values used as ``n_features``; ``len()`` is taken as well.
    rank : int, default 50
        ``effective_rank`` of the generated matrix.
    tolerance : float, default 1e-7
        Passed as ``tol`` to every factorisation.

    Returns
    -------
    timeset : defaultdict
        Maps a benchmark label to the list of elapsed wall-clock seconds,
        one entry per grid point in iteration order.
    err : defaultdict
        Maps the same labels to the matching errors: the fitted model's
        ``reconstruction_err_`` for ``NMF``, and
        ``np.linalg.norm(X - np.dot(W, H))`` for ``alt_nnmf``.
    """
    timeset = defaultdict(list)
    err = defaultdict(list)

    def _record(label, elapsed, error):
        # Store one measurement and echo it as "<error> <seconds>".  A single
        # formatted argument prints identically on Python 2 and Python 3.
        timeset[label].append(elapsed)
        err[label].append(error)
        print("%s %s" % (error, elapsed))

    max_it = len(samples_range) * len(features_range)
    it = 0

    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print("====================")
            print("Iteration %03d of %03d" % (it, max_it))
            print("====================")
            X = np.abs(low_rank_fat_tail(n_samples, n_features,
                                         effective_rank=rank,
                                         tail_strength=0.2))

            # The three SVD-based initialisations differ only by `init`.
            for init in ("nndsvd", "nndsvda", "nndsvdar"):
                label = "%s-nmf" % init
                # Collect garbage first so a pending collection is not
                # charged to the fit being measured.
                gc.collect()
                print("benching %s: " % label)
                tstart = time()
                m = NMF(n_components=30, init=init, tol=tolerance).fit(X)
                tend = time() - tstart
                _record(label, tend, m.reconstruction_err_)

            gc.collect()
            print("benching random-nmf")
            tstart = time()
            m = NMF(n_components=30, init=None, max_iter=1000,
                    tol=tolerance).fit(X)
            tend = time() - tstart
            _record("random-nmf", tend, m.reconstruction_err_)

            gc.collect()
            print("benching alt-random-nmf")
            tstart = time()
            W, H = alt_nnmf(X, r=30, R=None, tol=tolerance)
            tend = time() - tstart
            # Residual norm is computed once, outside the timed region.
            residual = np.linalg.norm(X - np.dot(W, H))
            _record("alt-random-nmf", tend, residual)

    return timeset, err
def test_fast_svd_transpose_consistency():
    """Check that transposing the design matrix has limited impact"""
    n_samples, n_features = 100, 500
    rank = 4
    k = 10

    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                          tail_strength=0.5, seed=0)
    expected_shape = (n_samples, n_features)
    assert_equal(X.shape, expected_shape)

    # Same seed for the three transpose settings, then the exact reference.
    U1, s1, V1 = fast_svd(X, k, q=3, transpose=False, rng=0)
    U2, s2, V2 = fast_svd(X, k, q=3, transpose=True, rng=0)
    # NOTE(review): only the singular values of the 'auto' run are checked;
    # its singular vectors are never compared.
    _, s3, _ = fast_svd(X, k, q=3, transpose='auto', rng=0)
    U4, s4, V4 = linalg.svd(X, full_matrices=False)

    # Singular values of every variant match the exact top-k ones.
    top_exact = s4[:k]
    for approx in (s1, s2, s3):
        assert_almost_equal(approx, top_exact, decimal=3)

    # Rank-k reconstructions match the exact one for both explicit settings.
    exact_product = np.dot(U4[:, :k], V4[:k, :])
    assert_almost_equal(np.dot(U1, V1), exact_product, decimal=2)
    assert_almost_equal(np.dot(U2, V2), exact_product, decimal=2)

    # in this case 'auto' is equivalent to transpose
    assert_almost_equal(s2, s3)
def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
    """Benchmark NMF initialisations and ``alt_nnmf`` over a shape grid.

    For every ``(n_samples, n_features)`` pair a matrix is built as
    ``np.abs(low_rank_fat_tail(..., effective_rank=rank,
    tail_strength=0.2))`` and five 30-component factorisations are timed,
    in this order:

    * ``NMF`` with ``init`` set to 'nndsvd', 'nndsvda' and 'nndsvdar'
    * ``NMF`` with ``init=None`` and ``max_iter=1000``
    * ``alt_nnmf(X, r=30, R=None, tol=tolerance)``

    Parameters
    ----------
    samples_range : sized iterable of int
        Values used as ``n_samples``; ``len()`` is taken to count iterations.
    features_range : sized iterable of int
        Values used as ``n_features``; ``len()`` is taken as well.
    rank : int, default 50
        ``effective_rank`` of the generated matrix.
    tolerance : float, default 1e-7
        Passed as ``tol`` to every factorisation.

    Returns
    -------
    timeset : defaultdict
        Maps a benchmark label to the list of elapsed wall-clock seconds,
        one entry per grid point in iteration order.
    err : defaultdict
        Maps the same labels to the matching errors: the fitted model's
        ``reconstruction_err_`` for ``NMF``, and
        ``np.linalg.norm(X - np.dot(W, H))`` for ``alt_nnmf``.
    """
    timeset = defaultdict(list)
    err = defaultdict(list)

    def _record(label, elapsed, error):
        # Store one measurement and echo it as "<error> <seconds>".  A single
        # formatted argument prints identically on Python 2 and Python 3.
        timeset[label].append(elapsed)
        err[label].append(error)
        print('%s %s' % (error, elapsed))

    max_it = len(samples_range) * len(features_range)
    it = 0

    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            X = np.abs(low_rank_fat_tail(n_samples, n_features,
                                         effective_rank=rank,
                                         tail_strength=0.2))

            # The three SVD-based initialisations differ only by `init`.
            for init in ('nndsvd', 'nndsvda', 'nndsvdar'):
                label = '%s-nmf' % init
                # Collect garbage first so a pending collection is not
                # charged to the fit being measured.
                gc.collect()
                print("benching %s: " % label)
                tstart = time()
                m = NMF(n_components=30, init=init, tol=tolerance).fit(X)
                tend = time() - tstart
                _record(label, tend, m.reconstruction_err_)

            gc.collect()
            print("benching random-nmf")
            tstart = time()
            m = NMF(n_components=30, init=None, max_iter=1000,
                    tol=tolerance).fit(X)
            tend = time() - tstart
            _record('random-nmf', tend, m.reconstruction_err_)

            gc.collect()
            print("benching alt-random-nmf")
            tstart = time()
            W, H = alt_nnmf(X, r=30, R=None, tol=tolerance)
            tend = time() - tstart
            # Residual norm is computed once, outside the timed region.
            residual = np.linalg.norm(X - np.dot(W, H))
            _record('alt-random-nmf', tend, residual)

    return timeset, err