Example #1

# Imports assumed by all of the examples on this page; the module paths
# follow the scikits.learn era this code targets and are an assumption that
# may not match later releases.
import gc
from time import time
from collections import defaultdict

import numpy as np
from numpy.testing import assert_equal, assert_almost_equal
from scipy import linalg, sparse
from scipy.linalg import svd

from scikits.learn.utils.extmath import fast_svd
from scikits.learn.datasets.samples_generator import low_rank_fat_tail
from scikits.learn.decomposition import NMF
# alt_nnmf is a helper defined elsewhere in the original benchmark script


def compute_bench(samples_range, features_range, q=3, rank=50):
    it = 0
    results = defaultdict(list)

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print '===================='
            print 'Iteration %03d of %03d' % (it, max_it)
            print '===================='
            X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                                  tail_strength=0.2)

            gc.collect()
            print "benching scipy svd: "
            tstart = time()
            svd(X, full_matrices=False)
            results['scipy svd'].append(time() - tstart)

            gc.collect()
            print "benching scikit-learn fast_svd: q=0"
            tstart = time()
            fast_svd(X, rank, q=0)
            results['scikit-learn fast_svd (q=0)'].append(time() - tstart)

            gc.collect()
            print "benching scikit-learn fast_svd: q=%d " % q
            tstart = time()
            fast_svd(X, rank, q=q)
            results['scikit-learn fast_svd (q=%d)' % q].append(time() - tstart)

    return results
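A minimal driver sketch for the benchmark above (the ranges below are
illustrative assumptions, not part of the original script):

samples_range = [500, 1000, 2000]
features_range = [500, 1000, 2000]
results = compute_bench(samples_range, features_range)

# one timing per (n_samples, n_features) pair, in loop order
for label in sorted(results):
    print label, results[label]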
Example #2
def test_fast_svd_infinite_rank():
    """Check that extmath.fast_svd can handle noisy matrices"""
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # let us try without a low-rank component: just regularly but slowly
    # decreasing singular values, so the data matrix effectively has full rank
    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                          tail_strength=1.0, seed=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)

    # compute the singular values of X using the fast approximate method without
    # the iterated power method
    _, sa, _ = fast_svd(X, k, q=0)

    # the approximation does not tolerate the noise:
    assert np.abs(s[:k] - sa).max() > 0.1

    # compute the singular values of X using the fast approximate method with
    # the iterated power method
    _, sap, _ = fast_svd(X, k, q=5)

    # the iterated power method is still managing to get most of the structure
    # at the requested rank
    assert_almost_equal(s[:k], sap, decimal=3)
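For reference, a sketch of the randomized range finder that the `q` parameter
controls. `randomized_range` is a hypothetical helper name, an illustration of
the technique rather than the library's implementation:

def randomized_range(X, k, q=0, seed=0):
    """Return an orthonormal basis approximating the range of X."""
    rng = np.random.RandomState(seed)
    # project X onto k random directions
    Y = np.dot(X, rng.normal(size=(X.shape[1], k)))
    # q power iterations of (X X.T) sharpen the spectrum so that slowly
    # decaying singular values, as in the test above, hurt the basis less
    for _ in range(q):
        Y = np.dot(X, np.dot(X.T, Y))
    Q, _ = np.linalg.qr(Y)
    return Q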
Example #3
def test_fast_svd_low_rank():
    """Check that extmath.fast_svd is consistent with linalg.svd"""
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # generate a matrix X of approximate effective rank `rank` and no noise
    # component (very structured signal):
    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                          tail_strength=0.0, seed=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    U, s, V = linalg.svd(X, full_matrices=False)

    # compute the singular values of X using the fast approximate method
    Ua, sa, Va = fast_svd(X, k)
    assert_equal(Ua.shape, (n_samples, k))
    assert_equal(sa.shape, (k,))
    assert_equal(Va.shape, (k, n_features))

    # ensure that the singular values of both methods are equal up to the real
    # rank of the matrix
    assert_almost_equal(s[:k], sa)

    # check the singular vectors too (while not checking the sign)
    assert_almost_equal(np.dot(U[:, :k], V[:k, :]), np.dot(Ua, Va))

    # check the sparse matrix representation
    X = sparse.csr_matrix(X)

    # compute the singular values of X using the fast approximate method
    Ua, sa, Va = fast_svd(X, k)
    assert_almost_equal(s[:rank], sa[:rank])
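Completing the picture from the hypothetical range-finder sketch above: the
second stage projects X onto the small basis and takes an exact SVD of the
projection. Again an illustration, not the library code; it reuses the
`randomized_range` helper from the earlier sketch:

def approx_svd(X, k, q=0, seed=0):
    # stage 2 of a randomized SVD: decompose the small projected matrix
    # exactly, then map the left singular vectors back to the full space
    Q = randomized_range(X, k, q=q, seed=seed)   # basis from the sketch above
    B = np.dot(Q.T, X)                           # shape (k, n_features)
    Uhat, s, V = np.linalg.svd(B, full_matrices=False)
    return np.dot(Q, Uhat), s, V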
Example #4
def test_fast_svd_low_rank_with_noise():
    """Check that extmath.fast_svd can handle noisy matrices"""
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # generate a matrix X with structured approximate rank `rank` and a
    # significant noisy component
    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                          tail_strength=0.5, seed=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)

    # compute the singular values of X using the fast approximate method without
    # the iterated power method
    _, sa, _ = fast_svd(X, k, q=0)

    # the approximation does not tolerate the noise:
    assert np.abs(s[:k] - sa).max() > 0.05

    # compute the singular values of X using the fast approximate method with
    # the iterated power method
    _, sap, _ = fast_svd(X, k, q=5)

    # the iterated power method helps to get rid of the noise:
    assert_almost_equal(s[:k], sap, decimal=3)
Example #5
def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
    it = 0
    timeset = defaultdict(list)
    err = defaultdict(list)

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print "===================="
            print "Iteration %03d of %03d" % (it, max_it)
            print "===================="
            X = np.abs(low_rank_fat_tail(n_samples, n_features,
                                         effective_rank=rank,
                                         tail_strength=0.2))

            gc.collect()
            print "benching nndsvd-nmf: "
            tstart = time()
            m = NMF(n_components=30, tol=tolerance, init="nndsvd").fit(X)
            tend = time() - tstart
            timeset["nndsvd-nmf"].append(tend)
            err["nndsvd-nmf"].append(m.reconstruction_err_)
            print m.reconstruction_err_, tend

            gc.collect()
            print "benching nndsvda-nmf: "
            tstart = time()
            m = NMF(n_components=30, init="nndsvda", tol=tolerance).fit(X)
            tend = time() - tstart
            timeset["nndsvda-nmf"].append(tend)
            err["nndsvda-nmf"].append(m.reconstruction_err_)
            print m.reconstruction_err_, tend

            gc.collect()
            print "benching nndsvdar-nmf: "
            tstart = time()
            m = NMF(n_components=30, init="nndsvdar", tol=tolerance).fit(X)
            tend = time() - tstart
            timeset["nndsvdar-nmf"].append(tend)
            err["nndsvdar-nmf"].append(m.reconstruction_err_)
            print m.reconstruction_err_, tend

            gc.collect()
            print "benching random-nmf"
            tstart = time()
            m = NMF(n_components=30, init=None, max_iter=1000, tol=tolerance).fit(X)
            tend = time() - tstart
            timeset["random-nmf"].append(tend)
            err["random-nmf"].append(m.reconstruction_err_)
            print m.reconstruction_err_, tend

            gc.collect()
            print "benching alt-random-nmf"
            tstart = time()
            W, H = alt_nnmf(X, r=30, R=None, tol=tolerance)
            tend = time() - tstart
            timeset["alt-random-nmf"].append(tend)
            err["alt-random-nmf"].append(np.linalg.norm(X - np.dot(W, H)))
            print np.linalg.norm(X - np.dot(W, H)), tend

    return timeset, err
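A hypothetical driver for the NMF benchmark above, reporting the timing and
reconstruction error collected for each initialization scheme (the ranges are
illustrative assumptions):

samples_range = [50, 100, 200]
features_range = [50, 100, 200]
timeset, err = compute_bench(samples_range, features_range)

for init in sorted(timeset):
    print init, timeset[init], err[init]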
Example #6
def test_fast_svd_transpose_consistency():
    """Check that transposing the design matrix has limit impact"""
    n_samples = 100
    n_features = 500
    rank = 4
    k = 10

    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
                          tail_strength=0.5, seed=0)
    assert_equal(X.shape, (n_samples, n_features))

    U1, s1, V1 = fast_svd(X, k, q=3, transpose=False, rng=0)
    U2, s2, V2 = fast_svd(X, k, q=3, transpose=True, rng=0)
    U3, s3, V3 = fast_svd(X, k, q=3, transpose='auto', rng=0)
    U4, s4, V4 = linalg.svd(X, full_matrices=False)

    assert_almost_equal(s1, s4[:k], decimal=3)
    assert_almost_equal(s2, s4[:k], decimal=3)
    assert_almost_equal(s3, s4[:k], decimal=3)

    assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)
    assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)

    # in this case 'auto' is equivalent to transpose
    assert_almost_equal(s2, s3)
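The 'auto' mode presumably picks the cheaper orientation. A sketch of such a
rule, an assumption about the heuristic that is at least consistent with the
assertion above (with n_samples=100 < n_features=500, 'auto' matches
transpose=True):

def should_transpose(X):
    # hypothetical 'auto' rule: work on the smaller of the two dimensions
    n_samples, n_features = X.shape
    return n_samples < n_features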