Example #1
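The test snippets in Examples #1 through #5 are excerpted without their imports. A minimal preamble that should make them runnable against the old scikits.learn API (assuming the private helpers _infer_dimension_ and _assess_dimension_ live in scikits.learn.pca and assert_true comes from nose, as in the test module these appear to be drawn from; Example #4 additionally assumes an iris-style data matrix X defined at module level):

import numpy as np
from numpy.random import randn
from nose.tools import assert_true

from scikits.learn.pca import PCA, _infer_dimension_, _assess_dimension_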
def test_pca_dim():
    """
    """
    n, p = 100, 5
    X = randn(n, p) * .1
    X[:10] += np.array([3, 4, 5, 1, 2])
    pca = PCA(n_comp='mle')
    pca.fit(X)
    assert_true(pca.n_comp == 1)
Example #2
def test_infer_dim_2():
    """
    """
    n, p = 1000, 5
    X = randn(n, p) * .1
    X[:10] += np.array([3, 4, 5, 1, 2])
    X[10:20] += np.array([6, 0, 7, 2, -1])
    pca = PCA(n_comp=p)
    pca.fit(X)
    spect = pca.explained_variance_
    assert_true(_infer_dimension_(spect, n, p) > 1)
Example #3
def test_pca_check_projection():
    """test that the projection of data is correct
    """
    n, p = 100, 3
    X = randn(n, p) * .1
    X[:10] += np.array([3, 4, 5])
    pca = PCA(n_comp=2)
    pca.fit(X)
    Xt = 0.1 * randn(1, p) + np.array([3, 4, 5])
    Yt = pca.transform(Xt)
    Yt /= np.sqrt((Yt**2).sum())
    np.testing.assert_almost_equal(np.abs(Yt[0][0]), 1., 1)
Example #4
def test_pca():
    """
    PCA
    """
    pca = PCA(n_comp=2)
    X_r = pca.fit(X).transform(X)
    np.testing.assert_equal(X_r.shape[1], 2)

    pca = PCA()
    pca.fit(X)
    np.testing.assert_almost_equal(pca.explained_variance_ratio_.sum(),
                                   1.0, 3)
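Since the ratios sum to one when every component is kept, a common follow-on is picking the smallest number of components that reaches a variance threshold; a minimal sketch (the 0.95 threshold is an arbitrary choice, not from the source):

import numpy as np

cum = np.cumsum(pca.explained_variance_ratio_)
n_keep = int(np.searchsorted(cum, 0.95)) + 1  # smallest count reaching 95% variance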
Example #5
def test_infer_dim_1():
    """
    """
    n, p = 1000, 5
    X = randn(n, p) * 0.1 + randn(n, 1) * np.array([3, 4, 5, 1, 2]) + np.array(
        [1, 0, 7, 4, 6])
    pca = PCA(n_comp=p)
    pca.fit(X)
    spect = pca.explained_variance_
    ll = []
    for k in range(p):
        ll.append(_assess_dimension_(spect, k, n, p))
    ll = np.array(ll)
    assert_true(ll[1] > ll.max() - .01 * n)
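For reference, _infer_dimension_ (exercised in Example #2) appears to be exactly the argmax of this per-rank likelihood scan; a minimal sketch of the relationship (an assumption about the old scikits.learn internals, not verbatim source):

    # Inside the test above, the inferred rank should match the scan:
    assert_true(_infer_dimension_(spect, n, p) == ll.argmax())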
Example #6
print __doc__

import pylab as pl

from scikits.learn import datasets
from scikits.learn.pca import PCA
from scikits.learn.lda import LDA

iris = datasets.load_iris()

X = iris.data
y = iris.target
target_names = iris.target_names

pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

lda = LDA(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained by each component
print 'explained variance ratio (first two components):', \
    pca.explained_variance_ratio_

pl.figure()
pl.subplot(2, 1, 1)
for c, i, target_name in zip("rgb", [0, 1, 2], target_names):
    pl.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
pl.legend()
pl.title('PCA of IRIS dataset')
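The snippet is cut off before the second panel; a plausible continuation mirroring the PCA subplot for the LDA projection X_r2 (a sketch, not verbatim from the source):

pl.subplot(2, 1, 2)
for c, i, target_name in zip("rgb", [0, 1, 2], target_names):
    pl.scatter(X_r2[y == i, 0], X_r2[y == i, 1], c=c, label=target_name)
pl.legend()
pl.title('LDA of IRIS dataset')

pl.show()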
Example #7
import numpy as np
import pylab as pl

from scikits.learn.pca import PCA
from scikits.learn.fastica import FastICA

if __name__ == '__main__':

    ###############################################################################
    # Generate sample data
    S = np.random.standard_t(1.5, size=(2, 10000))
    S[0] *= 2.

    # Mix data
    A = [[1, 1], [0, 2]]  # Mixing matrix

    X = np.dot(A, S)  # Generate observations

    pca = PCA()
    S_pca_ = pca.fit(X.T).transform(X.T).T

    ica = FastICA()
    S_ica_ = ica.fit(X).transform(X)  # Estimate the sources

    S_ica_ /= S_ica_.std(axis=1)[:, np.newaxis]

    ###############################################################################
    # Plot results

    def plot_samples(S, axis_list=None):
        pl.scatter(S[0], S[1], s=2, marker='o', linewidths=0, zorder=10)
        if axis_list is not None:
            colors = [(0, 0.6, 0), (0.6, 0, 0)]
            for color, axis in zip(colors, axis_list):
                axis /= axis.std()
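                # NOTE: the snippet breaks off here (as does Example #10).
                # A continuation in the spirit of the original PCA-vs-ICA
                # demo (quiver styling values are assumptions, not verbatim
                # source) would draw each rescaled basis as arrows from the
                # origin:
                x_axis, y_axis = axis
                pl.quiver(0, 0, x_axis, y_axis, zorder=11, width=0.01,
                          scale=6, color=color)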
Example #8
feature space) that account for the most variance in the data. Here we
plot the different samples on the 2 first principal components.
"""
print __doc__

import pylab as pl

from scikits.learn import datasets
from scikits.learn.pca import PCA

iris = datasets.load_iris()

X = iris.data
y = iris.target
target_names = iris.target_names

pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

# Percentage of variance explained by each component
print pca.explained_variance_ratio_

pl.figure()
for c, i, target_name in zip("rgb", [0, 1, 2], target_names):
    pl.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
pl.legend()
pl.title('PCA of IRIS dataset')

pl.show()

Example #9

import numpy as np
import matplotlib.pyplot as plt

from scikits.learn.pca import PCA

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color


path = get_path(__file__) + '/..'
L = list(L)

# Remove trial_id, obsnum and IsAlert
# (notation changes here from D to X)
X = d.view()[:, 3:]

pca = PCA(n_components=30)
pca.fit(X)

plt.plot(np.cumsum(pca.explained_variance_ratio_), marker='o')
ax = plt.gca()
plt.title('Cumulative percentage of total variation explained by principal components')
ax.set_xlabel('Principal component')
ax.set_ylabel('% of total variation')
plt.savefig('{0}/plots/pca-variation-explained.pdf'.format(path), papertype='a4', format='pdf')
plt.cla()

# Project the data onto the first three principal components
W = pca.components_[:, 0:3]
X_p = np.dot(X, W)

# Sample 120 random row indices (randint's upper bound is exclusive, unlike
# the deprecated random_integers, which could index one past the last row)
rnd_rows = np.random.randint(0, X.shape[0], 120)

colors = map(bool_to_color, d.view()[rnd_rows, L.index('IsAlert')])
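The snippet ends after the color mapping; a plausible continuation (hypothetical, including the output filename) scatters the sampled projections colored by the IsAlert label:

# Hypothetical continuation: plot the sampled rows in the PC1/PC2 plane
plt.scatter(X_p[rnd_rows, 0], X_p[rnd_rows, 1], c=colors, marker='o')
plt.title('Projection onto the first two principal components')
plt.savefig('{0}/plots/pca-projection.pdf'.format(path), papertype='a4', format='pdf')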
Example #10
import numpy as np
import pylab as pl

from scikits.learn.pca import PCA
from scikits.learn.fastica import FastICA

###############################################################################
# Generate sample data
S = np.random.standard_t(1.5, size=(2, 10000))
S[0] *= 2.

# Mix data
A = [[1, 1], [0, 2]]  # Mixing matrix

X = np.dot(A, S)  # Generate observations

pca = PCA()
S_pca_ = pca.fit(X.T).transform(X.T).T

ica = FastICA()
S_ica_ = ica.fit(X).transform(X)  # Estimate the sources

S_ica_ /= S_ica_.std(axis=1)[:, np.newaxis]

###############################################################################
# Plot results


def plot_samples(S, axis_list=None):
    pl.scatter(S[0], S[1], s=2, marker='o', linewidths=0, zorder=10)
    if axis_list is not None:
        colors = [(0, 0.6, 0), (0.6, 0, 0)]
        for color, axis in zip(colors, axis_list):