# Imports assumed by these test snippets (module-level, shared by the tests below).
import numpy as np
from numpy.random import randn
from nose.tools import assert_true

from scikits.learn.pca import PCA, _assess_dimension_, _infer_dimension_


def test_pca_dim():
    """Test automatic dimensionality setting with n_comp='mle'."""
    n, p = 100, 5
    X = randn(n, p) * .1
    X[:10] += np.array([3, 4, 5, 1, 2])
    pca = PCA(n_comp='mle')
    pca.fit(X)
    assert_true(pca.n_comp == 1)
def test_infer_dim_2():
    """Test that more than one component is inferred for rank-2 data."""
    n, p = 1000, 5
    X = randn(n, p) * .1
    X[:10] += np.array([3, 4, 5, 1, 2])
    X[10:20] += np.array([6, 0, 7, 2, -1])
    pca = PCA(n_comp=p)
    pca.fit(X)
    spect = pca.explained_variance_
    assert_true(_infer_dimension_(spect, n, p) > 1)
def test_pca_check_projection():
    """Test that the projection of data is correct."""
    n, p = 100, 3
    X = randn(n, p) * .1
    X[:10] += np.array([3, 4, 5])
    pca = PCA(n_comp=2)
    pca.fit(X)

    Xt = 0.1 * randn(1, p) + np.array([3, 4, 5])
    Yt = pca.transform(Xt)
    Yt /= np.sqrt((Yt ** 2).sum())
    np.testing.assert_almost_equal(np.abs(Yt[0][0]), 1., 1)
def test_pca():
    """PCA."""
    # X is assumed to be a dataset defined at module level alongside these tests.
    pca = PCA(n_comp=2)
    X_r = pca.fit(X).transform(X)
    np.testing.assert_equal(X_r.shape[1], 2)

    pca = PCA()
    pca.fit(X)
    np.testing.assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)
def test_infer_dim_1():
    """Test dimensionality inference on rank-1 data plus isotropic noise."""
    n, p = 1000, 5
    X = (randn(n, p) * .1 + randn(n, 1) * np.array([3, 4, 5, 1, 2])
         + np.array([1, 0, 7, 4, 6]))
    pca = PCA(n_comp=p)
    pca.fit(X)
    spect = pca.explained_variance_
    ll = []
    for k in range(p):
        ll.append(_assess_dimension_(spect, k, n, p))
    ll = np.array(ll)
    assert_true(ll[1] > ll.max() - .01 * n)
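# The dimensionality tests above all rely on the same effect: a low-rank signal
# buried in isotropic noise yields a few dominant eigenvalues in the sample
# covariance, which explained_variance_ exposes. A minimal NumPy-only sketch of
# that effect on synthetic data (illustrative only, independent of the library
# code above):
import numpy as np

rng = np.random.RandomState(0)
n, p = 1000, 5
X_demo = rng.randn(n, p) * .1                            # isotropic noise
X_demo += rng.randn(n, 1) * np.array([3, 4, 5, 1, 2])    # rank-1 signal

# Eigenvalues of the sample covariance, largest first: one value dominates,
# the remaining four sit near the noise variance (~0.01).
eigvals = np.sort(np.linalg.eigvalsh(np.cov(X_demo, rowvar=False)))[::-1]
print(eigvals)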
print __doc__

import pylab as pl

from scikits.learn import datasets
from scikits.learn.pca import PCA
from scikits.learn.lda import LDA

iris = datasets.load_iris()

X = iris.data
y = iris.target
target_names = iris.target_names

pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

lda = LDA(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained by each of the first two components
print 'explained variance ratio (first two components):', \
    pca.explained_variance_ratio_

pl.figure()
pl.subplot(2, 1, 1)
for c, i, target_name in zip("rgb", [0, 1, 2], target_names):
    pl.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
pl.legend()
pl.title('PCA of IRIS dataset')
import numpy as np
import pylab as pl

from scikits.learn.pca import PCA
from scikits.learn.fastica import FastICA

if __name__ == '__main__':
    ###########################################################################
    # Generate sample data
    S = np.random.standard_t(1.5, size=(2, 10000))
    S[0] *= 2.

    # Mix data
    A = [[1, 1], [0, 2]]  # Mixing matrix
    X = np.dot(A, S)      # Generate observations

    pca = PCA()
    S_pca_ = pca.fit(X.T).transform(X.T).T

    ica = FastICA()
    S_ica_ = ica.fit(X).transform(X)  # Estimate the sources
    S_ica_ /= S_ica_.std(axis=1)[:, np.newaxis]

    ###########################################################################
    # Plot results

    def plot_samples(S, axis_list=None):
        pl.scatter(S[0], S[1], s=2, marker='o', linewidths=0, zorder=10)
        if axis_list is not None:
            colors = [(0, 0.6, 0), (0.6, 0, 0)]
            for color, axis in zip(colors, axis_list):
                axis /= axis.std()
feature space) that account for the most variance in the data.

Here we plot the different samples on the 2 first principal components.
"""
print __doc__

import pylab as pl

from scikits.learn import datasets
from scikits.learn.pca import PCA

iris = datasets.load_iris()

X = iris.data
y = iris.target
target_names = iris.target_names

pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

# Variance explained by each of the two components
print pca.explained_variance_

pl.figure()
for c, i, target_name in zip("rgb", [0, 1, 2], target_names):
    pl.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
pl.legend()
pl.title('PCA of IRIS dataset')

pl.show()
import numpy as np
import matplotlib.pyplot as plt

from scikits.learn.pca import PCA

from src.data_interface import d, L_clean, L
from src.utils import get_path, bool_to_color

path = get_path(__file__) + '/..'
L = list(L)

# Remove trial_id, obsnum and IsAlert; notation changes here from D to X
X = d.view()[:, 3:]

pca = PCA(n_components=30)
pca.fit(X)

plt.plot(np.cumsum(pca.explained_variance_ratio_), marker='o')
ax = plt.gca()
plt.title('Cumulative percentage of total variation explained '
          'by principal components')
ax.set_xlabel('Principal component')
ax.set_ylabel('% of total variation')
plt.savefig('{0}/plots/pca-variation-explained.pdf'.format(path),
            papertype='a4', format='pdf')
plt.cla()

W = pca.components_[:, 0:3]
X_p = np.dot(X, W)

# random_integers includes the upper bound, so cap it at the last valid row index
rnd_rows = np.random.random_integers(0, X.shape[0] - 1, 120)
colors = map(bool_to_color, d.view()[rnd_rows, L.index('IsAlert')])
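# Side note (illustrative, NumPy only, synthetic data -- not part of the script
# above): the cumulative explained-variance curve plotted above is simply the
# normalized cumulative sum of the covariance eigenvalues taken in decreasing
# order, which is what PCA's explained_variance_ratio_ reports when all
# components are kept.
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.randn(500, 30) * np.linspace(3, 0.1, 30)    # decaying feature scales

eigvals = np.sort(np.linalg.eigvalsh(np.cov(X_demo, rowvar=False)))[::-1]
print(np.cumsum(eigvals / eigvals.sum()))                # cumulative fraction of variance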
import numpy as np
import pylab as pl

from scikits.learn.pca import PCA
from scikits.learn.fastica import FastICA

###############################################################################
# Generate sample data
S = np.random.standard_t(1.5, size=(2, 10000))
S[0] *= 2.

# Mix data
A = [[1, 1], [0, 2]]  # Mixing matrix
X = np.dot(A, S)      # Generate observations

pca = PCA()
S_pca_ = pca.fit(X.T).transform(X.T).T

ica = FastICA()
S_ica_ = ica.fit(X).transform(X)  # Estimate the sources
S_ica_ /= S_ica_.std(axis=1)[:, np.newaxis]

###############################################################################
# Plot results

def plot_samples(S, axis_list=None):
    pl.scatter(S[0], S[1], s=2, marker='o', linewidths=0, zorder=10)
    if axis_list is not None:
        colors = [(0, 0.6, 0), (0.6, 0, 0)]
        for color, axis in zip(colors, axis_list):
            axis /= axis.std()