################################################################################ # load sample dataset from scikits.learn.datasets import load_iris iris = load_iris() X = iris.data[:,:2] # Take only 2 dimensions y = iris.target X = X[y > 0] y = y[y > 0] y -= 1 target_names = iris.target_names[1:] ################################################################################ # LDA lda = LDA() y_pred = lda.fit(X, y, store_covariance=True).predict(X) # QDA qda = QDA() y_pred = qda.fit(X, y, store_covariances=True).predict(X) ############################################################################### # Plot results def plot_ellipse(splot, mean, cov, color): v, w = linalg.eigh(cov) u = w[0] / linalg.norm(w[0]) angle = np.arctan(u[1]/u[0]) angle = 180 * angle / np.pi # convert to degrees # filled gaussian at 2 standard deviation
import pylab as pl

from scikits.learn import datasets
from scikits.learn.pca import PCA
from scikits.learn.lda import LDA

# Load the IRIS data: feature matrix, class labels and class names.
iris = datasets.load_iris()

X = iris.data
y = iris.target
target_names = iris.target_names

# Two-component PCA, fitted on the features only.
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

# Two-component LDA, fitted on the features together with the labels.
lda = LDA(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained for each components
# (Python 2 print statement: both items are written on one line.)
print 'explained variance ratio (first two components):', \
    pca.explained_variance_ratio_

# Upper panel: PCA projection, one colour and one legend entry per class.
pl.figure()
pl.subplot(2, 1, 1)
for c, i, target_name in zip("rgb", [0, 1, 2], target_names):
    pl.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
pl.legend()
pl.title('PCA of IRIS dataset')

# Lower panel.
# NOTE(review): the body of this loop lies beyond the visible chunk.
pl.subplot(2, 1, 2)
for c, i, target_name in zip("rgb", [0, 1, 2], target_names):
################################################################################ # load sample dataset from scikits.learn.datasets import load_iris iris = load_iris() X = iris.data[:, :2] # Take only 2 dimensions y = iris.target X = X[y > 0] y = y[y > 0] y -= 1 target_names = iris.target_names[1:] ################################################################################ # LDA lda = LDA() y_pred = lda.fit(X, y, store_covariance=True).predict(X) # QDA qda = QDA() y_pred = qda.fit(X, y, store_covariances=True).predict(X) ############################################################################### # Plot results def plot_ellipse(splot, mean, cov, color): v, w = linalg.eigh(cov) u = w[0] / linalg.norm(w[0]) angle = np.arctan(u[1] / u[0]) angle = 180 * angle / np.pi # convert to degrees
from scikits.learn import datasets from scikits.learn.decomposition import PCA from scikits.learn.lda import LDA iris = datasets.load_iris() X = iris.data y = iris.target target_names = iris.target_names print target_names pca = PCA(n_components=2) X_r = pca.fit(X).transform(X) lda = LDA(n_components=2) X_r2 = lda.fit(X, y).transform(X) # Percentage of variance explained for each components print 'explained variance ratio (first two components):', \ pca.explained_variance_ratio_ pl.figure() for c, i, target_name in zip("rgb", [0, 1, 2], target_names): pl.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name) pl.legend() pl.title('PCA of IRIS dataset') pl.figure() for c, i, target_name in zip("rgb", [0, 1, 2], target_names): pl.scatter(X_r2[y == i, 0], X_r2[y == i, 1], c=c, label=target_name)
============================ A classification example using Linear Discriminant Analysis (LDA). """ import numpy as np ################################################################################ # import some data to play with # The IRIS dataset from scikits.learn import datasets iris = datasets.load_iris() # Some noisy data not correlated E = np.random.normal(size=(len(iris.data), 35)) # Add the noisy data to the informative features X = np.hstack((iris.data, E)) y = iris.target ################################################################################ # LDA from scikits.learn.lda import LDA lda = LDA() y_pred = lda.fit(X, y).predict(X) print "Number of mislabeled points : %d"%(y != y_pred).sum()
180 + angle, color=color) ell.set_clip_box(splot.bbox) ell.set_alpha(0.5) splot.add_artist(ell) def plot_lda_cov(lda, splot): plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red') plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue') def plot_qda_cov(qda, splot): plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red') plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue') ############################################################################### for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]): # LDA lda = LDA() y_pred = lda.fit(X, y, store_covariance=True).predict(X) splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1) plot_lda_cov(lda, splot) pl.axis('tight') # QDA qda = QDA() y_pred = qda.fit(X, y, store_covariances=True).predict(X) splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2) plot_qda_cov(qda, splot) pl.axis('tight') pl.suptitle('LDA vs QDA') pl.show()
splot.add_artist(ell)
# NOTE(review): the statement above looks like the tail of a definition that
# starts outside this view; verify its indentation against the enclosing body.


def plot_lda_cov(lda, splot):
    """Call plot_ellipse for the first two class means of ``lda``.

    Both calls receive the same ``lda.covariance_`` matrix; ``means_[0]`` is
    drawn in red and ``means_[1]`` in blue.  ``splot`` is passed through to
    plot_ellipse unchanged.
    """
    for class_idx, color in enumerate(('red', 'blue')):
        plot_ellipse(splot, lda.means_[class_idx], lda.covariance_, color)


def plot_qda_cov(qda, splot):
    """Call plot_ellipse for the first two class means of ``qda``.

    Each call receives that class's own entry of ``qda.covariances_``;
    ``means_[0]`` is drawn in red and ``means_[1]`` in blue.  ``splot`` is
    passed through to plot_ellipse unchanged.
    """
    for class_idx, color in enumerate(('red', 'blue')):
        plot_ellipse(splot, qda.means_[class_idx],
                     qda.covariances_[class_idx], color)


###############################################################################
# For each dataset, fit both models on it, predict the same samples and hand
# the result to plot_data (fig_index 2*i + 1 for LDA, 2*i + 2 for QDA).
for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
    # LDA
    lda = LDA()
    y_pred = lda.fit(X, y, store_covariance=True).predict(X)
    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
    plot_lda_cov(lda, splot)
    pl.axis('tight')

    # QDA (note the plural ``store_covariances`` keyword)
    qda = QDA()
    y_pred = qda.fit(X, y, store_covariances=True).predict(X)
    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
    plot_qda_cov(qda, splot)
    pl.axis('tight')

pl.suptitle('LDA vs QDA')
pl.show()