def naive_bayes_with_lda():
    train, train_target, test, test_target = load_polluted_spambase()
    print "Train data: %s, Train Label: %s" % (train.shape, train_target.shape)
    print "Test data: %s, Test Label: %s" % (test.shape, test_target.shape)

    start = timeit.default_timer()
    lda = LDA(n_components=100)
    train = lda.fit_transform(train, train_target)
    test = lda.transform(test)
    print lda
    print "Train data: %s, Train Label: %s" % (train.shape, train_target.shape)
    print "Test data: %s, Test Label: %s" % (test.shape, test_target.shape)

    cf = GaussianNaiveBayes()
    cf.fit(train, train_target)
    raw_predicts = cf.predict(test)
    predict_class = cf.predict_class(raw_predicts)

    cm = confusion_matrix(test_target, predict_class)
    print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (
        cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
    er, acc, fpr, tpr = confusion_matrix_analysis(cm)
    print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr, tpr)

    stop = timeit.default_timer()
    print "Total Run Time: %s secs" % (stop - start)
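# Note on the snippet above: scikit-learn's LDA yields at most (n_classes - 1)
# discriminant axes, so on a two-class problem like spambase the request for
# n_components=100 collapses to a single column (the old sklearn.lda.LDA capped
# silently; recent releases may raise instead). A minimal sketch, assuming a
# synthetic binary problem:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X = np.random.randn(100, 20)          # 100 samples, 20 features
y = np.random.randint(0, 2, 100)      # two classes

lda = LinearDiscriminantAnalysis()    # default n_components = min(n_classes - 1, n_features)
print(lda.fit_transform(X, y).shape)  # (100, 1) -- one discriminant axis for two classes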
def runLDA(all_kmer_vectors_array, labels):
    sklearn_lda = LDA(n_components=4)
    X = np.array(all_kmer_vectors_array)
    y = np.array(labels)
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)
    print(X_lda_sklearn)
    return X_lda_sklearn
def lda_scikit():
    df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
                          header=None)
    df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                       'Alcalinity of ash', 'Magnesium', 'Total phenols',
                       'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                       'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
                       'Proline']
    X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.3, random_state=0)
    sc = StandardScaler()
    X_train_std = sc.fit_transform(X_train)
    X_test_std = sc.transform(X_test)
    # pdb.set_trace()  # debugging breakpoint
    lda = LDA(n_components=3)
    X_train_lda = lda.fit_transform(X_train_std, y_train)
    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)
    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.tight_layout()
    plt.savefig(PL5 + 'lda_scikit.png', dpi=300)
    plt.close()
    X_test_lda = lda.transform(X_test_std)
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.tight_layout()
    plt.savefig(PL5 + 'lda_scikit_test.png', dpi=300)
def combine_lda_pca(X, y):
    sklearn_lda = LDA(n_components=2)
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)
    sklearn_pca = sklearnPCA(n_components=2)  # PCA on the LDA projection
    X_ldapca_sklearn = sklearn_pca.fit_transform(X_lda_sklearn)
    plot_scikit_lda(X_ldapca_sklearn, title='LDA+PCA via scikit-learn', mirror=(-1))
def _lda(self):
    """Performs linear discriminant analysis of the data."""
    if not self.target:
        print "target has not been set, it is required for LDA"
        return
    lda = LDA(n_components=self.n)
    # fit_transform already returns the projected data
    self.transformed_data = lda.fit_transform(self.data, self.target)
def reduceDimensionLDA(mat, k):
    print mat.shape
    labels = mat[:, -1]
    mat = mat[:, :-1]
    lda = LDA(n_components=k)
    data = lda.fit_transform(mat, labels)
    data = addLabels(data, labels)
    print data
    return data
def lda(df, samples, sample_labels, plot_name='lda_plot.png'):
    df = df.copy()
    df = df.transpose()
    df = df.ix[samples]
    df_nrm = normalize_min_max(df)
    X = df_nrm.values
    label_dict, y = encode_labels(sample_labels)
    ldas = LDA(n_components=2)
    X_lda = ldas.fit_transform(X, y)
    plot_scikit_lda(X_lda, y, label_dict, samples)
def lda_decompose(dataset, n):
    lda = LDA(n_components=n)
    reduced_features = lda.fit_transform(dataset.all.features, dataset.all.target)
    training_size = dataset.training_size
    training = Data(reduced_features[:training_size, :],
                    dataset.all.target[:training_size])
    testing = Data(reduced_features[training_size:, :],
                   dataset.all.target[training_size:])
    return DataSet(training, testing)
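# A caution on lda_decompose above: fitting the LDA on dataset.all lets the test
# rows influence the projection. A leak-free sketch against the same hypothetical
# Data/DataSet containers fits on the training slice only:
def lda_decompose_no_leak(dataset, n):
    lda = LDA(n_components=n)
    k = dataset.training_size
    train_feats = lda.fit_transform(dataset.all.features[:k, :],
                                    dataset.all.target[:k])
    test_feats = lda.transform(dataset.all.features[k:, :])
    return DataSet(Data(train_feats, dataset.all.target[:k]),
                   Data(test_feats, dataset.all.target[k:]))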
def execute(self, i, j):
    kf = KFold(len(self.x_train), n_folds=self.k_cross)
    own_kappa = []
    for train_idx, test_idx in kf:
        x_train, x_test = self.x_train[train_idx], self.x_train[test_idx]
        y_train, y_test = self.y_train[train_idx], self.y_train[test_idx]
        # fit the projection on the training fold only, then apply it to the test fold
        dim_red = LDA()
        x_train = dim_red.fit_transform(x_train, y_train)
        x_test = dim_red.transform(x_test)
        stat_obj = self.stat_class()  # instantiate the classifier via the stored class reference
        stat_obj.train(x_train, y_train)
        y_pred = [0 for i in xrange(len(y_test))]
        for i in range(len(x_test)):
            # clamp predictions to the allowed rating range
            val = int(np.round(stat_obj.predict(x_test[i])))
            if val > self.range_max:
                val = self.range_max
            if val < self.range_min:
                val = self.range_min
            y_pred[i] = [val]
        y_pred = np.matrix(y_pred)
        cohen_kappa_rating = own_wp.quadratic_weighted_kappa(
            y_test, y_pred, self.range_min, self.range_max)
        self.values.append(cohen_kappa_rating)
    return str(sum(self.values) / self.k_cross)
def get_LDA_performance(test_df, X_std, y):
    X_test = test_df.ix[:, 'x.1':'x.10'].values
    X_std_test = StandardScaler().fit_transform(X_test)
    y_test = test_df.ix[:, 'y'].values

    lda_scores_training = []
    lda_scores_test = []
    qda_scores_training = []
    qda_scores_test = []
    knn_scores_training = []
    knn_scores_test = []

    for d in range(1, 11):
        lda = LDA(n_components=d)
        Xred_lda_training = lda.fit_transform(X_std, y)
        Xred_lda_test = lda.transform(X_std_test)

        lda_model = LDA()
        lda_model.fit(Xred_lda_training, y)
        qda_model = QDA()
        qda_model.fit(Xred_lda_training, y)
        knn_model = KNeighborsClassifier(n_neighbors=10)
        knn_model.fit(Xred_lda_training, y)

        lda_scores_training.append(1 - lda_model.score(Xred_lda_training, y))
        lda_scores_test.append(1 - lda_model.score(Xred_lda_test, y_test))
        qda_scores_training.append(1 - qda_model.score(Xred_lda_training, y))
        qda_scores_test.append(1 - qda_model.score(Xred_lda_test, y_test))
        knn_scores_training.append(1 - knn_model.score(Xred_lda_training, y))
        knn_scores_test.append(1 - knn_model.score(Xred_lda_test, y_test))

    plt.plot(range(10), lda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), lda_scores_test, 'b--', label="Test data")
    plt.title("LDA vs LDA")
    plt.xlabel('k')
    plt.ylabel('Score')
    plt.show()

    plt.plot(range(10), qda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), qda_scores_test, 'b--', label="Test data")
    plt.title("QDA vs LDA")
    plt.show()

    plt.plot(range(10), knn_scores_training, 'r--', label="Train data")
    plt.plot(range(10), knn_scores_test, 'b--', label="Test data")
    plt.title("KNN vs LDA")
    plt.show()
class LDA(AbstractProjection):

    def __init__(self, **kw):
        super(LDA, self).__init__()
        self.lda = ScikitLDA(**kw)

    def train(self, features, labels):
        red_feats = self.lda.fit_transform(features, labels)
        self.V = np.std(red_feats, axis=0)

    def project(self, feats, whiten=True):
        lda_feats = self.lda.transform(feats)
        if whiten:
            lda_feats /= self.V
        return lda_feats
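# Minimal usage sketch for a projection wrapper like the one above; the toy data
# are an assumption, only the scikit-learn calls are real. train() fits LDA and
# records the per-axis spread, project() whitens new samples by that spread:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as ScikitLDA

X = np.vstack([np.random.randn(50, 5) + off for off in (0, 3, 6)])  # 3 separated blobs
y = np.repeat([0, 1, 2], 50)

lda = ScikitLDA(n_components=2)
red = lda.fit_transform(X, y)            # what train() computes
V = np.std(red, axis=0)                  # the whitening scale stored as self.V
print((lda.transform(X[:5]) / V).shape)  # project(..., whiten=True) -> (5, 2)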
def execute(self, i, j):
    # fit once on the full training set and pickle the projection and the model
    x_train = self.x_train
    y_train = self.y_train
    dim_red = LDA()
    x_train = dim_red.fit_transform(x_train, y_train)
    with open('dumped_dim_red_' + str(i) + '.pkl', 'wb') as fid:
        cPickle.dump(dim_red, fid)
    stat_obj = self.stat_class()  # instantiate the classifier via the stored class reference
    stat_obj.train(x_train, y_train)
    with open('dumped_' + str(j) + '_' + str(i) + '.pkl', 'wb') as fid:
        cPickle.dump(stat_obj, fid)

    kf = KFold(len(self.x_train), n_folds=self.k_cross)
    own_kappa = []
    for train_idx, test_idx in kf:
        x_train, x_test = self.x_train[train_idx], self.x_train[test_idx]
        y_train, y_test = self.y_train[train_idx], self.y_train[test_idx]
        dim_red = LDA()
        x_train = dim_red.fit_transform(x_train, y_train)
        x_test = dim_red.transform(x_test)
        stat_obj = self.stat_class()
        stat_obj.train(x_train, y_train)
        y_pred = [0 for i in xrange(len(y_test))]
        for i in range(len(x_test)):
            val = int(np.round(stat_obj.predict(x_test[i])))
            if val > self.range_max:
                val = self.range_max
            if val < self.range_min:
                val = self.range_min
            y_pred[i] = [val]
        y_pred = np.matrix(y_pred)
        cohen_kappa_rating = own_wp.quadratic_weighted_kappa(
            y_test, y_pred, self.range_min, self.range_max)
        self.values.append(cohen_kappa_rating)
    return sum(self.values) / self.k_cross
def lda_data(X, y, n_components=2, num_data_points=-1):
    lda = LDA(n_components=n_components)
    if num_data_points > 0:
        X = X[:num_data_points, :]
        y = y[:num_data_points]
    print "Performing mapping"
    start = timeit.default_timer()
    mapped = lda.fit_transform(X, y)
    end = timeit.default_timer()
    print "Mapping completed in %f seconds" % (end - start)
    return mapped, lda
def get_LDA(X_std, y):
    sklearn_lda = LDA(n_components=2)
    Xred_lda = sklearn_lda.fit_transform(X_std, y)
    cmap = plt.cm.get_cmap('Accent')
    mclasses = (1, 2, 3, 4, 5, 6, 7, 8, 9)
    mcolors = [cmap(i) for i in np.linspace(0, 1, 10)]
    plt.figure(figsize=(12, 8))
    for lab, col in zip(mclasses, mcolors):
        plt.scatter(Xred_lda[y == lab, 0], Xred_lda[y == lab, 1],
                    label=lab, c=col)
    plt.xlabel('LDA/Fisher Direction 1')
    plt.ylabel('LDA/Fisher Direction 2')
    leg = plt.legend(loc='upper right', fancybox=True)
    plt.show()
def with_lda(X_train_std, y_train, X_test_std, y_test):
    from sklearn.lda import LDA
    lda = LDA(n_components=2)
    X_train_lda = lda.fit_transform(X_train_std, y_train)
    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)
    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()
    X_test_lda = lda.transform(X_test_std)
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()
def execute(self):
    kf = KFold(len(self.x_train), n_folds=self.k_cross)
    own_kappa = []
    for train_idx, test_idx in kf:
        x_train, x_test = self.x_train[train_idx], self.x_train[test_idx]
        y_train, y_test = self.y_train[train_idx], self.y_train[test_idx]
        dim_red = LDA()
        x_train = dim_red.fit_transform(x_train, y_train)
        x_test = dim_red.transform(x_test)
        stat_obj = self.stat_class()  # instantiate the classifier via the stored class reference
        stat_obj.train(x_train, y_train)
        y_pred = [0 for i in xrange(len(y_test))]
        for i in range(len(x_test)):
            val = int(np.round(stat_obj.predict(x_test[i])))
            if val > self.range_max:
                val = self.range_max
            if val < self.range_min:
                val = self.range_min
            y_pred[i] = [val]
        y_pred = np.matrix(y_pred)
        cohen_kappa_rating = own_wp.quadratic_weighted_kappa(
            y_test, y_pred, self.range_min, self.range_max)
        self.values.append(cohen_kappa_rating)
    return str(sum(self.values) / self.k_cross)
from Wine import getWineData
from Util import plot_decision_regions
from sklearn.lda import LDA
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt

X_train_std, X_test_std, y_train, y_test = getWineData()

lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
lr = LogisticRegression()
lr.fit(X_train_lda, y_train)
plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()
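# Portability note: sklearn.lda, imported in several snippets here, was deprecated
# in scikit-learn 0.17 and removed in 0.19. On current versions the equivalent
# import is below; aliasing it to LDA keeps the rest of each snippet unchanged:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=2)  # same fit_transform / transform API as before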
print "data done"
print "logistic initialized"
print "fitted data"

skf = StratifiedKFold(data[:, -1], n_folds=10, shuffle=True)
output = []
finalscore = 0
counter = 0
for train, test in skf:
    counter = counter + 1
    # project the training fold, then apply the fitted projection to the test fold
    newdata = prj.fit_transform([normdata[i][:] for i in train],
                                [data[i][-1] for i in train])
    newtestdata = prj.transform([normdata[i][:] for i in test])
    clf = GradientBoostingClassifier(warm_start=True)
    clf = clf.fit(newdata, [data[i][-1] for i in train])
    prediction = clf.predict(newtestdata)
    finalscore = finalscore + score.get_score(prediction,
                                              [data[i][-1] for i in test])
    print "done"
    except:
        pass
for i in range(len(s2.split('\n'))):
    Test.append([])
    for j in s2.split('\n')[i].split(','):
        try:
            Test[i].append(float(j))
        except:
            pass

pca = PCA(n_components=2)
x = pca.fit_transform(numpy.array(Data[0:len(Data) - 1]))
y = pca.fit_transform(numpy.array(Test[0:len(Test) - 1]))

regr = linear_model.LinearRegression()
regr.fit(x, 1000 * [0] + 1000 * [1])

clf = LDA(n_components=2)
z = clf.fit_transform(numpy.array(Data[0:len(Data) - 1]), 1000 * [0] + 1000 * [1])
New = clf.predict(Test[0:len(Test) - 1])

# least-squares direction fitted on the PCA projection of the training data
A = numpy.transpose(x)
B = numpy.dot(A, x)
C = numpy.dot(inv(B), A)
D = numpy.dot(C, 1000 * [-1] + 1000 * [1])
E = numpy.transpose(D)
F = numpy.dot(y, D)

outx = []
for i in F:
    if i >= 0:
        outx.append(1)
    else:
        outx.append(0)

outp = []
for i in regr.predict(y):
import pandas as pd
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.lda import LDA

# Load data
iris = datasets.load_iris()
idata = iris.data
itarget = iris.target
species = iris.target_names
iris_df = pd.DataFrame(idata, columns=['Sepal Length', 'Sepal Width',
                                       'Petal Length', 'Petal Width'])
iris_df['Species'] = itarget

# PCA
pca = PCA(n_components=2)
reduced_idata = pca.fit_transform(idata)

# LDA
lda = LDA(n_components=2)
reduced_itarget = lda.fit_transform(idata, itarget)
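# The two calls above show the key difference: PCA is unsupervised and sees only
# idata, while LDA also uses the class labels to pick separating directions.
# A quick shape check (self-contained sketch on the same iris data):
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

iris = datasets.load_iris()
X_pca = PCA(n_components=2).fit_transform(iris.data)  # no labels used
X_lda = LinearDiscriminantAnalysis(n_components=2).fit_transform(iris.data, iris.target)
print(X_pca.shape, X_lda.shape)  # (150, 2) (150, 2): same size, different criteria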
from sklearn.datasets import load_iris
import numpy as np

iris = load_iris()
print 'iris.data[-10:]\n', iris.data[-10:]
print 'iris.target[-10:]\n', iris.target[-10:]
print 'iris.data.shape:', iris.data.shape

from sklearn.lda import LDA

# Data dimension reduction
lda = LDA()  # default n_components setting: at most C-1 discriminants
lda_result1 = lda.fit_transform(iris.data, iris.target)
print 'LDA result 1:', lda_result1.shape

lda = LDA(n_components=1)
lda_result2 = lda.fit_transform(iris.data, iris.target)
print 'LDA result 2:', lda_result2.shape

# Visualization
import matplotlib.pyplot as plt

plt.subplot(1, 2, 1)
plt.scatter(lda_result1[iris.target == 0, 0], lda_result1[iris.target == 0, 1], color='r')
plt.scatter(lda_result1[iris.target == 1, 0], lda_result1[iris.target == 1, 1], color='g')
plt.scatter(lda_result1[iris.target == 2, 0], lda_result1[iris.target == 2, 1], color='b')
plt.title('LDA on iris (1)')

plt.subplot(1, 2, 2)
plt.stem(lda_result2)
plt.title('LDA on iris (2)')
plt.show()
# Each point is assigned its class and a colour to tell the classes apart
for lab, col in zip(mclasses, mcolors):
    plt.scatter(Xred_pca[y == lab, 0], Xred_pca[y == lab, 1], label=lab, c=col)

# Configure the axis labels
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
leg = plt.legend(loc='upper right', fancybox=True)

######## Question (d) ############################################################

# Run LDA with two dimensions
sklearn_lda = LDA(n_components=2)
# Fit to the training data
Xred_lda = sklearn_lda.fit_transform(X_std, y)
# Pick the colour palette
cmap = plt.cm.get_cmap('hsv')
# Define the classes
mclasses = (1, 2, 3, 4, 5, 6, 7, 8, 9)
mcolors = [cmap(i) for i in np.linspace(0, 1, 10)]
# Set the figure size
plt.figure(figsize=(12, 8))
# Each point is assigned its class and a colour to tell the classes apart
for lab, col in zip(mclasses, mcolors):
    plt.scatter(Xred_lda[y == lab, 0], Xred_lda[y == lab, 1], label=lab, c=col)
with closing(open(path.join(__file__, '..', 'data', 'X_train.pkl'), 'rb')) as pkl:
    data = cPickle.load(pkl)
with closing(open(path.join(__file__, '..', 'data', 'y_train.pkl'), 'rb')) as pkl:
    target = cPickle.load(pkl)

vectorizer = CountVectorizer(max_df=.75, min_df=2, ngram_range=(1, 2))
X = vectorizer.fit_transform(data, array(target))
transformer = TfidfTransformer()
X = transformer.fit_transform(X, array(target))
#hv = HashingVectorizer()

clf = LDA()
clf.fit_transform(X.toarray(), array(target), store_covariance=True)

for name, obj in [('scalings.txt', clf.scalings_), ('coef.txt', clf.coef_),
                  ('covariance.txt', clf.covariance_), ('xbar.txt', clf.xbar_),
                  ('means.txt', clf.means_)]:
    with closing(open(path.join(opt['out_path'], name), 'wb')) as out:
        print 'saving %s' % name
        for row in obj:
            out.write(str(row) + '\r\n')

print 'priors'
print clf.priors

del X
del data
del target
# Kernel PCA keeping 300 components
kpca = KernelPCA(kernel="rbf", n_components=300, gamma=1)
X_kpca = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)
print(kpca)
print(X_kpca.shape)

# LDA for dimensionality reduction. It should keep [classes-1] components.
lda = LDA()
print(lda)
X_lda = lda.fit_transform(X_kpca, y_train)
X_test = lda.transform(X_test)
print(X_lda.shape)

# kNN classification
start = int(round(time.time() * 1000))
clf = neighbors.KNeighborsClassifier(n_neighbors=5)
clf.fit(X_lda, y_train)
print(clf)
print("---------(5) Cross validation accuracy--------")
print(cross_validation.cross_val_score(clf, X_lda, y_train, cv=5))
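# The same KernelPCA -> LDA -> kNN chain can be written as a scikit-learn Pipeline,
# which keeps the fit/transform bookkeeping in one object and refits the whole
# chain inside each CV fold (so the reducers never see the held-out fold). A
# sketch with the same hyperparameters, assuming the X_train / y_train arrays
# from above:
from sklearn.pipeline import Pipeline
from sklearn.decomposition import KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

pipe = Pipeline([
    ("kpca", KernelPCA(kernel="rbf", n_components=300, gamma=1)),
    ("lda", LinearDiscriminantAnalysis()),
    ("knn", KNeighborsClassifier(n_neighbors=5)),
])
print(cross_val_score(pipe, X_train, y_train, cv=5))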
        acc, clusters = run_clustering(X_new)
        print "average EM score after X modified with ICA", n, "components, clusters =", clusters, "silhouette score =", acc

if dralg == 'rp':
    #######################################################
    ######## KMeans after Sparse Random Projection ########
    #######################################################
    for n in range(1, len(df.columns) + 1):
        # create the random projection
        sp = SparseRandomProjection(n_components=n)
        X_new = sp.fit_transform(X)
        acc, clusters = run_clustering(X_new)
        print "average EM score after X modified with Random Projection", n, "components, clusters =", clusters, "silhouette score =", acc

if dralg == 'lda':
    ##################################
    ######## KMeans after LDA ########
    ##################################
    for n in range(1, len(df.columns) + 1):
        for solver in ['svd', 'eigen']:
            # fit LDA with the given solver
            lda = LDA(n_components=n, solver=solver)
            X_new = lda.fit_transform(X, y)
            acc, clusters = run_clustering(X_new)
            print "average EM score after X modified with LDA", n, "components, clusters =", clusters, "silhouette score =", acc

plt.show()
def execute(self, i, j):
    global save1
    global save2
    jk = i
    kf = KFold(len(self.x_train), n_folds=self.k_cross)
    own_kappa = []
    for train_idx, test_idx in kf:
        x_train, x_test = self.x_train[train_idx], self.x_train[test_idx]
        y_train, y_test = self.y_train[train_idx], self.y_train[test_idx]
        # fit the projection on the training fold only, then apply it to the test fold
        dim_red = LDA()
        x_train = dim_red.fit_transform(x_train, y_train)
        x_test = dim_red.transform(x_test)
        stat_obj = self.stat_class()  # instantiate the classifier via the stored class reference
        stat_obj.train(x_train, y_train)
        y_pred = [0 for i in xrange(len(y_test))]
        if int(jk) == 1:
            # keep the fitted model and projection around for later use
            save1 = stat_obj
            save2 = dim_red
        for i in range(len(x_test)):
            val = int(np.round(stat_obj.predict(x_test[i])))
            if val > self.range_max:
                val = self.range_max
            if val < self.range_min:
                val = self.range_min
            y_pred[i] = [val]
        y_pred = np.matrix(y_pred)
        cohen_kappa_rating = own_wp.quadratic_weighted_kappa(
            y_test, y_pred, self.range_min, self.range_max)
        self.values.append(cohen_kappa_rating)
    return str(sum(self.values) / self.k_cross)
importance_w, Xw_cols = zip(*sorted(zip(importance_w, Xw_cols)))
fig = plt.figure(figsize=(6, 4), dpi=80).add_subplot(111)
plt.bar(range(len(Xw_cols)), importance_w, align='center')
plt.xticks(range(len(Xw_cols)), Xw_cols, rotation='vertical')
plt.xlabel('Features')
plt.ylabel('Importance of features')
plt.title("PCA for white wine")
plt.show()

# PCA for red wine
pca_transf_r = pca.fit_transform(Xr_minmax)
importance_r = pca.explained_variance_ratio_
print importance_r
importance_r, Xr_cols = zip(*sorted(zip(importance_r, Xr_cols)))
fig = plt.figure(figsize=(6, 4), dpi=80).add_subplot(111)
plt.bar(range(len(Xr_cols)), importance_r, color='red', align='center')
plt.xticks(range(len(Xr_cols)), Xr_cols, rotation='vertical')
plt.xlabel('Features')
plt.ylabel('Importance of features')
plt.title("PCA for red wine")
plt.show()

# LDA for white wine
from sklearn.lda import LDA
lda = LDA(n_components=None)
transf_lda = lda.fit_transform(Xw_minmax, Yw)
def fit_lda(df, active_features, y_col, k=2):
    lda = LDA(n_components=k)
    X = lda.fit_transform(df[active_features], df[y_col])
    return X
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
plt.axis('tight')
plt.axis('off')
plt.tight_layout()

plot_estimator(svc, X, y)
plt.show()
plt.clf()

### LDA ###
sklearn_lda = LDA(n_components=2)
X_lda_sklearn = sklearn_lda.fit_transform(X, y)
print X_lda_sklearn

# 4. Petal Length x Sepal Width for all 3 species
plt.scatter(iris.data[:, 1], iris.data[:, 2], c=iris.target)
plt.xlabel(iris.feature_names[1])
plt.ylabel(iris.feature_names[2])

plt.scatter(iris.data[0:150, 1], iris.data[0:150, 2], c=iris.target[0:150])
plt.xlabel(iris.feature_names[1])
plt.ylabel(iris.feature_names[2])

from sklearn import svm
svc = svm.SVC(kernel='linear', C=10)  # larger C penalizes margin violations more (harder, narrower margin)
X = X_lda_sklearn  # reassigning X to the LDA projection
class DotProduct(DP1):
    name = 'LDA'
    LDA_components = 2

    def __init__(self, X, Y, room, bin_size):
        assert (room[0][1] - room[0][0]) % bin_size == 0
        assert (room[1][1] - room[1][0]) % bin_size == 0
        self.bin_size = bin_size
        self.room = room
        self.xblen = (room[0][1] - room[0][0]) / bin_size
        self.yblen = (room[1][1] - room[1][0]) / bin_size
        self.bins = self.xblen * self.yblen
        self.labels = np.unique(Y)
        newX = np.zeros([X.shape[0], self.LDA_components + self.bins])
        newX[:, -self.bins:] = X[:, -self.bins:]
        self.lda = LDA(n_components=self.LDA_components)
        tmp = self.lda.fit_transform(X[:, :-self.bins], Y)
        # import pdb; pdb.set_trace()  # debugging breakpoint
        newX[:, :self.LDA_components] = tmp
        # This is if X = [cell1, cell2, ..., celln, binfrac1, ..., binfrac k^2]
        self.train(newX, Y, room, bin_size)

    def classify(self, X):
        bin_frac = X[-self.bins:].reshape([self.xblen, self.yblen])
        X = X[:-self.bins]
        X = np.squeeze(self.lda.transform(X))
        # self.base[cell id, lbl, xbin, ybin] = rate
        cntxt0 = np.einsum('cxy,c,xy', self.base[:, 0, :, :], X, bin_frac)
        cntxt1 = np.einsum('cxy,c,xy', self.base[:, 1, :, :], X, bin_frac)
        if logging.getLogger().level <= 5:
            # verify the einsum contractions against an explicit loop
            tmp0 = 0
            for cell in range(len(X)):
                tmp0 += np.sum(X[cell] * bin_frac * self.base[cell, 0, :, :])
            tmp1 = 0
            for cell in range(len(X)):
                tmp1 += np.sum(X[cell] * bin_frac * self.base[cell, 1, :, :])
            assert np.allclose(tmp0, cntxt0)
            assert np.allclose(tmp1, cntxt1)
        if cntxt0 > cntxt1:
            return {self.labels[0]: 1, self.labels[1]: 0}
        else:
            return {self.labels[0]: 0, self.labels[1]: 1}
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower right')
# plt.tight_layout()
# plt.savefig('./figures/lda2.png', dpi=300)
plt.show()

#############################################################################
print(50 * '=')
print('Section: LDA via scikit-learn')
print(50 * '-')

lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
# plt.tight_layout()
# plt.savefig('./images/lda3.png', dpi=300)
plt.show()

X_test_lda = lda.transform(X_test_std)
plot_decision_regions(X_test_lda, y_test, classifier=lr)
# Construct dictionary for saving the arrays
final_data = {'M1_d_data': M1_d_data, 'S1_d_data': S1_d_data,
              'pmd_d_data': pmd_d_data, 'pmv_d_data': pmv_d_data,
              'M1_c_data': M1_c_data, 'S1_c_data': S1_c_data,
              'pmd_c_data': pmd_c_data, 'pmv_c_data': pmv_c_data,
              'targets': targets}

# Construct temporary dictionary for figure generation
final_data_no_targets = {'M1 at Delivery': M1_d_data, 'S1 at Delivery': S1_d_data,
                         'PmD at Delivery': pmd_d_data, 'PmV at Delivery': pmv_d_data,
                         'M1 at Cue': M1_c_data, 'S1 at Cue': S1_c_data,
                         'PmD at Cue': pmd_c_data, 'PmV at Cue': pmv_c_data}

np.save("multi_reward" + filename[-15:-4] + "_hists_" + name_of_bin,
        (final_data, unit_names))

# Perform PCA on the PSTH, then LDA on the PCA transform of the PSTH data,
# and save a figure showing the results for each bin
for key, value in final_data_no_targets.iteritems():
    print key
    lda = LDA(n_components=2)
    pca = RandomizedPCA(n_components=20)
    proj = pca.fit_transform(value)
    proj = lda.fit_transform(proj, targets)
    print proj.shape
    plt.clf()
    plt.scatter(proj[:, 0], proj[:, 1], c=targets)
    plt.title(key + " from " + name_of_bin)
    plt.xlabel("LD1")
    plt.ylabel("LD2")
    plt.colorbar()
    plt.savefig(key + " from " + name_of_bin + "s.png")
    plt.clf()
    for line in fileLines:
        result.append(line.split())
    return result

## II.1.1 LDA using sklearn package ##
trainingData = fileParser("IrisTrain2014.dt")  # Parse iris files
testData = fileParser("IrisTest2014.dt")

x = []
y = []
for entry in trainingData:
    x.append([float(entry[0]), float(entry[1])])
    y.append(int(entry[2]))

lda = LDA(n_components=2)  # initialize LDA for 2 input features
transformedLda = lda.fit_transform(x, y)

# Function to plot the transformed data
def plotLda(lda):
    colors = ["red", "green", "blue"]
    i = 0
    while i < 3:
        xs = []
        ys = []
        n = 0
        while n < len(lda):
            if y[n] == i:
                xs.append(lda[n][0])
                ys.append(lda[n][1])
            n += 1
        plotter.scatter(x=xs, y=ys, color=colors[i])
        i += 1
    plt.scatter(Xred_pca[y == lab, 0], Xred_pca[y == lab, 1], label=lab, c=col)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
leg = plt.legend(loc='upper right', fancybox=True)
plt.show()

####################################################
########## Part d: LDA ############################
####################################################
sklearn_lda = LDA(n_components=2)
Xred_lda = sklearn_lda.fit_transform(X_std, y)

cmap = plt.cm.get_cmap('Set1')
mclasses = (1, 2, 3, 4, 5, 6, 7, 8, 9)
mcolors = [cmap(i) for i in np.linspace(0, 1, 10)]
plt.figure(figsize=(12, 8))

for lab, col in zip(mclasses, mcolors):
    plt.scatter(Xred_lda[y == lab, 0], Xred_lda[y == lab, 1], label=lab, c=col)

plt.xlabel('LDA/Fisher Direction 1')
plt.ylabel('LDA/Fisher Direction 2')
leg = plt.legend(loc='upper right', fancybox=True)
plt.show()

####################################################
########## Part f: Building the classifier ########
first_half = np.vstack(first_half)
second_half = np.hstack(Data[no_mappings / 2:no_mappings, :, :, :])
second_half = np.vstack(second_half)
Data = np.vstack([first_half, second_half])

# for true targets uncomment next line
#targets = np.hstack([targets, targets])
# for random targets uncomment next line
targets = np.random.randint(1, no_locations + 1, no_mappings * no_locations * no_thwacks)

lda = LDA(n_components=14)
pca = RandomizedPCA(n_components=125)
classifier = KNeighborsClassifier(8)

proj = pca.fit_transform(Data)
proj = lda.fit_transform(proj, targets)
proj1 = pca.fit_transform(Data)
proj1 = lda.fit_transform(proj1, mapping_targets)

print(file)
plt.clf()
plt.scatter(proj[0:proj.shape[0] / 2, 0], proj[0:proj.shape[0] / 2, 1],
            c=targets[0:targets.shape[0] / 2])
plt.title(file.rsplit('_')[0] + '_' + file.rsplit('_')[1] + " Before " + file.rsplit('_')[2] + " injection")
plt.colorbar()
plt.ylabel("LD1")
plt.xlabel("LD2")
plt.savefig(file.rsplit('_')[0] + '_' + file.rsplit('_')[1] + " Before " + file.rsplit('_')[2] + file[-11:-4] + " injection.svg")
plt.show()
plt.clf()
plt.scatter(proj[proj.shape[0] / 2:proj.shape[0], 0], proj[proj.shape[0] / 2:proj.shape[0], 1],
            c=targets[targets.shape[0] / 2:targets.shape[0]])
eigen_vals = pca.explained_variance_ratio_  # eigenvalue share of each PC (importance)

# --- Fitting model with PCA
pca = PCA(n_components=2)  # only keep the first 2 PCs
lr = LogisticRegression()
X_train_pca = pca.fit_transform(X_train_std)  # fit with the training set
X_test_pca = pca.transform(X_test_std)        # only transform the test set
lr.fit(X_train_pca, Y_train)

# (2) Linear Discriminant Analysis (LDA) - linearly separable
# --- Evaluate importance of the LDA components
from sklearn.linear_model import LogisticRegression
from sklearn.lda import LDA
lda = LDA(n_components=None)
X_train_lda = lda.fit_transform(X_train_std, Y_train)  # supervised: fit with (X, y)
eigen_vals = lda.explained_variance_ratio_  # eigenvalue share of each discriminant

# --- Fitting model with LDA
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, Y_train)  # supervised: fit with (X, y)
X_test_lda = lda.transform(X_test_std)                 # only transform the test set
lr.fit(X_train_lda, Y_train)

# (3) Kernel Principal Component Analysis (K-PCA) - non-linearly separable
from sklearn.decomposition import KernelPCA
scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)  # other kernel choices possible
X_skernpca = scikit_kpca.fit_transform(X_train_std)
from sklearn.svm import LinearSVC
kappa_scorer = make_scorer(cohen_kappa_score)
grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, scoring=kappa_scorer)

# EXAMPLE 20 - LATENT FACTOR ANALYSIS (LFA)
from sklearn.decomposition import FactorAnalysis
fact_2c = FactorAnalysis(n_components=2)
X_factor = fact_2c.fit_transform(iris.data)
plt.scatter(X_factor[:, 0], X_factor[:, 1], c=iris.target, alpha=0.8,
            s=60, marker='o', edgecolors='white')
plt.show()

# EXAMPLE 21 - LINEAR DISCRIMINANT ANALYSIS (LDA)
from sklearn.lda import LDA
lda_2c = LDA(n_components=2)
X_lda_2c = lda_2c.fit_transform(iris.data, iris.target)
plt.scatter(X_lda_2c[:, 0], X_lda_2c[:, 1], c=iris.target, alpha=0.8,
            edgecolors='none')
plt.show()

# EXAMPLE 22 - KERNEL PCA
from sklearn.decomposition import KernelPCA
kpca_2c = KernelPCA(n_components=2, kernel='rbf')
X_kpca_2c = kpca_2c.fit_transform(fake_circular_data)
plt.scatter(X_kpca_2c[:, 0], X_kpca_2c[:, 1], c=fake_circular_target, alpha=0.8,
            s=60, marker='o', edgecolors='white')
plt.show()

# EXAMPLE 23 - MARS
import numpy
from pyearth import Earth
from matplotlib import pyplot
plt.plot(transformedXg[:x1g.shape[0], 0], transformedXg[:x1g.shape[0], 1], 'o')
plt.plot(transformedXg[x1g.shape[0]:, 0], transformedXg[x1g.shape[0]:, 1], 'x')
plt.show()

## LDA
from sklearn.lda import LDA

lda = LDA(n_components=5)
xg = pd.concat([x1g, x2g])
yg = np.zeros(xg.shape[0])
yg[:x1g.shape[0]] = np.ones(x1g.shape[0])
transformedXg2 = lda.fit_transform(xg, yg)
print transformedXg2.shape
plt.plot(transformedXg2[:x1g.shape[0], 0], np.zeros(x1g.shape[0]), 'o')
plt.plot(transformedXg2[x1g.shape[0]:, 0], np.zeros(x2g.shape[0]), 'x')
plt.show()

## Supervised Graph
## Unsupervised Graph
## Classify
labels = set(df.columns.values)
labels.remove('y')
X_raw = df[list(labels)]
X_train, _, _ = one_hot_dataframe(X_raw, ['job', 'marital', 'education',
                                          'default', 'housing', 'loan',
                                          'contact', 'month', 'poutcome'],
                                  replace=True)
y_train = [1 if i == 'yes' else 0 for i in df.y]

reductions = []
pca = PCA(n_components=2)
reductions.append(pca.fit_transform(X_train, y_train))
lda = LDA(n_components=2)
reductions.append(lda.fit_transform(X_train, y_train))
isomap = Isomap(n_components=2)
reductions.append(isomap.fit_transform(X_train, y_train))
lle = LocallyLinearEmbedding(n_components=2, method='standard')
reductions.append(lle.fit_transform(X_train, y_train))

for reduced_X in reductions:
    plt.figure()
    red_x = []
    red_y = []
    blue_x = []
    blue_y = []
    green_x = []
    green_y = []
    for i in range(len(reduced_X)):
n1 = Y[Y == 0].shape[0]
n2 = Y[Y == 1].shape[0]

print "PCA:"
pca = PCA(n_components=Components)
transformed = pca.fit_transform(X)
plt.plot(transformed[:n1, 0], transformed[:n1, 1], 'o')
plt.plot(transformed[n1:, 0], transformed[n1:, 1], 'x')
plt.show()

print "LDA:"
lda = LDA()
transformed2 = lda.fit_transform(X, Y)
plt.plot(transformed2[:n1, 0], np.zeros(n1), 'o')
plt.plot(transformed2[n1:, 0], np.zeros(n2), 'x')
plt.show()

print "unsupervised:"
numFeature = X.shape[1]
numData = X.shape[0]
numNode = numFeature + numData
A = np.zeros((numNode, numNode))
# construct feature-data adjacency
for i in range(numData):
    for j in xrange(numFeature):
        A[i + numFeature, j] = X.iloc[i, j]
        A[j, i + numFeature] = X.iloc[i, j]
    # remove axis spines
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)

    plt.grid()
    plt.tight_layout()
    plt.show()

os.chdir(r"F:\Analytics\ISB Study\Capstone\dir_data\dir_data")
X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(
    ("train\\vision_cuboids_histogram.txt",
     "test\\vision_cuboids_histogram.txt",
     "validation\\vision_cuboids_histogram.txt"))
np.unique(y_train)

sklearn_lda = LDA(n_components=30)
X_lda_sklearn = sklearn_lda.fit_transform(X_train.todense(), y_train)
plot_scikit_lda(X_lda_sklearn, title='LDA vision_cuboids_histogram')

# PCA
sklearn_pca = sklearnPCA(n_components=30)
X_pca = sklearn_pca.fit_transform(X_train.todense())
plot_pca(title='PCA vision_cuboids_histogram')

# LDA + PCA
X_ldapca_sklearn = sklearn_pca.fit_transform(X_lda_sklearn)
plot_scikit_lda(X_ldapca_sklearn,
                title='LDA+PCA LDA vision_cuboids_histogram', mirror=(-1))
x_test = sc.transform(x_test)

import numpy as np
cov_mat = np.cov(x_train.T)
eigen_vals, eigen_vecs = np.linalg.eig(cov_mat)
print('eigenvals', eigen_vals)

tot = sum(eigen_vals)
var_exp = [(i / tot) for i in sorted(eigen_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)

import matplotlib.pyplot as plt
plt.figure(figsize=(6, 4))
plt.bar(range(1, 14), var_exp, alpha=0.5, align='center',
        label='individual explained variance')
plt.step(range(1, 14), cum_var_exp, where='mid',
         label='cumulative explained variance')
plt.ylabel('Explained Variance')
plt.xlabel('Principal Components')
plt.legend(loc=0)
plt.show()

from sklearn.lda import LDA
lda = LDA(n_components=2)
x_train_lda = lda.fit_transform(x_train, y_train)
def lda(X, y, components=10):
    lda = LDA(n_components=components)
    return lda.fit_transform(X, y)
np.mean(score)  # 0.913

# Convert array to dataframe
df = pd.DataFrame(x, columns=['C1', 'C2', 'C3'])

# 3-dimensional plot of principal components (eigenvectors)
fig = plt.figure(1)
ax = fig.add_subplot(111, projection='3d')
ax.scatter(df['C1'].tolist(), df['C2'].tolist(), df['C3'].tolist(),
           c='b', marker='*')
ax.set_xlabel('C1')
ax.set_ylabel('C2')
ax.set_zlabel('C3')
ax.legend()

# Linear discriminant analysis
da = LDA(n_components=3)
# Fit model and transform data
a = da.fit_transform(X, Y)

# Perform KNN algorithm
neigh1 = KNeighborsClassifier(n_neighbors=3)
neigh1.fit(a, Y)

# Get cross-validation metrics: build validation sets with k-fold
# cross validation and test accuracy on each held-out fold
score1 = cross_val_score(neigh1, a, Y, scoring='accuracy', cv=10)
np.mean(score1)  # 0.973
def FLD_r(X, y):
    # can change n_components to a desired value
    fld = LDA()
    return fld.fit_transform(X, y)
def lda_projected_X(X, y, n):
    lda = LDA(n_components=n)
    return lda.fit_transform(X, y)
# Project the transformed points
w_lda = np.dot(np.linalg.inv(s_within), mean_1 - mean_2)
w_lda = w_lda / np.linalg.norm(w_lda)
label_1_projected = np.dot(label_1_transformed, w_lda)
label_2_projected = np.dot(label_2_transformed, w_lda)

# Plot the projected points
plt.plot(label_1_projected, np.zeros_like(label_1_projected), 'o', c='r')
plt.plot(label_2_projected, np.zeros_like(label_2_projected), 'o', c='b')
plt.show()

# LDA using sklearn
from sklearn.lda import LDA
points = np.concatenate((label_1, label_2), axis=0)
label = np.concatenate((np.zeros((20, 1)), np.zeros((20, 1)) + 1))
lda = LDA()
skl_transform = lda.fit_transform(points, label)

f, ax = plt.subplots(1, 2)
ax[0].plot(skl_transform[:20], np.zeros_like(label_1_projected), 'o', c='r')
ax[0].plot(skl_transform[20:], np.zeros_like(label_2_projected), 'o', c='b')
ax[0].set_title('SKlearn Projection')
ax[1].plot(label_1_projected, np.zeros_like(label_1_projected), 'o', c='r')
ax[1].plot(label_2_projected, np.zeros_like(label_2_projected), 'o', c='b')
ax[1].set_title('Manual Projection')
plt.show()
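# The Fisher direction is only defined up to sign and scale, so the two panels
# above may look flipped or stretched relative to each other. Assuming the manual
# pipeline projects the same samples in the same order, a quick sanity check is
# the correlation between the two 1-D projections (|corr| near 1 means both
# methods found the same discriminant axis):
manual = np.concatenate((label_1_projected, label_2_projected))
print(np.corrcoef(manual, skl_transform.ravel())[0, 1])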
plt.title('Projection onto first 2 PC space')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
fig1.savefig('./Plots/2_PCA1.png')

# Generate scatter plot on the second 2 PC space
fig2 = plt.figure(figsize=(16.0, 9.0))
plt.scatter(x=pc_data[:, 2], y=pc_data[:, 3],
            c=[color_dict[c] for c in df.iloc[:, 0]])
plt.title('Projection onto second 2 PC space')
plt.xlabel('Principal Component 3')
plt.ylabel('Principal Component 4')
fig2.savefig('./Plots/2_PCA2.png')

# Apply LDA to project the PC space onto the first 2 LD space
lda = LDA()
ld_data = lda.fit_transform(X=pc_data, y=df.iloc[:, 0])
fig3 = plt.figure(figsize=(16.0, 9.0))
plt.scatter(x=ld_data[:, 0], y=ld_data[:, 1],
            c=[color_dict[c] for c in df.iloc[:, 0]])
plt.title('Projection onto first 2 LD space')
plt.xlabel('Linear Discriminant 1')
plt.ylabel('Linear Discriminant 2')
fig3.savefig('./Plots/2_LDA1.png')

# Apply LDA to project the PC space onto the second 2 LD space
fig4 = plt.figure(figsize=(16.0, 9.0))
plt.scatter(x=ld_data[:, 2], y=ld_data[:, 3],
            c=[color_dict[c] for c in df.iloc[:, 0]])
plt.title('Projection onto second 2 LD space')
plt.xlabel('Linear Discriminant 3')
plt.ylabel('Linear Discriminant 4')
fig4.savefig('./Plots/2_LDA2.png')
train.head()

lda = LDA(n_components=2)
pca = PCA(n_components=2)

scaler = StandardScaler()
trains = scaler.fit_transform(train)
trans = pca.fit_transform(trains, labels)
dfp = pd.DataFrame(trans, columns=["pca1", "pca2"], index=train.index)
trans = lda.fit_transform(trains, labels)
df = pd.DataFrame(trans, columns=["lda"], index=train.index)

df["labels"] = labels
dfp["labels"] = labels

df.plot(kind='scatter', x='lda', y='labels')
dfp.plot(kind='scatter', x='pca1', y='pca2', c="labels")