def apply_LDA(self, X, y, solver, shrinkage):
    # Fit an LDA model and project X onto its discriminant axes.
    # The 'svd' solver does not support shrinkage, so shrinkage is only
    # passed through for the other solvers ('lsqr', 'eigen').
    if solver == 'svd':
        model = lda(solver=solver)
    else:
        model = lda(solver=solver, shrinkage=shrinkage)
    model.fit(X, y)
    return model.transform(X)
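# Hedged usage sketch for apply_LDA (assumption: the method never touches
# self, so None can stand in for the instance; the data below is synthetic).
if __name__ == '__main__':
    import numpy as np
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
    X_demo = np.random.randn(60, 4)
    y_demo = np.repeat([0, 1, 2], 20)
    Z_demo = apply_LDA(None, X_demo, y_demo, solver='eigen', shrinkage='auto')
    print(Z_demo.shape)  # (60, 2): LDA yields at most n_classes - 1 axes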
def plot_lda_decision_boundaries(two_lda_dimensions, label_matrix):
    # Create mesh_matrix, a mesh of points in the space of the first two
    # linear discriminants
    ldone_min, ldone_max = two_lda_dimensions[:, 0].min() - 1, two_lda_dimensions[:, 0].max() + 1
    ldtwo_min, ldtwo_max = two_lda_dimensions[:, 1].min() - 1, two_lda_dimensions[:, 1].max() + 1
    ldoneone, ldtwotwo = np.meshgrid(np.linspace(ldone_min, ldone_max, 500),
                                     np.linspace(ldtwo_min, ldtwo_max, 500))
    mesh_matrix = np.c_[ldoneone.ravel(), ldtwotwo.ravel()]

    # Instantiate an LDA model and fit it on two_lda_dimensions
    lda_model = lda(n_components=2)
    lda_model.fit(two_lda_dimensions, label_matrix)

    # Use the LDA model to make categorical predictions on mesh_matrix
    mesh_predictions = lda_model.predict(mesh_matrix)

    # Map categorical predictions to numeric values for contour plotting
    speciesmap = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    fv = np.vectorize(lambda x: speciesmap[x])
    # Reshape back to the grid shape rather than (len, len), which only
    # happens to work for a square mesh
    Z = fv(mesh_predictions).reshape(ldoneone.shape)

    # Make contour plot
    plt.contourf(ldoneone, ldtwotwo, Z, levels=[-0.5, 0.5, 1.5, 2.5],
                 colors=('orange', 'black', 'grey'), alpha=0.4)
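# Hedged usage sketch, assuming np, plt, and the lda alias are already in
# scope (as in the surrounding snippets): project iris with LDA, synthesize
# the string labels that speciesmap expects, and overlay the regions.
if __name__ == '__main__':
    from sklearn.datasets import load_iris
    iris = load_iris()
    species = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
    labels = species[iris.target]
    two_ld = lda(n_components=2).fit_transform(iris.data, labels)
    plot_lda_decision_boundaries(two_ld, labels)
    plt.scatter(two_ld[:, 0], two_ld[:, 1], c=iris.target, s=10)
    plt.show()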
def lda_(values, class1, class2, labels, features):
    # Use Fisher's discriminant (via sklearn LDA) to find the best
    # separating direction.
    clf = lda()
    clf.fit(features, labels)
    w_ = clf.coef_
    b_ = clf.intercept_

    # Project both classes onto the discriminant axis.
    class1_projected = np.dot(class1, w_.T) + b_
    class2_projected = np.dot(class2, w_.T) + b_
    class1_mean_projected = np.mean(class1_projected)
    class2_mean_projected = np.mean(class2_projected)
    class1_dist_to_mean = class1_projected - class1_mean_projected
    class2_dist_to_mean = class2_projected - class2_mean_projected
    means_dist = class2_mean_projected - class1_mean_projected

    # Convert projected distances into membership values clipped to [0, 1].
    mems1 = (means_dist - class1_dist_to_mean) / means_dist
    mems1[mems1 > 1] = 1
    mems1[mems1 < 0] = 0
    mems2 = (means_dist + class2_dist_to_mean) / means_dist
    mems2[mems2 > 1] = 1
    mems2[mems2 < 0] = 0
    mems = np.concatenate((mems1, mems2))
    return testLeaveOneOut(features, labels, mems.ravel())
def exeML(mlmethod, xtr, ytr, xte, yte, islog=True, isfeatureselection=True):
    # Optionally log-transform the (absolute) inputs.
    if islog:
        xtr = np.log(np.abs(xtr)).tolist()
        ytr = np.log(np.abs(ytr)).tolist()
        xte = np.log(np.abs(xte)).tolist()
        yte = np.log(np.abs(yte)).tolist()
    # Optionally keep the 100 best features via RFE on a linear SVR.
    if isfeatureselection:
        estimator = SVR(kernel="linear")
        selector = RFE(estimator, n_features_to_select=100, step=1)
        selector = selector.fit(xtr, ytr)
        xtr = np.array(xtr)[:, selector.support_].tolist()
        xte = np.array(xte)[:, selector.support_].tolist()
    np.random.seed(1000)
    # Note: "SVM" maps to a regressor (SVR) while the other options are
    # classifiers.
    if mlmethod == "SVM":
        clf = svm.SVR(kernel='poly')
    elif mlmethod == "NeaNei":
        clf = NearestCentroid()
    elif mlmethod == "dtree":
        clf = tree.DecisionTreeClassifier()
    elif mlmethod == "lda":
        clf = lda(solver="svd")
    predval = []
    clf.fit(xtr, ytr)
    for i in range(len(xte)):
        # predict expects a 2-D array; np.float is deprecated, use float
        predval.append(float(clf.predict([xte[i]])))
    return predval
def usarLDA(array, e):
    # Fit LDA on the stacked feature vectors ("caracteres" = features,
    # "E" = class labels) and return the projection plus the fitted model.
    Lda = lda()
    caracteres = np.vstack(array)
    E = np.array(e)
    Lda.fit(caracteres, E)
    CR = Lda.transform(caracteres)
    CR = CR.astype(np.float32, copy=True)
    return CR, Lda
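# Hedged usage sketch for usarLDA with synthetic data (shapes are
# illustrative assumptions): 30 feature vectors from 3 classes.
if __name__ == '__main__':
    feats = [np.random.randn(16) for _ in range(30)]
    classes = [i % 3 for i in range(30)]
    CR, modelo = usarLDA(feats, classes)
    print(CR.shape)  # (30, 2): at most n_classes - 1 discriminants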
def LDA_DR(X, y):
    # Linear Discriminant Analysis (LDA): reduce from 64 dimensions to 2-3
    logging.info("Computing LDA projection")
    X = np.array(X)
    X2 = X.copy()
    X2.flat[::X.shape[1] + 1] += 0.01  # make X invertible
    t0 = time()
    X_lda = lda(n_components=3).fit_transform(X2, y)
    plot_embedding_2d(X_lda[:, 0:2], y, "LDA of Kmeans")
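# Hedged usage sketch for LDA_DR on scikit-learn's 64-dimensional digits
# data (matching the comment above). plot_embedding_2d is defined elsewhere
# in the original project, so this call assumes it is in scope.
# from sklearn.datasets import load_digits
# digits = load_digits()
# LDA_DR(digits.data, digits.target)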
def plot_simple_demo_lda():
    pylab.clf()
    fig = pylab.figure(num=None, figsize=(10, 4))
    pylab.subplot(121)

    title = "Original feature space"
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    good = x1 > x2
    bad = ~good

    x1g = x1[good]
    x2g = x2[good]
    pylab.scatter(x1g, x2g, edgecolor="blue", facecolor="blue")

    x1b = x1[bad]
    x2b = x2[bad]
    pylab.scatter(x1b, x2b, edgecolor="red", facecolor="white")

    pylab.grid(True)

    pylab.subplot(122)

    X = np.c_[(x1, x2)]
    lda_inst = lda(n_components=1)
    Xtrans = lda_inst.fit_transform(X, good)

    Xg = Xtrans[good]
    Xb = Xtrans[bad]

    pylab.scatter(Xg[:, 0], np.zeros(len(Xg)), edgecolor="blue", facecolor="blue")
    pylab.scatter(Xb[:, 0], np.zeros(len(Xb)), edgecolor="red", facecolor="white")
    title = "Transformed feature space"
    pylab.title(title)
    pylab.xlabel("$X'$")
    fig.axes[1].get_yaxis().set_visible(False)

    pylab.grid(True)

    pylab.autoscale(tight=True)
    filename = "lda_demo.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
def train(self, labels, erps): print "training..." self.clf = lda() self.clf.fit(erps, labels) scores = cross_validation.cross_val_score(self.clf, erps, labels, cv=10) print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) joblib.dump(self.clf, 'model/lda.pkl')
def train(self, labels, erps):
    if self.factor != 1:
        erps = convert.erp.decimate(erps, self.factor)
    self.frame_length = len(erps[0]) // 8  # integer division for a length
    (b, se, pval, inmodel, stats, nextstep, history) = stepwisefit(
        erps, labels, maxiter=60, penter=0.1, premove=0.15)
    self.index = inmodel
    erps = [np.array(erp)[self.index] for erp in erps]
    self.clf = lda()
    self.clf.fit(erps, labels)
    scores = cross_validation.cross_val_score(self.clf, erps, labels, cv=6)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    self.show_feature()
    joblib.dump(self.clf, 'model/swlda-%s.pkl' % self.name)
    np.save("model/swlda-index-%s.npy" % self.name, self.index)
def StratifiedShuffleSplit_cross_validate_func_lda(X, y, partitioner):
    # Run cross-validated LDA several times, then plot the error-rate and
    # accuracy trajectories. (No return value; the original annotation
    # promised arrays that were never returned.)
    runs = 4
    lDA = np.empty([runs])
    accuracy_list = []
    error_rate_list = []
    for i in range(runs):
        lda_results = cross_validate(lda(), X, y, scoring="accuracy", cv=partitioner)
        lDA[i] = np.mean(lda_results["test_score"])
        error_rate_lda = 1 - lDA[i]
        print("accuracy:", lDA[i])
        print("error rate:", error_rate_lda)
        accuracy_list.append(lDA[i])
        error_rate_list.append(error_rate_lda)
    plt.plot(error_rate_list)
    plt.show()
    plt.plot(accuracy_list)
    plt.show()
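# Hedged usage sketch (assumption: cross_validate, plt, np, and the lda
# alias are imported as in the surrounding snippets). A StratifiedShuffleSplit
# is one natural partitioner, per the function's name.
if __name__ == '__main__':
    from sklearn.datasets import load_iris
    from sklearn.model_selection import StratifiedShuffleSplit
    X_demo, y_demo = load_iris(return_X_y=True)
    sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    StratifiedShuffleSplit_cross_validate_func_lda(X_demo, y_demo, sss)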
X = dataset.iloc[:, 1:5].values
y = dataset.iloc[:, 5].values
# c) Data Transforms

# 4. Evaluate Algorithms
# a) Split-out validation dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# b) Test options and evaluation metric
# c) Spot Check Algorithms
# Now it is time to evaluate some of the algorithms appropriate to our problem.
models = []
models.append(('Log-Reg', LogisticRegression()))
models.append(('LDA', lda()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))
# Evaluate each model in turn
# d) Compare Algorithms
results = []
names = []
for name, model in models:
    # shuffle=True is required when random_state is set on recent sklearn
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    # The call was truncated in the original; the arguments below are the
    # conventional completion (an assumption).
    cv_results = cross_val_score(model, X_train, y_train, cv=kfold,
                                 scoring='accuracy')
    results.append(cv_results)
    names.append(name)
def optimize_cluster(inds1, inds2, log=None):
    ###### CONDITION
    inds1, inds_on_wall1, mask1 = inds1
    inds2, inds_on_wall2, mask2 = inds2
    if not np.any(np.logical_and(mask1, mask2)):
        return mask1, mask2, 0.0

    mask1_erode = cv2.erode(mask1, np.ones((3, 3), dtype=np.uint8))
    mask2_erode = cv2.erode(mask2, np.ones((3, 3), dtype=np.uint8))
    mps2d_pix1 = mps2d_pix[inds_on_wall1]
    mps2d_pix2 = mps2d_pix[inds_on_wall2]
    num_error1 = np.sum(mask2_erode[mps2d_pix1[:, 1], mps2d_pix1[:, 0]])
    num_error2 = np.sum(mask1_erode[mps2d_pix2[:, 1], mps2d_pix2[:, 0]])
    total_error = (num_error1 + num_error2) / (mps2d_pix1.shape[0] + mps2d_pix2.shape[0])
    if num_error1 / mps2d_pix1.shape[0] < 0.01 and num_error2 / mps2d_pix2.shape[0] < 0.01:
        return mask1, mask2, total_error

    mps2d1 = mps2d[inds_on_wall1]
    mps2d2 = mps2d[inds_on_wall2]
    kfs2d1 = np.stack([kfs2d_uni[dict_mp2kf[i]] for i in inds_on_wall1])
    kfs2d2 = np.stack([kfs2d_uni[dict_mp2kf[i]] for i in inds_on_wall2])
    ds1 = mps2d1 - kfs2d1
    ds2 = mps2d2 - kfs2d2
    d_norms1 = np.expand_dims(np.linalg.norm(ds1, axis=1), 1)
    d_norms2 = np.expand_dims(np.linalg.norm(ds2, axis=1), 1)
    nrms = np.matmul(d_norms1, d_norms2.T)
    dots = np.matmul(ds1, ds2.T)
    dots = np.where(nrms > 0.0, dots / nrms, 0.0)
    inds_overlap = np.where(dots < -0.5)
    # np.where returns a tuple of index arrays; test the first array, not
    # the tuple itself (len(tuple) is always 2 here).
    if not len(inds_overlap[0]) > 0:
        return mask1, mask2, total_error

    x1 = np.where(mask1 > 0)
    x2 = np.where(mask2 > 0)
    x1 = np.stack([x1[0], x1[1]], axis=1)
    x2 = np.stack([x2[0], x2[1]], axis=1)

    ###### GRADIENT
    # Fit LDA between the two pixel clouds; the discriminant normal gives
    # the direction along which to push the clusters apart.
    x = np.concatenate([x1, x2], axis=0)
    y = np.asarray([0] * x1.shape[0] + [1] * x2.shape[0])
    model = lda()
    model.fit(x, y)
    r = np.fliplr(model.coef_)  # reorder axes to match the point layout
    if np.sum(r) == 0:
        raise ValueError
    r = r / np.linalg.norm(r)

    lambda1 = 0.05
    avg1 = np.average(x1, axis=0)
    avg2 = np.average(x2, axis=0)
    d12 = avg2 - avg1

    ##### UPDATE
    switch = (np.dot(r, d12) > 0).astype(np.float32) * 2 - 1
    mps2d[inds_on_wall1] += switch * r * lambda1
    mps2d[inds_on_wall2] -= switch * r * lambda1
    kfs2d[inds_on_wall1] += switch * r * lambda1
    kfs2d[inds_on_wall2] -= switch * r * lambda1
    mps2d_pix1, kfs2d_pix1 = pts_float2pixel(mps2d[inds_on_wall1], kfs2d[inds_on_wall1],
                                             w, h, xm, xM, ym, yM)
    mps2d_pix2, kfs2d_pix2 = pts_float2pixel(mps2d[inds_on_wall2], kfs2d[inds_on_wall2],
                                             w, h, xm, xM, ym, yM)
    # Clip all pixel coordinates back into the image bounds (in place).
    for pix in (mps2d_pix1, mps2d_pix2, kfs2d_pix1, kfs2d_pix2):
        pix[:, 0] = np.clip(pix[:, 0], a_min=0, a_max=w)
        pix[:, 1] = np.clip(pix[:, 1], a_min=0, a_max=h)
    for i in inds_on_wall1:
        kfs2d_uni[dict_mp2kf[i]] = kfs2d[i]
    for i in inds_on_wall2:
        kfs2d_uni[dict_mp2kf[i]] = kfs2d[i]
    mps2d_pix[inds_on_wall1] = mps2d_pix1
    mps2d_pix[inds_on_wall2] = mps2d_pix2
    mask1 = make_only_gridmap(mps2d_pix1, kfs2d_pix1, w, h, xm, xM, ym, yM)
    mask2 = make_only_gridmap(mps2d_pix2, kfs2d_pix2, w, h, xm, xM, ym, yM)
    mask1 = (mask1 != 127).astype(np.uint8)
    mask2 = (mask2 != 127).astype(np.uint8)
    return mask1, mask2, total_error
# plt.figure()
# plt.scatter(all_reduced[:,0],all_reduced[:,1],
#             c=opto, s=20)
# plt.colorbar()
# plt.xlabel('PCA1');plt.ylabel('PCA2')
#
# fig = plt.figure()
# ax = Axes3D(fig)
# p = ax.scatter(all_reduced[:,0],all_reduced[:,1],all_reduced[:,2],
#                c=opto, s=20)
# fig.colorbar(p)
# =============================================================================

## LDA
clf = lda()
clf.fit(all_reduced, opto)
fit_coefs = clf.coef_[0]
# Pick the three components with the largest absolute LDA coefficients
best_sep = np.argsort(np.abs(fit_coefs))[-3:]

plt.figure()
plt.scatter(all_reduced[:, best_sep[2]], all_reduced[:, best_sep[1]], c=opto)
plt.colorbar()

# =============================================================================
# fig = plt.figure()
# ax = Axes3D(fig)
# p = ax.scatter(all_reduced[:,best_sep[0]],all_reduced[:,best_sep[1]],all_reduced[:,best_sep[2]],
#                c=opto, s=20)
# fig.colorbar(p)
# =============================================================================
#| | | | |_| | | |_ \ V / (_| | | | |_| | \__ \ (__
#|_| |_|\__,_|_|\__| \_/ \__,_|_| |____/|_|___/\___|
#
# Extract the non-responsive population and check whether we can
# use those neurons to discriminate tastes using a multivariate analysis
non_taste_firing = data.all_normal_off_firing[taste_p_vec > 0.05, :,
                                              time_bounds[0]:time_bounds[1]]

# At every time-point, check accuracy of classification using LDA
labels = np.sort(list(range(4)) * 15)

# Use shuffle splits to estimate accuracy predictions for each component
bootstrap_iters = 10
score_array = np.zeros((non_taste_firing.shape[-1], bootstrap_iters))
cv = ShuffleSplit(n_splits=bootstrap_iters, test_size=0.25, random_state=0)
clf = lda(solver='eigen', shrinkage='auto')
for t_bin in trange(non_taste_firing.shape[-1]):
    score_array[t_bin] = cross_val_score(clf, non_taste_firing[:, :, t_bin].T,
                                         labels, cv=cv)

dat_imshow(score_array)
plt.show()
plt.errorbar(x=np.arange(score_array.shape[0]),
             y=np.mean(score_array, axis=-1),
             yerr=np.std(score_array, axis=-1))
plt.show()
data_type = 'cropped_roi'

# file paths
test_data_path = f'networks/data/sgp/{data_id}/{data_type}/*'
img_save_path = 'networks/reconstructed_roi/lda'
# mkdir if not exists
Path(f'{img_save_path}').mkdir(parents=True, exist_ok=True)

# prepare training set
print('Prepare training data..')
channel_train, y_true, channel_len = load_raw_labeled_data()

# fit lda
print('Model training..')
classifier = lda()
classifier.fit(channel_train, y_true)
precision_clf = classifier.score(channel_train, y_true)
prediction = classifier.predict(channel_train)
balanced_acc = balanced_accuracy_score(y_true, prediction)
kappa = cohen_kappa_score(y_true, prediction)
# plot learning curve
plot_learning_curve(classifier, 'learning curve of LDA', channel_train, y_true)
print('train-accuracy: ', precision_clf)
print('balanced-accuracy: ', balanced_acc)
print('kappa: ', kappa)

# prepare test data
print('Prepare test data..')
imgs = load_raw_images_data(test_data_path,
        c_idx.append(idx)
    # Reweight samples AdaBoost-style: boost misclassified indices,
    # down-weight the rest, then renormalize.
    for i in range(len(w)):
        if i in c_idx:
            w[i] = w[i] * np.exp(alpha)
        else:
            w[i] = w[i] * np.exp(-1 * alpha)
    zt = np.sum(w)
    w = np.array(w) / zt
    return w

clf1 = LogisticRegression(solver='sag')
clf2 = lda()
clf3 = DecisionTreeClassifier(max_depth=2)
clf4 = KNeighborsClassifier(n_neighbors=1)
clf5 = KNeighborsClassifier(n_neighbors=10)
clfs = [clf1, clf2, clf3, clf4, clf5]

layer2_tr = None
layer2_te = None
for idx, model in enumerate(clfs):
    for i in range(5):
        x, y = Bootstrap(x_tr, y_tr, weight=weight, factor=0.8)
        model.fit(x, y)
        prdtr = model.predict(x_tr)
        prdte = model.predict(x_te)
        if layer2_tr is None:
# computation of PCA projection
print('Computation of PCA Projection')
X_pca = (decomposition
         .PCA(svd_solver='randomized', n_components=2)
         .fit_transform(X))
plot_embedding(X_pca, 'PCA ' + title, j=1)

# computation of the LDA projection
print('Computation of the LDA Projection')
X2 = X.copy()
X2.flat[::X.shape[1] + 1] += 0.01  # make X invertible
X_lda = lda(n_components=2).fit_transform(X2, y)
plot_embedding(X_lda, 'LDA ' + title, j=2)

# computation of the ISOMAP projection
print('Computation of the ISOMAP Projection')
X_iso = manifold.Isomap(n_neighbors=K, n_components=2).fit_transform(X)
plot_embedding(X_iso, 'ISOMAP ' + title, j=3)

# computation of the LLE projection
print('Computation of the LLE Projection')
clf = manifold.LocallyLinearEmbedding(n_neighbors=K, n_components=2,
                                      method='standard')
X_lle = clf.fit_transform(X)
plot_embedding(X_lle, 'LLE ' + title, j=4)
def individual(df, label1, label2):
    '''
    df: first-last (17 features with HAMD scores and 1 column of DRUG names)
    label1, label2: names of the two drugs to compare (e.g. a drug vs Placebo)

    Requires: from math import sqrt; from scipy.stats import t, ttest_1samp, ttest_ind
    '''
    df['DRUG'].replace(label1, 1, inplace=True)
    df['DRUG'].replace(label2, 0, inplace=True)
    df = df[(df['DRUG'] == 1) | (df['DRUG'] == 0)].reset_index(drop=True)

    # fit LDA model
    y = df['DRUG'].astype(int)
    X = df.drop(['DRUG'], axis=1)
    clf = lda()
    model = clf.fit(X, y)
    df_pn = pd.DataFrame(model.coef_, index=['PN']).T
    # get weightings, sorted by absolute value, then add descriptions
    df_w = pd.DataFrame(np.absolute(model.coef_), index=['Weightings']) \
             .sort_values(by='Weightings', axis=1, ascending=False).T
    df_w['Polarity'] = ''
    df_w['Description'] = ''
    df_w['True_Weightings'] = ''
    df_w['HAMD_Name'] = ''
    idx = list(df_w.index)
    for i in idx:
        # add corresponding descriptions
        df_w.loc[i, 'Description'] = desc[str(i)]
        df_w.loc[i, 'HAMD_Name'] = 'HAM-D ' + str(i + 1)
        # get polarity
        if df_pn.loc[i, 'PN'] > 0:
            df_w.loc[i, 'Polarity'] = 'positive'
            df_w.loc[i, 'True_Weightings'] = df_w.loc[i, 'Weightings']
        else:
            df_w.loc[i, 'Polarity'] = 'negative'
            df_w.loc[i, 'True_Weightings'] = -1 * df_w.loc[i, 'Weightings']

    # pd.Series([]) without a dtype is deprecated
    pvals = pd.Series(dtype=float)
    sigs = pd.Series(dtype=float)
    ci_up = pd.Series(dtype=float)
    ci_lo = pd.Series(dtype=float)
    ci = pd.Series(dtype=object)
    sig_2 = pd.Series(dtype=bool)
    drug = df.loc[df['DRUG'] != 0]
    placebo = df.loc[df['DRUG'] == 0]
    for i in range(len(df_w.Weightings)):
        # Pooled two-sample statistics and a 95% confidence interval for the
        # difference in means between drug and placebo on each item
        N1 = len(drug[drug.columns[i]])
        N2 = len(placebo[placebo.columns[i]])
        d_f = (N1 + N2 - 2)
        std1 = drug[drug.columns[i]].std()
        std2 = placebo[placebo.columns[i]].std()
        std_N1N2 = sqrt(((N1 - 1) * std1 ** 2 + (N2 - 1) * std2 ** 2) / d_f)
        diff_mean = drug[drug.columns[i]].mean() - placebo[placebo.columns[i]].mean()
        MoE = t.ppf(0.975, d_f) * std_N1N2 * sqrt(1 / N1 + 1 / N2)
        ci_up[i] = diff_mean + MoE
        ci_lo[i] = diff_mean - MoE
        sig_2[i] = ((diff_mean + MoE) * (diff_mean - MoE) > 0)
        ci[i] = (diff_mean - MoE, diff_mean + MoE)
        tset, pval = ttest_1samp(df_w.Weightings, df_w.Weightings[i])
        sig = ttest_ind(drug[drug.columns[i]], placebo[placebo.columns[i]],
                        equal_var=False)
        pvals[i] = pval
        sigs[i] = sig[1]
    df_w.insert(3, 'weightings p-value', pvals)
    df_w.insert(4, '2-sample t-test p-value', sigs)
    df_w.insert(5, '2-sample t-test ci', ci)
    df_w.insert(6, '2-sample t-test ci upper', ci_up)
    df_w.insert(7, '2-sample t-test ci lower', ci_lo)
    df_w.insert(8, 'Significant', sig_2)
    return (df_w, df)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
import numpy as np
import os

pkl_dir = '/home/user/Documents/Kaggle/CreditDefaultRisk/EngineeredData'
# _, pkl_dir = directory_table.get_paths(station='Subgraph')
# os.path.join keeps the paths portable; the original concatenated
# Windows-style separators onto a POSIX directory
training_df = np.load(os.path.join(pkl_dir, 'train_df.npy'))
target = np.load(os.path.join(pkl_dir, 'target.npy'))
predicting_df = np.load(os.path.join(pkl_dir, 'predict_df.npy'))

model = lda()
model.fit(training_df, target[:, 0])
pred = model.predict_proba(predicting_df)
# all_spikes = all_spikes[:,nrn,:,2000:4000]
# if not (np.sum((np.sum(all_spikes,axis=2) == 0).flatten()) > 0):
# =============================================================================

this_off = np.asarray(data.all_normal_off_firing)
this_off = this_off[:, :, 80:160]

total_this_off = this_off[0, :, :]
for nrn in range(1, this_off.shape[0]):
    total_this_off = np.concatenate((total_this_off, this_off[int(nrn), :, :]), axis=1)

reduced_stim_pca = pca(n_components=45).fit(total_this_off)
reduced_stim = reduced_stim_pca.transform(total_this_off)
plt.plot(np.cumsum(reduced_stim_pca.explained_variance_ratio_))

## Identity
clf = lda()
clf.fit(reduced_stim, tastes)
fit_coefs = clf.coef_[0]
best_sep = np.argsort(np.abs(fit_coefs))[-3:]

plt.figure()
plt.scatter(reduced_stim[:, best_sep[2]], reduced_stim[:, best_sep[1]], c=tastes)
plt.colorbar()
clf.score(reduced_stim, tastes)

fig = plt.figure()
ax = Axes3D(fig)
p = ax.scatter(reduced_stim[:, best_sep[0]], reduced_stim[:, best_sep[1]],
               reduced_stim[:, best_sep[2]], c=tastes, s=20)
fig.colorbar(p)
import pandas as pd
import numpy as np

df = pd.read_csv("/home/shaury/Downloads/nptel/Iris.csv", delimiter=",")
x, y = df[["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]], df["Species"]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1, test_size=0.15)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
l1 = lda(n_components=1)
x_train = l1.fit_transform(x_train, y_train)
# Reuse the projection learned on the training set; refitting on the test
# set would leak labels and yield an inconsistent projection.
x_test = l1.transform(x_test)

from sklearn.ensemble import RandomForestClassifier as RFC
cl = RFC(max_depth=2, random_state=0)
cl.fit(x_train, y_train)
y_pred = cl.predict(x_test)

from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print('Accuracy: ' + str(accuracy_score(y_test, y_pred)))
tuning_param = [{
    'C': [0.01, 0.1, 1, 5, 10, 100],
    'gamma': [0.01, 0.1, 1, 5, 10, 100]
}]
svm_fit = GridSearchCV(SVC(kernel='rbf'), tuning_param, cv=10)
svm_fit.fit(data_x, data_y)
svm_fit.best_params_
#{'C': 0.01, 'gamma': 0.1}

# Fit the model using the parameters found
svm_best_fit = SVC(kernel='rbf', C=0.01, gamma=0.1)
svm_best_fit.fit(x_clas_train, y_clas_train)
np.mean(svm_best_fit.predict(x_clas_cv) - y_clas_cv)
#0.5833333333333334

#LDA
lda_fit = lda()
lda_fit.fit(x_clas_train, y_clas_train)
np.mean(lda_fit.predict(x_clas_cv) - y_clas_cv)
#0.20833333333333334

#QDA
qda_fit = qda()
qda_fit.fit(x_clas_train, y_clas_train)
np.mean(qda_fit.predict(x_clas_cv) - y_clas_cv)
#0.4583333333333333
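# Hedged aside: the signed mean above only approximates the error for 0/1
# labels, and opposite-sign mistakes can cancel. The mismatch fraction is the
# conventional error rate:
# np.mean(lda_fit.predict(x_clas_cv) != y_clas_cv)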
groups = gmm.predict(reduced_stim)
all_groups.append(sum(groups))
trial_order = np.argsort(groups)

# Train LDA classifier on firing from both clusters
repeats = 500
stim_acc = []
for i in range(repeats):
    test_stim = np.random.choice(np.arange(15), size=1, replace=False)[0]
    train_stim = np.arange(15)
    train_stim = np.delete(train_stim, test_stim)

    stim_lda = lda()
    stim_lda.fit(reduced_stim[train_stim, :], groups[train_stim])
    stim_acc.append(
        sum(stim_lda.predict(reduced_stim[test_stim, :][np.newaxis, :])
            == groups[test_stim]))
    #print('explained_var = %.3f, accuracy = %.3f' % (explained_var_stim,accuracy))

class_acc.append(np.mean(stim_acc))

# =============================================================================
# =============================================================================
# Pull out and cluster distance matrices
this_dist = off_stim_dists[taste]
clust_dist = this_dist[trial_order, :]
clust_dist = clust_dist[:, trial_order]
# LDA - 2 - PCA/LDA (can't handle 100k)
t = time.time()
A3 = LDA(n_components=1, method='twostage').fit(X, Y)[1]
Z1 = np.dot(X, A3[:, 0])
print('PCA + LDA: %.2f ms' % ((time.time() - t) * 1000))
# Z1 = X[:, 0]

# LDA - 3 - QR-LDA - big & fast, max output dim = k
t = time.time()
A4 = LDA(n_components=1, method='qrsvd').fit(X, Y)[1]
Z2 = np.dot(X, A4[:, 0])
print('QR LDA: %.2f ms' % ((time.time() - t) * 1000))

# LDA - SK - big & med, max output dim = k-1
t = time.time()
Z3 = lda(n_components=1).fit_transform(X, Y)
print('SciKit LDA: %.2f ms' % ((time.time() - t) * 1000))

# LDA - SRDA - big & med, max output dim = k+1
t = time.time()
A5 = LDA(n_components=1, method='srda').fit(X, Y)
Z5 = np.dot(X, A5)
print('SRDA: %.2f ms' % ((time.time() - t) * 1000))

# Tally the points of each class that project beyond the other class's
# extreme, i.e. the cleanly separated samples
if np.min(Z[Y == 1]) < np.min(Z[Y == -1]):
    F0 = np.sum(Z[Y == 1] < np.min(Z[Y == -1])) + np.sum(
        Z[Y == -1] > np.max(Z[Y == 1]))
else:
    F0 = np.sum(Z[Y == 1] > np.max(Z[Y == -1])) + np.sum(
        Z[Y == -1] < np.min(Z[Y == 1]))
if np.min(Z0[Y == 1]) < np.min(Z0[Y == -1]):
import pandas as pd  # needed for pd.io.parsers.read_csv below
import numpy as np
import math
from matplotlib import pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

names = [
    'Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Class Label'
]
df = pd.io.parsers.read_csv("iris.data")
df.columns = names
df.head()
features = df.drop('Class Label', axis=1)
classlabels = df['Class Label']

sklearn_lda = lda(n_components=2)
sklearn_lda_features = sklearn_lda.fit_transform(features, classlabels)


def plot_lda(two_lda_dimensions, label_matrix, title):
    # Make scatter plot, with labels and colors
    for label, marker, color in zip(
            ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica'),
            ('^', 's', 'o'),
            ('orange', 'black', 'grey')):
        plt.scatter(x=two_lda_dimensions[:, 0][label_matrix == label],
                    y=two_lda_dimensions[:, 1][label_matrix == label],
                    marker=marker,
                    color=color,
                    label=label)
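# Hedged usage sketch: plot the projection computed above. The title
# argument is accepted but unused in the fragment shown here.
plot_lda(sklearn_lda_features, classlabels.values, 'LDA: Iris projection')
plt.legend(loc='best')
plt.show()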
all_off_firing = data.all_normal_off_firing

all_off_firing_long = all_off_firing[0, :, :]
for nrn in range(1, all_off_firing.shape[0]):
    all_off_firing_long = np.concatenate(
        (all_off_firing_long, all_off_firing[int(nrn), :, :]), axis=1)

all_off_red_pca = pca(n_components=20).fit(all_off_firing_long)
all_off_red = all_off_red_pca.transform(all_off_firing_long)
plt.imshow(exposure.equalize_hist(all_off_red))

groups = np.sort(np.asarray([0, 1, 2, 3] * 15))
plt.figure()
plt.scatter(all_off_red[:, 0], all_off_red[:, 1], c=groups)
plt.colorbar()

taste_lda = lda().fit(all_off_red, groups)
print(np.mean(taste_lda.predict(all_off_red) == groups))

trial_dist = dist_mat(all_off_firing_long, all_off_firing_long)
plt.figure()
plt.imshow(exposure.equalize_hist(trial_dist))

## n_components = 3
taste = 1
pre_inds = np.arange(0, 80)
post_inds = np.arange(80, 160)

this_off = data.normal_off_firing[taste]
this_off_pre = this_off[:, :, pre_inds]
lr_y_test_pred = lr.predict(X_test_pca)
print("With PCA, Logistic Regression accuracy score for testing: ",
      metrics.accuracy_score(y_test, lr_y_test_pred))

## SVM
from sklearn.svm import SVC
svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_pca, y_train)
svm_train_predict = svm.predict(X_train_pca)
svm_test_predict = svm.predict(X_test_pca)
print("With PCA, SVM Accuracy Score for Training: ",
      metrics.accuracy_score(y_train, svm_train_predict))
print("With PCA, SVM Accuracy Score for Testing: ",
      metrics.accuracy_score(y_test, svm_test_predict))
print(" ")

## Part 4 LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
# Use a distinct instance name so the lda class alias is not shadowed
lda_model = lda(n_components=2)
X_train_lda = lda_model.fit_transform(X_train_std, y_train)
X_test_lda = lda_model.transform(X_test_std)

## Logistic Regression
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X_train_lda, y_train)
lr_y_train_pred = lr.predict(X_train_lda)
print("Part 4\n",
      "With LDA, Logistic Regression accuracy score for training: ",
      metrics.accuracy_score(y_train, lr_y_train_pred))
lr_y_test_pred = lr.predict(X_test_lda)
print("With LDA, Logistic Regression accuracy score for testing: ",
      metrics.accuracy_score(y_test, lr_y_test_pred))

## SVM
from sklearn.svm import SVC
svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_lda, y_train)
def performance_evaluation(args, output_array, folds, label_list, best_parameter_pair):
    if args.method == 'SVM':
        temp_str = 'The best parameter for SVM is: cost = ' + str(
            best_parameter_pair['cost']) + ', gamma = ' + str(
                best_parameter_pair['gamma'])
        # print(temp_str.center(40, '+'))
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            classification = svm.SVC(C=2 ** best_parameter_pair['cost'],
                                     gamma=2 ** best_parameter_pair['gamma'],
                                     probability=True)
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)
    elif args.method == 'LinearSVM':
        temp_str = 'The best parameter for Linear SVM is: cost = ' + str(
            best_parameter_pair['cost'])
        # print(temp_str.center(40, '+'))
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            classification = svm.SVC(C=2 ** best_parameter_pair['cost'],
                                     kernel="linear", probability=True)
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)
    elif args.method == 'RF':
        temp_str = 'The best parameter for RF is: tree = ' + str(
            best_parameter_pair['tree'])
        # print(temp_str.center(40, '+'))
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            classification = RandomForestClassifier(
                random_state=42, n_estimators=best_parameter_pair['tree'])
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)
    elif args.method == 'KNN':
        temp_str = 'The best parameter for KNN is: neighbors = ' + str(
            best_parameter_pair['ngb'])
        # print(temp_str.center(40, '+'))
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            classification = KNeighborsClassifier(
                n_neighbors=best_parameter_pair['ngb'])
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)
    elif args.method in ('AdaBoost', 'NB', 'LDA', 'QDA'):
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            if args.method == 'AdaBoost':
                classification = AdaBoostClassifier()
            elif args.method == 'NB':
                classification = GaussianNB()
            elif args.method == 'LDA':
                classification = lda()
            elif args.method == 'QDA':
                classification = qda()
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)

        # Note: this reuses the classifier fitted on the final CV fold to
        # label the full data set.
        all_predict = classification.predict(output_array)
        with open(args.result_dir + 'prediction result', 'w') as f:
            space = ' '
            f.write('No.' + space + 'True Label' + space + 'Predict Label\n')
            for i in range(len(all_predict)):
                f.write(str(i) + space + str(label_list[i]) + space +
                        str(all_predict[i]))
                f.write('\n')
def main():
    st.title("Binary Classification Web App")
    st.sidebar.title("Binary Classification Web App")
    st.markdown("Are your mushrooms edible or poisonous? 🍄")
    st.sidebar.markdown("Are your mushrooms edible or poisonous? 🍄")

    @st.cache(persist=True)
    def load_data():
        data = pd.read_csv("mushrooms.csv")
        labelencoder = LabelEncoder()
        for col in data.columns:
            data[col] = labelencoder.fit_transform(data[col])
        return data

    @st.cache(persist=True)
    def split(df):
        y = df.iloc[:, 0]
        x = df.iloc[:, 1:]
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)
        return x_train, x_test, y_train, y_test

    def plot_metrics(metrics_list):
        if 'Confusion Matrix' in metrics_list:
            st.subheader("Confusion Matrix")
            plot_confusion_matrix(model, X_test, y_test, display_labels=class_names)
            st.pyplot()
        if 'ROC Curve' in metrics_list:
            st.subheader("ROC Curve")
            plot_roc_curve(model, X_test, y_test)
            st.pyplot()
        if 'Precision-Recall Curve' in metrics_list:
            st.subheader('Precision-Recall Curve')
            plot_precision_recall_curve(model, X_test, y_test)
            st.pyplot()

    # st.sidebar.subheader("Choose Dataset")
    # file_name = st.sidebar.selectbox("file_name", ("mushroom.csv", "dataset_1.csv", "dataset_2.csv", "dataset_3.csv"))
    # df = load_data(file_name)
    class_names = ['value_0', 'value_1']
    df = load_data()
    if st.sidebar.checkbox("Show raw data", False):
        st.subheader("Mushroom Data Set (Classification)")
        st.write(df)
    X_train, X_test, y_train, y_test = split(df)

    st.sidebar.subheader("Dimension Reduction Technique")
    method = st.sidebar.selectbox(
        "method",
        ("Principal Component Analysis (PCA)",
         "Linear Discriminant Analysis (LDA)", "KernelPCA", "NO REDUCTION"))
    if method == "Principal Component Analysis (PCA)":
        no_of_components = st.sidebar.number_input("no. of input features", 1, 5,
                                                   step=1, key='n_components')
        red_tech = PCA(n_components=no_of_components)
        X_train = red_tech.fit_transform(X_train)
        X_test = red_tech.transform(X_test)
    if method == "Linear Discriminant Analysis (LDA)":
        no_of_components = st.sidebar.number_input("no. of input features", 1, 5,
                                                   step=1, key='n_components')
        red_tech = lda(n_components=no_of_components)
        # LDA is supervised, so the labels are passed to fit_transform
        X_train = red_tech.fit_transform(X_train, y_train)
        X_test = red_tech.transform(X_test)
    if method == "KernelPCA":
        no_of_components = st.sidebar.number_input("no. of input features", 1, 5,
                                                   step=1, key='n_components')
        ker = st.sidebar.radio("kernel_selection", ("Linear", "RBF"), key='kern')
        red_tech = KernelPCA(n_components=no_of_components, kernel=ker)
        X_train = red_tech.fit_transform(X_train)
        X_test = red_tech.transform(X_test)

    st.sidebar.subheader("Choose Classifier")
    classifier = st.sidebar.selectbox(
        "Classifier",
        ("Support Vector Machine (SVM)", "Logistic Regression", "Random Forest",
         "Auto select acc. to Dataset"))

    if classifier == 'Support Vector Machine (SVM)':
        st.sidebar.subheader("Model Hyperparameters")
        # choose parameters
        parameter = st.sidebar.radio("parameter_selection", ("Manual", "Auto"))
        if parameter == "Manual":
            C = st.sidebar.number_input("C (Regularization parameter)", 0.01, 10.0,
                                        step=0.01, key='C_SVM')
            kernel = st.sidebar.radio("Kernel", ("rbf", "linear"), key='kernel')
            gamma = st.sidebar.radio("Gamma (Kernel Coefficient)", ("scale", "auto"), key='gamma')
            model = SVC(C=C, kernel=kernel, gamma=gamma)
            model.fit(X_train, y_train)
        if parameter == "Auto":
            model_a = SVC(kernel='rbf')
            model_a.fit(X_train, y_train)
            parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10], 'gamma': [1, 10]}
            model = GridSearchCV(model_a, parameters, n_jobs=-1)
            model.fit(X_train, y_train)
        metrics = st.sidebar.multiselect(
            "What metrics to plot?",
            ('Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'))
        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Support Vector Machine (SVM) Results")
            accuracy = model.score(X_test, y_test)
            y_pred = model.predict(X_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write("Precision: ",
                     precision_score(y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ",
                     recall_score(y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics)

    if classifier == 'Logistic Regression':
        st.sidebar.subheader("Model Hyperparameters")
        parameter = st.sidebar.radio("parameter_selection", ("Manual", "Auto"))
        if parameter == "Manual":
            C = st.sidebar.number_input("C (Regularization parameter)", 0.01, 10.0,
                                        step=0.01, key='C_LR')
            max_iter = st.sidebar.slider("Maximum number of iterations", 100, 500,
                                         key='max_iter')
            model = LogisticRegression(C=C, max_iter=max_iter)
            model.fit(X_train, y_train)
        if parameter == "Auto":
            model_a = LogisticRegression()
            model_a.fit(X_train, y_train)
            parameters = [{'C': [1, 10], 'max_iter': [100, 500]}]
            model = GridSearchCV(model_a, parameters, n_jobs=-1)
            model.fit(X_train, y_train)
        metrics = st.sidebar.multiselect(
            "What metrics to plot?",
            ('Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'))
        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Logistic Regression Results")
            accuracy = model.score(X_test, y_test)
            y_pred = model.predict(X_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write("Precision: ",
                     precision_score(y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ",
                     recall_score(y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics)

    if classifier == 'Random Forest':
        st.sidebar.subheader("Model Hyperparameters")
        parameter = st.sidebar.radio("parameter_selection", ("Manual", "Auto"))
        if parameter == "Manual":
            n_estimators = st.sidebar.number_input("The number of trees in the forest",
                                                   100, 5000, step=10, key='n_estimators')
            max_depth = st.sidebar.number_input("The maximum depth of the tree", 1, 20,
                                                step=1, key='max_depth')
            bootstrap = st.sidebar.radio("Bootstrap samples when building trees",
                                         ('True', 'False'), key='bootstrap')
            # The radio widget returns a string; convert it to a real bool
            model = RandomForestClassifier(n_estimators=n_estimators,
                                           max_depth=max_depth,
                                           bootstrap=(bootstrap == 'True'),
                                           n_jobs=-1)
            model.fit(X_train, y_train)
        if parameter == "Auto":
            model_a = RandomForestClassifier(n_estimators=100)
            model_a.fit(X_train, y_train)
            parameters = {'n_estimators': [100, 300, 10],
                          'criterion': ['gini', 'entropy'],
                          'max_depth': [1, 20],
                          'bootstrap': [True, False]}
            model = GridSearchCV(model_a, parameters, n_jobs=-1)
            model.fit(X_train, y_train)
        metrics = st.sidebar.multiselect(
            "What metrics to plot?",
            ('Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'))
        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Random Forest Results")
            accuracy = model.score(X_test, y_test)
            y_pred = model.predict(X_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write("Precision: ",
                     precision_score(y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ",
                     recall_score(y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics)
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as qda
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    from data.data_reader import get_training_data
    from data.data_combinator import get_full_combinations

    x_train, y_train, x_val, y_val = get_training_data(validation=True)
    x_train = get_full_combinations(x_train)
    x_val = get_full_combinations(x_val)

    LDA = lda()
    LDA.fit(x_train, y_train)
    LDA_prob = LDA.predict_proba(x_val)
    LDA_prob

    QDA = qda()
    QDA.fit(x_train, y_train)
    QDA_prob = QDA.predict_proba(x_val)
    QDA_prob

    GNB = GaussianNB()
    GNB.fit(x_train, y_train)
    GaussianNB_prob = GNB.predict_proba(x_val)
    GaussianNB_prob

    # alpha = 1.0
# print(y)
y = [np.where(all_id_in_y == a)[0].tolist()[0] for a in y]
y = np.array(y)
# print(y)
# print(y2)
# print(all_id_in_y)

# 2d
# clf = lda(n_components=2)
# x_new = clf.fit_transform(X, y)
# # pca = PCA(n_components=2)
# # x_new = pca.fit_transform(X, y)
# plt.scatter(x_new[:, 0], x_new[:, 1], c=y)
# plt.show()

# 3d
clf = lda(n_components=3)
x_new = clf.fit_transform(X, y)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x_new[:, 0], x_new[:, 1], x_new[:, 2], c=y, marker='o')
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
plt.show()
reduced_base = base_pca.transform(base_long)
reduced_stim = stim_pca.transform(stim_long)

repeats = 500
base_acc = []
stim_acc = []
for i in range(repeats):
    # These subsets are not non-overlapping!!
    train_base = np.random.choice(np.arange(60), size=45, replace=False)
    test_base = np.random.choice(np.arange(60), size=15, replace=False)
    train_stim = np.random.choice(np.arange(60), size=45, replace=False)
    test_stim = np.random.choice(np.arange(60), size=15, replace=False)

    base_lda = lda()
    base_lda.fit(reduced_base[train_base, :], groups[train_base])
    base_acc.append(sum(base_lda.predict(reduced_base[test_base, :])
                        == groups[test_base]) / len(groups[test_base]))
    #print('explained_var = %.3f, accuracy = %.3f' % (explained_var_base,accuracy))

    stim_lda = lda()
    stim_lda.fit(reduced_stim[train_stim, :], groups[train_stim])
    stim_acc.append(sum(stim_lda.predict(reduced_stim[test_stim, :])
                        == groups[test_stim]) / len(groups[test_stim]))
    #print('explained_var = %.3f, accuracy = %.3f' % (explained_var_stim,accuracy))

plt.figure()
plt.title(os.path.basename(file_list[file]))
# plt.show() takes no plot argument; draw each histogram, then show it
plt.hist(base_acc)
plt.show()
plt.hist(stim_acc)
plt.show()

# =============================================================================
# =============================================================================
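# Hedged aside on the caveat flagged above: one permutation of the 60 trial
# indices yields genuinely disjoint train/test subsets, e.g.
# perm = np.random.permutation(60)
# train_base, test_base = perm[:45], perm[45:]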