def __init__(self, **kwargs):
    super(LDA, self).__init__()
    super(LDA, self).SetModel(LinearDiscriminantAnalysis(**kwargs))
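# --- A minimal sketch (not in the original) of the base-class contract the
# --- __init__ above assumes: a hypothetical wrapper exposing SetModel() and
# --- delegating fit/predict to the stored sklearn estimator.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

class ModelWrapper:
    def SetModel(self, model):
        self._model = model  # the wrapped sklearn estimator

    def fit(self, X, y):
        self._model.fit(X, y)
        return self

    def predict(self, X):
        return self._model.predict(X)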
target = D[:, target_list]  # print(target)  # target class values
Sample = D[:, :target_list]
# Note: the original comment ("creates all the possible training/test sets by
# removing p samples from the complete set") describes LeavePOut; ShuffleSplit
# instead draws a fixed number of random train/test splits.
SSlit = ShuffleSplit(n_splits=5, test_size=0.3)
# clf = svm.SVC(C=1.0, kernel='poly', degree=3, gamma='auto')  # SVC model

# classification models
clf_svm1 = svm.SVC(kernel='rbf', gamma='scale')
clf_svm2 = svm.SVC(kernel='linear', gamma='scale')
clf_tree = tree.DecisionTreeClassifier(criterion="gini")
clf_lda = LinearDiscriminantAnalysis(solver="svd", n_components=ldaNum,
                                     store_covariance=True, tol=1.0e-4)
clf_knn = neighbors.KNeighborsClassifier(n_neighbors=5, weights='uniform',
                                         algorithm='auto', leaf_size=1, p=2,
                                         metric='minkowski', metric_params=None)
clf_NN = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic',
                       solver='lbfgs', alpha=0.0001, batch_size='auto',
                       learning_rate='adaptive', max_iter=200,
else:
    stability_idx = np.load(stability_idx_path)
    data = data[:, stability_idx]

#%% Decoding Main Part
decoding_method = 'nn'

# ======================== For Sklearn Classifier =======================
if decoding_method == 'sklearn':
    info = pd.DataFrame(columns=['single', 'mean'])
    param_grid = gen_param_grid('lda')
    # make pipeline
    if voxel_selection_method == 'stability':
        pipe = Pipeline([('classifier',
                          LinearDiscriminantAnalysis(solver='lsqr', shrinkage=0.9))])
    elif voxel_selection_method == 'discrim':
        pipe = Pipeline([('feature_selection', SelectPercentile(percentile=25)),
                         ('classifier',
                          LinearDiscriminantAnalysis(solver='lsqr', shrinkage=0.9))])
    # model = LogisticRegression(C=0.001, max_iter=8000, solver='liblinear')
    # selector = RFE(model, n_features_to_select=0.25)
    ### best params after grid searching ###
    # LogisticRegression(C=0.001, max_iter=8000, solver='liblinear')
    # MLPClassifier(hidden_layer_sizes=100, alpha=0.01)
    # SVC(max_iter=8000, C=0.001, kernel='linear', decision_function_shape='ovo')
    # RandomForestClassifier(n_estimators=500)
    # Lasso(alpha=0.01)
def sklearn_lda(x, y, nComponent=None):
    lda = LinearDiscriminantAnalysis(n_components=nComponent)
    lda.fit(x, y)  # fixed: the original fit/transformed an undefined global X
    newx = lda.transform(x)
    data_plot2d(newx, y)
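# A minimal usage sketch (not in the original), on sklearn's iris data;
# data_plot2d is assumed to be the 2-D plotting helper defined elsewhere.
from sklearn.datasets import load_iris
iris = load_iris()
sklearn_lda(iris.data, iris.target, nComponent=2)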
#%%
# CART (Classification and Regression Trees)
kfold = KFold(n_splits=10, shuffle=True, random_state=7)  # shuffle is required when random_state is set
model = DecisionTreeClassifier()
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())

#%%
# Gaussian Naive Bayes
model = GaussianNB()
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())

#%%
# SVM
model = SVC()
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())

#%%
# LDA
model = LinearDiscriminantAnalysis()
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())

#%%
# K-Nearest Neighbors
model = KNeighborsClassifier()
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
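#%%
# A minimal sketch (not in the original) that collapses the five cells above
# into one loop, assuming X, Y and kfold are defined as before.
for name, model in [('CART', DecisionTreeClassifier()),
                    ('NB', GaussianNB()),
                    ('SVM', SVC()),
                    ('LDA', LinearDiscriminantAnalysis()),
                    ('KNN', KNeighborsClassifier())]:
    results = cross_val_score(model, X, Y, cv=kfold)
    print('%s: %.4f' % (name, results.mean()))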
# Fixed to the current sklearn API: StratifiedShuffleSplit takes n_splits, and
# the splits come from sss.split(X, y) rather than iterating the object itself.
sss = StratifiedShuffleSplit(n_splits=10, test_size=0.2, random_state=23)
for train_index, test_index in sss.split(train.values, labels):
    X_train, X_test = train.values[train_index], train.values[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel='rbf', C=0.025, probability=True),
    NuSVC(probability=True),
    DecisionTreeClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis()]

log_cols = ["Classifier", "Accuracy", "Log Loss"]
log = pd.DataFrame(columns=log_cols)

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    print("=" * 30)
    print(name)
    print('****Result****')
    train_predictions = clf.predict(X_test)
    acc = accuracy_score(y_test, train_predictions)
# Method 1: feature selection (backward elimination, forward selection,
#           bidirectional elimination)
# Method 2: feature extraction

# ############################## PCA Reduction ##############################
from sklearn.decomposition import PCA  # linear dimensionality reduction
pca = PCA(n_components=None)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
explained_variance = pca.explained_variance_ratio_

# ############################## LDA Reduction ##############################
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=2)
X_train = lda.fit_transform(X_train, y_train)  # supervised: needs y_train
X_test = lda.transform(X_test)  # fixed: the original called transform() with no argument

# ############################## Kernel PCA ##############################
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)
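# A minimal sketch (not in the original): choosing the number of PCA
# components from the cumulative explained variance computed above.
import numpy as np
cumvar = np.cumsum(explained_variance)
n_components_95 = int(np.searchsorted(cumvar, 0.95) + 1)  # smallest k covering 95% of variance
print('components for 95% variance:', n_components_95)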
def run_16(X_train, X_test, y_train, y_test, dataset):
    LOGGER.info('running 16...')
    settings = {
        'wage': {
            'pca': 65, 'ica': 92, 'rp': 105, 'lda': 1,
            'kmeans': 2, 'gmm': 2,
            'kmeans_ica': 83, 'kmeans_lda': 99, 'gmm_lda': 99, 'gmm_ica': 83,
        },
        'wine': {
            'pca': 12, 'ica': 12, 'rp': 13, 'lda': 2,
            'kmeans': 3, 'gmm': 3,
            'kmeans_lda': 99, 'gmm_lda': 99,
        },
    }
    score_fns = [v_measure_score, homogeneity_score, completeness_score]

    pca = PCA(n_components=settings[dataset]['pca'])
    pca.fit(X_train)
    ica = FastICA(n_components=settings[dataset]['ica'])
    ica.fit(X_train)
    rp = SparseRandomProjection(n_components=settings[dataset]['rp'])
    rp.fit(X_train)
    lda = LinearDiscriminantAnalysis(n_components=settings[dataset]['lda'])
    lda.fit(X_train, y_train)

    # The original repeated the same KMeans and GMM blocks once per reducer;
    # the loops below are an identical-behavior restructuring over the four
    # fitted reducers.
    reducers = [('pca', pca), ('ica', ica), ('rp', rp), ('lda', lda)]

    # KMeans on each reduced representation: elbow plot, then cluster validation.
    for name, reducer in reducers:
        plt.clf()
        visualizer = KElbowVisualizer(KMeans(), k=(2, 100),
                                      metric='calinski_harabasz', timings=True)
        visualizer.fit(reducer.transform(X_train))
        # visualizer.show()
        plt.tight_layout()
        plt.savefig('plots/p16/km_' + name + '_' + dataset + '.png')
        # visualizer.poof()

        kmeans = KMeans(n_clusters=settings[dataset]['kmeans'], random_state=0)
        kmeans.fit(reducer.transform(X_train))
        cluster_validation_df = pd.DataFrame()
        for score in score_fns:
            cluster_validation_df.loc[score.__name__, 'score'] = score(
                y_test[y_test.columns[0]],
                kmeans.predict(reducer.transform(X_test)))
        # print(cluster_validation_df)
        LOGGER.info('KMeans {} {}: \n{}'.format(name.upper(), dataset,
                                                cluster_validation_df))

    # GMM on each reduced representation: sweep k, plot the score curve,
    # then validate the configured model.
    k_max = 100
    for name, reducer in reducers:
        gmm = GaussianMixture(random_state=0)
        score_df = pd.DataFrame()
        for k in range(2, k_max):
            gmm.set_params(n_components=k)
            predY = gmm.fit_predict(reducer.transform(X_train))
            score_df.loc[k, 'score'] = calinski_harabasz_score(
                reducer.transform(X_train), predY)
        LOGGER.info('gmm {} max score on {}: k={}'.format(
            name, dataset, score_df.idxmax(axis=0)['score']))

        plt.clf()
        plt.title("calinski_harabasz_Expectation_Maximization")
        plt.xlabel('k')
        plt.ylabel('score')
        plt.plot(score_df.reset_index()['index'], score_df['score'],
                 label='calinski_harabasz_score')
        plt.legend(loc="best")
        plt.savefig('plots/p16/' + '_'.join(['gm', name, dataset, '.png']))

        gmm = GaussianMixture(n_components=settings[dataset]['gmm'],
                              random_state=0)
        gmm.fit(reducer.transform(X_train))
        cluster_validation_df = pd.DataFrame()
        for score in score_fns:
            cluster_validation_df.loc[score.__name__, 'score'] = score(
                y_test[y_test.columns[0]],
                gmm.predict(reducer.transform(X_test)))
        LOGGER.info('GMM {} {}: \n{}'.format(name.upper(), dataset,
                                             cluster_validation_df))
import numpy as np
import pandas as pd
import csv
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split  # replaces the removed sklearn.cross_validation module
from sklearn.metrics import confusion_matrix

tt = pd.read_csv("dm-hw-m-train.txt", header=None)
index = tt.values[:, 0]
X = tt.values[:, 1:4]
y = tt.values[:, 4]

# Hold-out validation (a single random train/test split)
clf = LinearDiscriminantAnalysis()
X_train, X_test, y_train, y_test = train_test_split(X, y)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
score = (tp + tn) / (tn + fp + fn + tp)  # accuracy score
print('LDA accuracy score: %f' % score)

# Refit on all the data before applying to the real test set
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
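# A minimal sketch of the missing prediction step (the test filename here is
# hypothetical; the original snippet ends right after the final fit).
test = pd.read_csv("dm-hw-m-test.txt", header=None)
predictions = clf.predict(test.values[:, 1:4])
print(predictions)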
def twodim(d):
    # Relies on module-level globals: `sc` (a scaler such as StandardScaler)
    # and the class labels `y` that LDA needs for fitting.
    lda = LinearDiscriminantAnalysis(n_components=2)
    d = sc.fit_transform(d)
    lda_object = lda.fit(d, y)
    d = lda_object.transform(d)
    return d
def run_nn_2(X_train, X_test, y_train, y_test, dataset):
    LOGGER.info('running NN...')
    settings = {
        'wage': {
            'pca': 65, 'ica': 92, 'rp': 105, 'lda': 1,
            'kmeans': 2, 'gmm': 2,
            'kmeans_ica': 83, 'kmeans_lda': 99, 'gmm_lda': 99, 'gmm_ica': 83,
            'nn': {'iter': 200, 'hls': 1000, 'alpha': .0001},
        },
        'wine': {
            'pca': 12, 'ica': 12, 'rp': 13, 'lda': 2,
            'kmeans': 3, 'gmm': 3,
            'kmeans_lda': 99, 'gmm_lda': 99,
            'nn': {'iter': 200, 'hls': 800, 'alpha': .1},
        },
    }

    def make_nn():
        # The original constructed this identical MLPClassifier before every
        # section; the helper is an identical-behavior restructuring.
        return MLPClassifier(max_iter=settings[dataset]['nn']['iter'],
                             hidden_layer_sizes=settings[dataset]['nn']['hls'],
                             alpha=settings[dataset]['nn']['alpha'])

    LOGGER.info('NN OG...')
    nn = make_nn()
    nn_check(X_train, X_test, y_train, y_test, nn, 'OG')
    nn_epochs(X_train.to_numpy(), X_test.to_numpy(), y_train, y_test, nn, 'OG')

    LOGGER.info('NN PCA...')
    pca = PCA(n_components=settings[dataset]['pca'], random_state=0)
    X_train_transformed = pca.fit_transform(X_train)
    X_test_transformed = pca.transform(X_test)
    nn = make_nn()
    nn_check(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'PCA')
    nn_epochs(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'PCA')

    LOGGER.info('NN ICA...')
    ica = FastICA(n_components=settings[dataset]['ica'], random_state=0)
    X_train_transformed = ica.fit_transform(X_train)
    X_test_transformed = ica.transform(X_test)
    nn = make_nn()
    nn_check(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'ICA')
    nn_epochs(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'ICA')

    LOGGER.info('NN RP...')
    rp = SparseRandomProjection(n_components=settings[dataset]['rp'], random_state=0)
    X_train_transformed = rp.fit_transform(X_train)
    X_test_transformed = rp.transform(X_test)
    nn = make_nn()
    nn_check(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'RP')
    nn_epochs(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'RP')

    LOGGER.info('NN LDA...')
    lda = LinearDiscriminantAnalysis(n_components=settings[dataset]['lda'])
    X_train_transformed = lda.fit_transform(X_train, y_train)
    X_test_transformed = lda.transform(X_test)
    nn = make_nn()
    nn_check(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'LDA')
    nn_epochs(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'LDA')

    kmeans = KMeans(n_clusters=settings[dataset]['kmeans'], random_state=0)
    X_train_transformed = kmeans.fit_transform(X_train)  # distances to cluster centers
    X_test_transformed = kmeans.transform(X_test)
    nn = make_nn()
    nn_check(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'KMEANS')
    nn_epochs(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'KMEANS')

    gmm = GaussianMixture(n_components=settings[dataset]['gmm'], random_state=0)
    gmm.fit(X_train)
    X_train_transformed = gmm.predict_proba(X_train)  # soft cluster memberships as features
    X_test_transformed = gmm.predict_proba(X_test)
    # X_train_transformed = gmm.predict(X_train)
    # X_test_transformed = gmm.predict(X_test)
    # print(X_train_transformed)
    # print(X_test_transformed)
    nn = make_nn()
    nn_check(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'GMM')
    nn_epochs(X_train_transformed, X_test_transformed, y_train, y_test, nn, 'GMM')
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn import tree

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])

clf = LinearDiscriminantAnalysis(solver='svd')
clf.fit(X, y)
print(clf.predict([[1, 3]]))
print(X.shape)

# The original built a ragged nested list here, which neither NumPy nor
# sklearn accepts; estimators need a 2-D (n_samples, n_features) array, so the
# intended ([x, y], z) samples are flattened to three features per row.
k = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
b = np.array([1, 2, 3])
ab = GaussianNB().fit(k, b)
print(ab.predict([[4, 4, 4]]))

cd = tree.DecisionTreeClassifier().fit(X, y)
print(cd.predict([[0, 0]]))
    splot.set_xticks(())
    splot.set_yticks(())


def plot_lda_cov(lda, splot):
    plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
    plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')


def plot_qda_cov(qda, splot):
    # qda.covariance_ replaces the covariances_ attribute removed from sklearn
    plot_ellipse(splot, qda.means_[0], qda.covariance_[0], 'red')
    plot_ellipse(splot, qda.means_[1], qda.covariance_[1], 'blue')


for i, (X, y) in enumerate([data_aud_dmn(), data_aud_sal(), data_dmn_sal()]):
    # Linear Discriminant Analysis
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    y_pred = lda.fit(X, y).predict(X)
    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
    plot_lda_cov(lda, splot)
    plt.axis('tight')

    # Quadratic Discriminant Analysis
    # store_covariance replaces the store_covariances parameter removed from sklearn
    qda = QuadraticDiscriminantAnalysis(store_covariance=True)
    y_pred = qda.fit(X, y).predict(X)
    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
    plot_qda_cov(qda, splot)
    plt.axis('tight')

plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis')
plt.show()
    X, y = make_blobs(n_samples=n_samples, n_features=1, centers=[[-2], [2]])
    # add non-discriminative features
    if n_features > 1:
        X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])
    return X, y


acc_clf1, acc_clf2 = [], []
n_features_range = range(1, n_features_max + 1, step)
for n_features in n_features_range:
    score_clf1, score_clf2 = 0, 0
    for _ in range(n_averages):
        X, y = generate_data(n_train, n_features)
        clf1 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X, y)
        clf2 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None).fit(X, y)
        X, y = generate_data(n_test, n_features)
        score_clf1 += clf1.score(X, y)
        score_clf2 += clf2.score(X, y)
    acc_clf1.append(score_clf1 / n_averages)
    acc_clf2.append(score_clf2 / n_averages)

features_samples_ratio = np.array(n_features_range) / n_train
plt.plot(features_samples_ratio, acc_clf1, linewidth=2,
print(df_.groupby(['Predicted default status',
                   'True default status']).size().unstack('True default status'))
print(classification_report(y, y_pred))

# ### Sklearn

# In[22]:

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix, classification_report, precision_score

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
print(len(X_test))

# Fit and predict using LDA
lda = LinearDiscriminantAnalysis(solver='svd')
lda.fit(X_train, y_train)
y_pred = lda.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred, target_names=['No', 'Yes']))

# The LDA and logistic regression predictions are almost identical, at about
# 83% accuracy. The LDA output indicates that pi_hat_1 = 0.84 and
# pi_hat_2 = 0.44; in other words, 84% of the training observations correspond
# to credit scores that are not defaulting. It also provides the group means:
# the average of each predictor within each class, used by LDA as estimates
# of mu_k.

# In[29]:

X = df[['balance', 'income']].values
y = df.default2.values
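# A minimal sketch (not in the original) showing where the quoted pi_hat and
# group-mean estimates live on the LDA model fitted above.
print(lda.priors_)  # estimated class priors pi_hat_k
print(lda.means_)   # per-class means of each predictor, the mu_k estimates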
        # Confusion matrix
        plot_confusion_matrix(y_test, pred, figsize=(7, 5), cmap="PuBuGn")
        bottom, top = plt.ylim()
        plt.ylim(bottom + 0.5, top - 0.5)
        st.pyplot()
    except:
        st.write("Fill in all the parameters")

########################################
# LINEAR DISCRIMINANT CLASSIFIER
########################################
if ML_option == "Linear Discriminant Analysis":
    # Fit the model and predict X_test. Show some analysis.
    try:
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)
        pred = lda.predict(X_test)
        st.write("R2 Score: ", r2_score(y_test, pred))
        st.write('Mean Absolute Error (MAE):', metrics.mean_absolute_error(y_test, pred))
        st.write('Mean Squared Error (MSE):', metrics.mean_squared_error(y_test, pred))
        st.write('Root Mean Squared Error (RMSE):',
                 np.sqrt(metrics.mean_squared_error(y_test, pred)))
        st.write('Accuracy of LDA classifier on training set: ',
                 lda.score(X_train, y_train))
        st.write('Accuracy of LDA classifier on test set: ',
                 lda.score(X_test, y_test))
        st.subheader("Classification Report")
# plt.show()
# scatter_matrix(dataset)
# plt.show()

array = dataset.values
X = array[:, 0:4]
Y = array[:, 4]
validation_size = 0.20
seed = 7
scoring = 'accuracy'
X_train, X_validation, Y_train, Y_validation = \
    model_selection.train_test_split(X, Y, test_size=validation_size,
                                     random_state=seed)

# Spot-checking
models = [('LR', LogisticRegression()),
          ('LDA', LinearDiscriminantAnalysis()),
          ('KNN', KNeighborsClassifier()),
          ('CART', DecisionTreeClassifier()),
          ('NB', GaussianNB()),
          ('SVM', SVC())]
results = []
names = []

# Shows KNN as the most accurate model
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)  # shuffle is required when random_state is set
    cv_results = model_selection.cross_val_score(model, X_train, Y_train,
                                                 cv=kfold, scoring=scoring)
    results.append(cv_results)
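    # A minimal sketch of the usual follow-up (not in the original): record
    # each model's name and summarize its cross-validation scores.
    names.append(name)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))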
kfold = StratifiedKFold(n_splits=10)

# Classifiers (building the classifier array)

# In[56]:

random_state = 2
classifiers = []
classifiers.append(SVC(random_state=random_state))
classifiers.append(DecisionTreeClassifier(random_state=random_state))
classifiers.append(RandomForestClassifier(random_state=random_state))
classifiers.append(KNeighborsClassifier())
classifiers.append(LogisticRegression(random_state=random_state))
classifiers.append(LinearDiscriminantAnalysis())
classifiers.append(AdaBoostClassifier(DecisionTreeClassifier(random_state=random_state),
                                      random_state=random_state))
classifiers.append(ExtraTreesClassifier(random_state=random_state))
classifiers.append(GradientBoostingClassifier(random_state=random_state))
classifiers.append(XGBClassifier(random_state=random_state))

cv_results = []
for classifier in classifiers:
    cv_results.append(cross_val_score(estimator=classifier, X=X_train, y=Y_train,
                                      cv=kfold, scoring='accuracy', n_jobs=-1))

cv_means = []
cv_std = []
for cv_result in cv_results:
    cv_means.append(cv_result.mean())
    cv_std.append(cv_result.std())

# Cross-validation results
cv_res = pd.DataFrame({'CrossValMeans': cv_means,
                       'CrossValErrors': cv_std,
                       'Algorithm': ['SVC', 'DTC', 'RFC', 'KNN', 'LR',
                                     'LDA', 'ADA', 'XT', 'GBC', 'XGB']})
    imus[2].resampled_euler_y[i:-number_of_points + i])], 0)

out = np.append(out_z_0, out_z_2, 0)
out = np.append(out, out_x_0, 0)
out = np.append(out, out_x_2, 0)
out = np.append(out, out_y_0, 0)
out = np.append(out, out_y_2, 0)
out = np.append(out, [dz0[number_of_points:]], 0)
out = np.append(out, [dz2[number_of_points:]], 0)
out = np.append(out, [dx0[number_of_points:]], 0)
out = np.append(out, [dx2[number_of_points:]], 0)
out = np.append(out, [dy0[number_of_points:]], 0)
out = np.append(out, [dy2[number_of_points:]], 0)
out = list(out.T)

classifier = LinearDiscriminantAnalysis()
classifier.fit(X, y)
predicted_values = classifier.predict(out)
predicted_values = medfilt(predicted_values, filter_size)

print('Evaluating...')
evaluated_buttons_timestamp = []
evaluated_buttons_values = []
evaluated_predicted_time = []
evaluated_predicted_values = []
for i in range(len(buttons_timestamp)):
    if testing_lower_time < buttons_timestamp[i] < testing_upper_time:
        evaluated_buttons_timestamp.append(buttons_timestamp[i])
        evaluated_buttons_values.append(buttons_values[i])
for i in range(len(t)):
    if testing_lower_time < t[i] < testing_upper_time:
def classification(sub):
    temporal_size = 9
    import matplotlib.pyplot as plt
    plt.rcParams["font.family"] = "Times New Roman"
    import seaborn as sns
    sns.set()
    res_val = np.zeros((9, temporal_size))
    for i in range(1, 6):
        train_data = scipy.io.loadmat('competition/rev_3.5_0.5/' + sub + '_' + str(i) + '_train.mat')
        test_data = scipy.io.loadmat('competition/rev_3.5_0.5/' + sub + '_' + str(i) + '_test.mat')
        train_x = np.transpose(train_data['train'][0][0][0])
        train_y = np.transpose(train_data['train'][0][0][1])
        test_x = np.transpose(test_data['test'][0][0][0])
        test_y = np.transpose(test_data['test'][0][0][1])
        t_train_x = []
        t_test_x = []
        for k in range(0, 9):
            for j in range(0, temporal_size):
                t_train_x.append(arr_flatten(train_x[:, j, :, k]))
                t_test_x.append(arr_flatten(test_x[:, j, :, k]))
        import feature_selection as FS
        opt_idx = FS.lsvm_wrapper(np.array(t_train_x), train_y)
        cur_train_x = t_train_x[opt_idx]
        cur_test_x = t_test_x[opt_idx]
        lda = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
        lda.fit(cur_train_x, train_y.argmax(axis=1))
        y_predict = lda.predict(cur_test_x)
        coh = cohen_kappa_score(test_y.argmax(axis=1), y_predict)
        acc = accuracy_score(test_y.argmax(axis=1), y_predict)
        pre = precision_score(test_y.argmax(axis=1), y_predict, average='macro')
        rec = recall_score(test_y.argmax(axis=1), y_predict, average='macro')
        f1 = f1_score(test_y.argmax(axis=1), y_predict, average='macro')
        sen = str(coh) + ',' + str(acc) + ',' + str(pre) + ',' + str(rec) + ',' + str(f1)
        pen = open('LSVM_3.5_0.5.csv', 'a')
        pen.write('SVM,' + sub + ',' + str(i) + ',' + str(j) + ',' + sen + '\n')
        pen.close()
    """
    for j in range(len(t_test_x)):
        cur_train_x = t_train_x[j]
        cur_test_x = t_test_x[j]
        lda = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
        lda.fit(cur_train_x, train_y.argmax(axis=1))
        y_predict = lda.predict(cur_test_x)
        coh = cohen_kappa_score(test_y.argmax(axis=1), y_predict)
        acc = accuracy_score(test_y.argmax(axis=1), y_predict)
        pre = precision_score(test_y.argmax(axis=1), y_predict, average='macro')
        rec = recall_score(test_y.argmax(axis=1), y_predict, average='macro')
        f1 = f1_score(test_y.argmax(axis=1), y_predict, average='macro')
        sen = str(coh) + ',' + str(acc) + ',' + str(pre) + ',' + str(rec) + ',' + str(f1)
        # pen = open('total_2_0.5.csv', 'a')
        # pen.write('SVM,' + sub + ',' + str(i) + ',' + str(j) + ',' + sen + '\n')
        # pen.close()
        y_val = j % temporal_size
        x_val = int(j / temporal_size)
        res_val[x_val, y_val] += coh
    res_val /= 5
    plt.rcParams["font.family"] = "Times New Roman"
    ax = sns.heatmap(res_val, cmap="BuGn", vmin=0.1, vmax=0.85,
                     square=True, annot=True)
    plt.savefig('fig/4.5_0.5/' + sub + '.png', format='png', dpi=1000)
    plt.close()
    """
    print('abc')
from sklearn.model_selection import cross_val_score  # replaces the removed sklearn.cross_validation module

# Apply random forest
cverror = []
for e in (10, 40, 80, 100):
    clf = RandomForestClassifier(n_estimators=e)
    scores = cross_val_score(clf, Xtr_p, ytr, cv=5, scoring='accuracy')
    cverror.append(np.mean(1 - scores))
print("Random Forest tree:")
print((10, 40, 80, 100)[cverror.index(min(cverror, key=float))])
print(min(cverror, key=float))

# Apply LDA (the original comment said GradientBoosting, but the code uses LDA)
clf = LinearDiscriminantAnalysis()
scores = cross_val_score(clf, Xtr_p, ytr, cv=5)
error = np.mean(1 - scores)
print("LDA:")
print(error)

# Choose the three best methods and run them on the test dataset
model1 = svm.SVC(C=0.01, kernel='linear', probability=True)
model2 = LogisticRegression(C=0.1)
model3 = LinearDiscriminantAnalysis()
model1.fit(Xtr_p, ytr)
model2.fit(Xtr_p, ytr)
model3.fit(Xtr_p, ytr)
print("Three best models to fit the test:")
print("model1:")
def test_api_():
    import os
    os.chdir('E:/Richard/Competition/4c/')
    for i in range(1, 10):
        csp = scipy.io.loadmat('csp/A0' + str(i) + '.mat')['csp'][0][0]
        tdp = scipy.io.loadmat('tdp/A0' + str(i) + '.mat')['tdp'][0][0]
        psd = scipy.io.loadmat('psd/A0' + str(i) + '.mat')['psd'][0][0]
        for j in range(4):
            ctx = np.transpose(csp[0][j])
            cty = np.transpose(csp[1][j]).argmax(axis=1)
            cvx = np.transpose(csp[2][j])
            cvy = np.transpose(csp[3][j]).argmax(axis=1)
            ttx = np.transpose(tdp[0][j])
            tty = np.transpose(tdp[1][j]).argmax(axis=1)
            tvx = np.transpose(tdp[2][j])
            tvy = np.transpose(tdp[3][j]).argmax(axis=1)
            ptx = np.transpose(psd[0][j])
            pty = np.transpose(psd[1][j]).argmax(axis=1)
            pvx = np.transpose(psd[2][j])
            pvy = np.transpose(psd[3][j]).argmax(axis=1)
            from sklearn import svm, linear_model
            from sklearn import ensemble
            mode = ['lsvm', 'ksvm', 'gb', 'srlda']
            data = ['csp', 'tdp', 'psd']
            for cls in mode:
                for d in data:
                    if cls == 'lsvm':
                        lda = svm.LinearSVC()
                    elif cls == 'ksvm':
                        lda = svm.SVC(kernel='linear')
                    elif cls == 'gb':
                        lda = ensemble.GradientBoostingClassifier()
                    elif cls == 'srlda':
                        lda = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
                    if d == 'csp':
                        tx, ty, vx, vy = ctx, cty, cvx, cvy
                    elif d == 'tdp':
                        tx, ty, vx, vy = ttx, tty, tvx, tvy
                    elif d == 'psd':
                        tx, ty, vx, vy = ptx, pty, pvx, pvy
                    lda.fit(tx, ty)
                    y_predict = lda.predict(vx)
                    coh = cohen_kappa_score(vy, y_predict)
                    acc = accuracy_score(vy, y_predict)
                    pen = open('res/res_' + cls + '_' + d + '_f.csv', 'a')
                    pen.write(str(i) + ',' + str(j) + ',' +
                              str(coh) + ',' + str(acc) + '\n')
                    pen.close()
X_train, X_test, y_train, y_test = train_test_split(public_data, public_labels,
                                                    test_size=0.3,
                                                    random_state=i * 500)

clf = TransformedTargetRegressor(regressor=SVR(kernel='poly'),
                                 transformer=MinMaxScaler())  # LinearRegression

steps = [('scaler', StandardScaler()), ('red_dim', PCA()), ('clf', clf)]
pipeline = Pipeline(steps)

n_features_to_test = np.arange(1, 11)
parameteres = [{'scaler': [MinMaxScaler()],
                'red_dim': [PCA()],
                'red_dim__n_components': list(n_features_to_test),
                'clf__regressor__C': list(C_range),
                'clf__regressor__gamma': ['auto', 'scale'] + list(gamma_range),
                'clf__regressor__degree': [2, 3]},
               # Caveat: LinearDiscriminantAnalysis is a supervised classifier,
               # so this reducer branch only fits if the target is categorical.
               {'scaler': [MinMaxScaler()],
                'red_dim': [LinearDiscriminantAnalysis()],
                'red_dim__n_components': [2],
                'clf__regressor__C': list(C_range),
                'clf__regressor__gamma': ['auto', 'scale'] + list(gamma_range),
                'clf__regressor__degree': [2, 3]},
               {'scaler': [MinMaxScaler()],
                'red_dim': [None],
                'clf__regressor__C': list(C_range),
                'clf__regressor__gamma': ['auto', 'scale'] + list(gamma_range),
                'clf__regressor__degree': [2, 3]}]

grid = GridSearchCV(pipeline, param_grid=parameteres, cv=5, n_jobs=-1,
                    verbose=1, scoring='neg_mean_absolute_error')
grid.fit(X_train, y_train)
score_train = grid.score(X_train, y_train)
score_test = grid.score(X_test, y_test)
best_p = grid.best_params_
bp = pd.DataFrame(best_p, index=[i])
bp['MAE_train'] = -score_train
bp['MAE_test'] = -score_test
bp['random_state'] = i * 500
import csv
import sys
import re
from collections import Counter

import nltk
# import nltk.tokenize.casual
from nltk.tokenize import word_tokenize
from nltk.tokenize import TweetTokenizer
from nltk.classify.util import apply_features, accuracy
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

classif = SklearnClassifier(LinearDiscriminantAnalysis())
tokenizer = TweetTokenizer(strip_handles=False, reduce_len=True)

train_file = sys.argv[1]
test_file = sys.argv[2]

emoticon_string = r"""
    (?:
      [<>]?
      [:;=8]                     # eyes
      [\-o\*\']?                 # optional nose
      [\)\]\(\[dDpP/\:\}\{@\|\\] # mouth
      |
      [\)\]\(\[dDpP/\:\}\{@\|\\] # mouth
      [\-o\*\']?                 # optional nose
      [:;=8]                     # eyes
array = dataset.values
X = array[:, 0:4]
Y = array[:, 4]
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=validation_size, random_state=seed)

# Test options and evaluation metric
seed = 7
scoring = 'accuracy'

# Spot-check algorithms
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))

# Evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)  # shuffle is required when random_state is set
    cv_results = model_selection.cross_val_score(model, X_train, Y_train,
                                                 cv=kfold, scoring=scoring)
    results.append(cv_results)
def ml(hursts, bt, pfds, hfd, targets, nof):
    data = np.zeros((nof, 16))
    for i in range(0, int(nof)):
        for y in range(0, 4):
            data[i, y] = hursts[y, i]
            data[i, y + 4] = bt[y, i]
            data[i, y + 8] = pfds[y, i]
            data[i, y + 12] = hfd[y, i]
    # print(data)
    clf = svm.SVC(kernel='linear', C=100, class_weight={2: 3})  # support vector classifier
    clf_lda = LinearDiscriminantAnalysis()
    # clf = joblib.load('classifier.pkl')

    targets2 = np.zeros((len(targets)))
    data2 = np.zeros((len(data)))  # unused in the original
    for i in range(0, len(data)):
        targets2[i] = int(targets[i])
    # print(targets2.ravel())

    # Commented-out ROC-curve comparison of the SVM and LDA classifiers:
    # y = label_binarize(targets2.ravel(), classes=[1, 2])
    # n_classes = y.shape[1]
    # X_train, X_test, y_train, y_test = train_test_split(data, y.ravel(), test_size=.5)
    # y_score = clf.fit(X_train, y_train).decision_function(X_test)
    # y_score2 = clf_lda.fit(X_train, y_train).decision_function(X_test)
    # fpr, tpr, _ = roc_curve(y_test, y_score)
    # fpr2, tpr2, _ = roc_curve(y_test, y_score2)
    # roc_auc = auc(fpr, tpr)
    # roc_auc2 = auc(fpr2, tpr2)
    # plt.figure()
    # lw = 2
    # plt.plot(fpr, tpr, color='darkorange', lw=lw,
    #          label='ROC curve SVM (area = %0.2f)' % roc_auc)
    # plt.plot(fpr2, tpr2, color='green', lw=lw,
    #          label='ROC curve LDA (area = %0.2f)' % roc_auc2)
    # plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    # plt.xlim([0.0, 1.0])
    # plt.ylim([0.0, 1.05])
    # plt.xlabel('False Positive Rate')
    # plt.ylabel('True Positive Rate')
    # plt.title('Receiver operating characteristic')
    # plt.legend(loc="lower right")
    # plt.show()

    targets2 = np.reshape(targets2, (len(data), 1))
    # print(targets2)

    # Replace any NaN features with a constant fallback value
    for i in range(0, int(nof)):
        if not np.all(np.isfinite(data[i])):
            for y in range(0, len(data[i])):
                if np.isnan(data[i, y]):
                    data[i, y] = 0.4

    # parameters = {'kernel': ('linear', 'rbf'),
    #               'C': [50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 300, 400]}
    # svr = svm.SVC()
    # clf8 = GridSearchCV(svr, parameters)  # sklearn.grid_search is deprecated
    c, r = targets2.shape
    targets2 = targets2.reshape(c,)
    # clf8.fit(data, targets2)
    # print(clf8.best_params_)
    # time.sleep(10)

    clf.fit(data, targets2)
    clf_lda.fit(data, targets2)

    # for i in range(0, len(data)):
    #     a = clf.predict(data[i].reshape(1, -1))
    #     b = clf_lda.predict(data[i].reshape(1, -1))
    #     print('concentrated' if a == [1.] else 'distracted')
    #     print('lda concentrated' if b == [1.] else 'lda distracted')

    joblib.dump(clf, 'classifier.pkl')
    joblib.dump(clf_lda, 'classifier_lda.pkl')
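# A minimal sketch (not in the original) of reloading the persisted models at
# prediction time; `sample` is a hypothetical 16-feature row.
clf = joblib.load('classifier.pkl')
clf_lda = joblib.load('classifier_lda.pkl')
# sample = data[0]
# print(clf.predict(sample.reshape(1, -1)), clf_lda.predict(sample.reshape(1, -1)))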
print("Accuracy of PassiveAggressiveClassifier=", accuracy_score(y_test,pac_pred),"\n") print("Classification of PassiveAggressiveClassifier\n\n",classification_report(y_test,pac_pred),"\n") print("Confusion matrix of PassiveAggressiveClassifier\n\n\n",confusion_matrix(y_test,pac_pred)) # In[72]: from sklearn.discriminant_analysis import LinearDiscriminantAnalysis # In[73]: lda = LinearDiscriminantAnalysis() lda.fit(X_train, y_train) # In[74]: lda_pred = lda.predict(X_test) # In[75]: print("Accuracy of LinearDiscriminantAnalysi=", accuracy_score(y_test,lda_pred),"\n") print("Classification of LinearDiscriminantAnalysi\n\n",classification_report(y_test,lda_pred),"\n") print("Confusion matrix of LinearDiscriminantAnalysi\n\n\n",confusion_matrix(y_test,lda_pred))
                                        ypred_BRF_ds, average='weighted')
recall_score_wt_BRF_ds = metrics.recall_score(y_test, ypred_BRF_ds,
                                              average='weighted')
print('F1-score_micro = ', f1_score_micro_BRF_ds)
print('F1-score = ', f1_score_wt_BRF_ds)
print('Precision = ', precision_score_wt_BRF_ds)
print('Recall Score = ', recall_score_wt_BRF_ds)

# ###### Bagging with LDA

# In[34]:

clf = BaggingClassifier(LinearDiscriminantAnalysis())
clf.fit(X1, ds_ytrain)
ypred_BLDA_ds = clf.predict(X2)

# In[35]:

print('********Bagging with LDA Classifier, Sampled, Standard Scaled, '
      'Variance threshold', '********')
f1_score_micro_BLDA_ds = metrics.f1_score(y_test, ypred_BLDA_ds,
                                          average='micro')
f1_score_wt_BLDA_ds = metrics.f1_score(y_test, ypred_BLDA_ds,
# DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

steps = [('scaler', StandardScaler()),
         ('red_dim', PCA()),
         ('clf', DecisionTreeClassifier())]
pipeline = Pipeline(steps)

n_features_to_test = np.arange(1, 11)
parameteres = [{'scaler': scalers_to_test,
                'red_dim': [LinearDiscriminantAnalysis()],
                'red_dim__n_components': [2],
                'clf__criterion': ['gini', 'entropy']},
               {'scaler': scalers_to_test,
                'red_dim': [PCA()],
                'red_dim__n_components': n_features_to_test,
                'clf__criterion': ['gini', 'entropy']}]

grid = GridSearchCV(pipeline, param_grid=parameteres, cv=5, verbose=1)
grid.fit(X_train, y_train)
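# A minimal sketch (not in the original): inspect the winning configuration
# after the grid search above finishes, assuming the usual X_test/y_test
# hold-out split exists in scope.
print('best params:', grid.best_params_)
print('best CV accuracy: %.4f' % grid.best_score_)
print('test accuracy: %.4f' % grid.score(X_test, y_test))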
def __init__(self, configs: object):
    super().__init__(configs.model.model_name, configs.device)
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    self.lda_cls = LinearDiscriminantAnalysis()