def run_lda_2(X_train, X_test, y_train, y_test, dataset):
    # LDA can produce at most n_classes - 1 discriminant components.
    n_classes = y_train.groupby(y_train.columns[0]).count().shape[0]
    model = LinearDiscriminantAnalysis(n_components=n_classes - 1)

    # Sweep n_components and record train/test accuracy at each setting.
    # Note: LDA's score() uses its built-in classifier, whose predictions do
    # not depend on n_components (that parameter only affects transform());
    # see the sketch below for scoring the reduction itself.
    score_df = pd.DataFrame()
    k_max = n_classes
    for i in range(1, k_max):
        LOGGER.info('lda: k={}'.format(i))
        model.set_params(n_components=i)
        model.fit(X_train, y_train[y_train.columns[0]])
        score_df.loc[i, 'test_score'] = model.score(X_test, y_test[y_test.columns[0]])
        score_df.loc[i, 'train_score'] = model.score(X_train, y_train[y_train.columns[0]])
    print(score_df)

    # Refit with the maximum number of components to inspect explained variance.
    model = LinearDiscriminantAnalysis(n_components=n_classes - 1)
    model.fit(X_train, y_train[y_train.columns[0]])
    result_df = pd.DataFrame(data=model.explained_variance_ratio_,
                             columns=['ex_variance'],
                             index=range(len(model.explained_variance_ratio_)))

    title = 'lda_explained_variance'
    x = 'components'
    y = 'variance contributed'
    LOGGER.info('plotting {}'.format(title))
    plt.clf()
    plt.title(title)
    plt.xlabel(x)
    plt.ylabel(y)
    plt.grid()
    plt.step(range(result_df.shape[0]), np.cumsum(result_df['ex_variance']),
             label='cumulative explained variance')
    plt.bar(range(result_df.shape[0]), result_df['ex_variance'],
            align='center', label='explained variance')
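# A minimal sketch of evaluating the reduction itself, rather than LDA's
# built-in classifier: score a downstream classifier on the projected
# features. The use of KNeighborsClassifier and the iris data are
# illustrative assumptions, not part of the original code.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

scores = pd.DataFrame()
n_classes = len(np.unique(y_tr))
for k in range(1, n_classes):  # LDA allows at most n_classes - 1 components
    lda = LinearDiscriminantAnalysis(n_components=k)
    Z_tr = lda.fit_transform(X_tr, y_tr)
    Z_te = lda.transform(X_te)
    knn = KNeighborsClassifier().fit(Z_tr, y_tr)
    scores.loc[k, 'test_score'] = knn.score(Z_te, y_te)
print(scores)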
@classmethod
def from_config(cls, config: Dict[str, Any]) -> "SkLDA":
    """Deserialize an SkLDA instance from a config dictionary."""
    if not HAS_SKLEARN:
        raise DataProcessorError(
            f"scikit-learn is needed to initialize {cls.__name__}.")

    lda = LinearDiscriminantAnalysis()
    lda.set_params(**config["params"])

    # Restore fitted attributes (e.g. coef_, means_) without refitting.
    for name, value in config["attributes"].items():
        if value is not None:
            setattr(lda, name, value)

    return SkLDA(lda)
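# A minimal round-trip sketch for the method above. The complementary
# to_config() here is an assumption about the expected {"params",
# "attributes"} layout, not the library's actual serializer, and the
# attribute list is illustrative.
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

def to_config(lda):
    attrs = ["coef_", "intercept_", "classes_", "means_", "priors_",
             "scalings_", "xbar_"]
    return {
        "params": lda.get_params(),
        # getattr() falls back to None for attributes absent on this solver
        "attributes": {name: getattr(lda, name, None) for name in attrs},
    }

rng = np.random.default_rng(0)
X = rng.random((20, 4))
y = rng.integers(0, 2, 20)
fitted = LinearDiscriminantAnalysis().fit(X, y)
config = to_config(fitted)
# restored = SkLDA.from_config(config)  # rebuilds the model without refitting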
def dimensionality_reduction(TrainFeatures, TestFeatures, Method, params):
    """
    Performs dimensionality reduction on a training and a test feature matrix,
    each stored in an .h5 file. Five different reduction methods are supported.
    _____________________________________________________________________________________
    Parameters:
    - TrainFeatures: string
        Path of an .h5 file with the training features. It contains at least
        the following datasets:
            - 'feats': array-like, shape (n_samples, n_features)
            - 'labels': array-like, shape (n_samples,)
            - 'img_id': array-like, shape (n_samples,)
    - TestFeatures: string
        Path of an .h5 file with the test features. It contains at least the
        same datasets.
    - Method: string
        Possible values are:
            - 'PCA': Principal Component Analysis
            - 't-SNE': t-distributed Stochastic Neighbor Embedding
            - 'TruncatedSVD': Truncated SVD
            - 'NMF': Non-Negative Matrix Factorization
            - 'LDA': Linear Discriminant Analysis
    - params: dict
        Dictionary containing parameters for the selected estimator. Keys and
        possible values are listed on the following pages:
            http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
            http://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.LinearDiscriminantAnalysis.html
            http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
            http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html
            http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html
        For t-SNE an additional key is needed, params['reduce'], whose
        possible values are:
            - params['reduce']='TruncatedSVD': Truncated SVD --> t-SNE
            - params['reduce']='PCA': PCA --> t-SNE
            - params['reduce']='None': t-SNE directly
        If the number of features is very high, it is highly recommended to
        use another dimensionality reduction method first (e.g. PCA for dense
        data or TruncatedSVD for sparse data) to reduce the number of
        dimensions to a reasonable amount (e.g. 50). This suppresses some
        noise and speeds up the computation of pairwise distances between
        samples.
    Returns:
    - X_train: array-like, shape (n_samples, n_components)
    - X_test: array-like, shape (n_samples, n_components)
    - ax: matplotlib.axes._subplots.AxesSubplot object (if n_components <= 3)
      or None (if n_components > 3)
    Furthermore, two new .h5 files containing three datasets each (reduced
    features, labels and img_ids) are automatically generated in the folder
    Results/ReducedFeatures, and if n_components <= 3 a scatter plot is saved
    in the folder Results/Plots.
    Example usage:
        import FeaturesReduction as fr
        import matplotlib.pyplot as plt
        params = {'n_components': 3}
        X_train, X_test, ax = fr.dimensionality_reduction('TrainingFeatures.h5', 'TestFeatures.h5', 'PCA', params)
        plt.show()
    """
    s = os.sep

    # Load training features file
    train = h5py.File(TrainFeatures, 'r')
    train_features = train['feats']
    train_labels = np.squeeze(train['labels'])
    train_img_ids = train['img_id']
    # Get categories of the training set from feature ids
    categories = mf.get_categories(train_img_ids)

    # Load test features file
    test = h5py.File(TestFeatures, 'r')
    test_features = test['feats']
    test_labels = np.squeeze(test['labels'])
    test_img_ids = test['img_id']

    n_comp = params['n_components']

    if Method != 'NMF':
        # Standardize features by removing the mean and scaling to unit
        # variance (skipped for NMF, which requires non-negative input).
        scaler = StandardScaler().fit(train_features)
        train_features = scaler.transform(train_features)
        test_features = scaler.transform(test_features)

    if Method == 'PCA':
        # Fit PCA on the training features and reduce both sets.
        pca = PCA()
        pca.set_params(**params)
        X_train = pca.fit_transform(train_features)
        X_test = pca.transform(test_features)
    elif Method == 'NMF':
        params['verbose'] = True
        # Fit NMF on the training features and reduce both sets.
        nmf = NMF()
        nmf.set_params(**params)
        X_train = nmf.fit_transform(train_features)
        X_test = nmf.transform(test_features)
    elif Method == 'LDA':
        # Fit LDA on the training features (supervised) and reduce both sets.
        lda = LDA()
        lda.set_params(**params)
        X_train = lda.fit_transform(train_features, train_labels)
        X_test = lda.transform(test_features)
    elif Method == 't-SNE':
        red = params['reduce']
        del params['reduce']
        print(red)
        params['verbose'] = True
        # Optionally use another dimensionality reduction method (PCA for
        # dense data or TruncatedSVD for sparse data) to bring the number of
        # dimensions down to a reasonable amount (e.g. 50) first. This
        # suppresses some noise and speeds up the computation of pairwise
        # distances between samples.
        if n_comp < 50:
            K = 50
        else:
            K = n_comp * 2

        if red == 'TruncatedSVD':
            # Pre-reduce with TruncatedSVD before t-SNE.
            svd = TruncatedSVD(n_components=K)
            train_features = svd.fit_transform(train_features)
            test_features = svd.transform(test_features)
        elif red == 'PCA':
            # Pre-reduce with PCA before t-SNE.
            pca = PCA(n_components=K)
            train_features = pca.fit_transform(train_features)
            test_features = pca.transform(test_features)
        # red == 'None': feed the features to t-SNE directly

        # t-SNE has no transform() for unseen data, so embed the training and
        # test sets together and split the result afterwards.
        tsne = TSNE()
        tsne.set_params(**params)
        n_train = train_features.shape[0]
        features = np.concatenate((train_features, test_features), axis=0)
        X = tsne.fit_transform(features)
        X_train = X[:n_train, :]
        X_test = X[n_train:, :]
    elif Method == 'TruncatedSVD':
        # Fit TruncatedSVD on the training features and reduce both sets.
        svd = TruncatedSVD()
        svd.set_params(**params)
        X_train = svd.fit_transform(train_features)
        X_test = svd.transform(test_features)
    else:
        raise ValueError(
            "Invalid method: possible methods are 'PCA', 't-SNE', "
            "'TruncatedSVD', 'NMF' and 'LDA'")

    # Create folder in which to save the reduced features
    mf.folders_creator('Results', ['ReducedFeatures'])

    # Dataset name for the reduced features, keyed by method
    ds_names = {'PCA': 'pca', 't-SNE': 'tsne', 'TruncatedSVD': 'tsvd',
                'LDA': 'lda', 'NMF': 'nmf'}

    # Create an .h5 file and store the reduced training set in it
    name = ('Results' + s + 'ReducedFeatures' + s + Method + str(n_comp) +
            '_' + TrainFeatures.split(s)[-1].split('.')[0] + '.h5')
    with h5py.File(name, "w") as f:
        f.create_dataset('img_id', data=train_img_ids[:], dtype="S40")
        f.create_dataset('labels', data=train_labels.T, compression="gzip")
        f.create_dataset(ds_names[Method], data=X_train.T, compression="gzip")

    # Create an .h5 file and store the reduced test set in it
    name = ('Results' + s + 'ReducedFeatures' + s + Method + str(n_comp) +
            '_' + TestFeatures.split(s)[-1].split('.')[0] + '.h5')
    with h5py.File(name, "w") as f:
        f.create_dataset('img_id', data=test_img_ids[:], dtype="S40")
        f.create_dataset('labels', data=test_labels.T, compression="gzip")
        f.create_dataset(ds_names[Method], data=X_test.T, compression="gzip")

    if n_comp < 4:
        # Get folders list of the test set from feature ids
        test_folders = mf.get_categories(test_img_ids)
        n_folders_test = len(test_folders)
        # Make names for the plot legend
        tf = ['Test' + str(i) for i in range(n_folders_test)]

        # Define a list of colors in hexadecimal format
        if len(categories) + n_folders_test < 9:
            colors = [
                '#FF0000', '#00FF00', '#0000FF', '#FFFF00',
                '#00FFFF', '#808080', '#FF00FF', '#000000'
            ]
        else:
            # Generate a larger palette by sampling the RGB cube.
            n = 250
            max_value = 255 ** 3
            interval = int(max_value / n)
            colors = ['#' + hex(i)[2:].zfill(6)
                      for i in range(0, max_value, interval)]
            colors = colors[:int((n + 1) / 10 * 9)]
            random.shuffle(colors)

        # Create a folder in which to save images
        mf.folders_creator('Results', ['Plots'])
        # Build a name under which to save the image
        name = Method + str(n_comp) + '_' + TrainFeatures.split(s)[-1].split('.')[0]
        name = '_'.join(name.split('_')[:-1])
        print(X_train.shape)
        print(X_test.shape)

        if n_comp == 1:
            # Plot 1-D data with a different color per class
            fig, ax = plt.subplots()
            for i in range(len(categories)):
                ax.scatter(X_train[train_labels == i, 0],
                           np.ones(X_train[train_labels == i, 0].shape),
                           c=colors[i], label=categories[i])
            k = len(categories)
            for i in range(n_folders_test):
                ax.scatter(X_test[test_labels == i, 0],
                           np.ones(X_test[test_labels == i, 0].shape),
                           c=colors[k], label=tf[i])
                k += 1
            ax.legend()
            # Save image in .png format
            plt.savefig('Results' + s + 'Plots' + s + name + '.png')

        if n_comp == 2:
            # Plot 2-D data with a different color per class
            fig, ax = plt.subplots()
            for i in range(len(categories)):
                ax.scatter(X_train[train_labels == i, 0],
                           X_train[train_labels == i, 1],
                           c=colors[i], label=categories[i])
            k = len(categories)
            for i in range(n_folders_test):
                ax.scatter(X_test[test_labels == i, 0],
                           X_test[test_labels == i, 1],
                           c=colors[k], label=tf[i])
                k += 1
            ax.legend()
            # Save image in .png format
            plt.savefig('Results' + s + 'Plots' + s + name + '.png')

            # Remove outliers and, if any were found, plot again without them
            out_train = np.logical_not(mf.is_outlier(X_train, thresh=3.5))
            out_test = np.logical_not(mf.is_outlier(X_test, thresh=3.5))
            X_train2 = X_train[out_train, :]
            X_test2 = X_test[out_test, :]
            if (X_train2.shape[0] != X_train.shape[0]
                    or X_test2.shape[0] != X_test.shape[0]):
                train_labels2 = train_labels[out_train]
                test_labels2 = test_labels[out_test]
                # Plot 2-D data without outliers
                fig, ax = plt.subplots()
                for i in range(len(categories)):
                    ax.scatter(X_train2[train_labels2 == i, 0],
                               X_train2[train_labels2 == i, 1],
                               c=colors[i], label=categories[i])
                k = len(categories)
                for i in range(n_folders_test):
                    ax.scatter(X_test2[test_labels2 == i, 0],
                               X_test2[test_labels2 == i, 1],
                               c=colors[k], label=tf[i])
                    k += 1
                ax.legend()
                # Save image in .png format
                plt.savefig('Results' + s + 'Plots' + s + name + '_noOutliers.png')

        if n_comp == 3:
            mf.folders_creator('Results' + s + 'Plots', ['tmp'])
            # Plot 3-D data with a different color per class
            ax = plt.subplot(111, projection='3d')
            for i in range(len(categories)):
                ax.scatter(X_train[train_labels == i, 0],
                           X_train[train_labels == i, 1],
                           X_train[train_labels == i, 2],
                           c=colors[i], label=categories[i])
            k = len(categories)
            for i in range(n_folders_test):
                ax.scatter(X_test[test_labels == i, 0],
                           X_test[test_labels == i, 1],
                           X_test[test_labels == i, 2],
                           c=colors[k], label=tf[i])
                k += 1
            ax.legend(loc='upper left', numpoints=1, ncol=3,
                      fontsize=8, bbox_to_anchor=(0, 0))
            # Rotate through 360 degrees and save a frame every 10 degrees
            for angle in range(0, 360, 10):
                ax.view_init(30, angle)
                plt.savefig('Results' + s + 'Plots' + s + 'tmp' + s +
                            name + str(angle) + '.png')
            # Combine the frames into a .gif image
            mf.imagesfolder_to_gif('Results' + s + 'Plots' + s + name + '.gif',
                                   'Results' + s + 'Plots' + s + 'tmp', 0.2)
            shutil.rmtree('Results' + s + 'Plots' + s + 'tmp')
    else:
        ax = None

    return X_train, X_test, ax
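# The plotting code above relies on mf.is_outlier(X, thresh=3.5) returning a
# boolean mask of outlying rows. That helper is not shown here; a plausible
# sketch is the common MAD-based modified z-score test (the 3.5 default
# matches the Iglewicz & Hoaglin rule of thumb). This is an assumption about
# mf's implementation, not its actual source.
import numpy as np

def is_outlier(points, thresh=3.5):
    """Return a boolean mask marking rows whose modified z-score exceeds thresh."""
    if points.ndim == 1:
        points = points[:, None]
    median = np.median(points, axis=0)
    diff = np.sqrt(((points - median) ** 2).sum(axis=-1))  # distance to the median
    mad = np.median(diff)                                   # median absolute deviation
    modified_z_score = 0.6745 * diff / mad
    return modified_z_score > thresh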
def run_classifier(args):
    """
    Main function: runs training and testing of heuristic machine learning
    models (SVM, LDA).

    Input: arguments passed through argparse; each argument is described in
    the --help text.
    Output: no return value, but generates a .txt file with test results,
    including the accuracy of the models.
    """

    ########## PARAMETER SETTINGS ##############
    MODE = args.laterality
    CLASSIFIER = args.classifiers
    SENSOR = args.sensors
    ############################################

    sensor_str = '_'.join(SENSOR)
    RESULT_NAME = ('./results/' + CLASSIFIER + '/' + CLASSIFIER + '_' + MODE +
                   '_' + sensor_str + '_subjects_accuracy.txt')
    SAVE_NAME = ('./checkpoints/' + CLASSIFIER + '/' + CLASSIFIER + '_' + MODE +
                 '_' + sensor_str + '_subjects.pkl')
    if not os.path.exists('./results/' + CLASSIFIER):
        os.makedirs('./results/' + CLASSIFIER)

    subjects = ['156', '185', '186', '188', '189',
                '190', '191', '192', '193', '194']
    subject_data = []

    # Loading/saving the ENABL3S dataset
    if args.data_saving:
        print("Loading datasets...")
        for subject in subjects:
            subject_data.append(
                EnableDataset(subject_list=[subject], model_type=CLASSIFIER,
                              sensors=SENSOR, mode=MODE))
        save_object(subject_data, SAVE_NAME)
    else:
        with open(SAVE_NAME, 'rb') as infile:
            subject_data = pickle.load(infile)

    correct = 0
    steady_state_correct = 0
    tot_steady_state = 0
    transitional_correct = 0
    tot_transitional = 0

    # Define cross-validation parameters: one fold per subject
    # (leave-one-subject-out)
    skf = KFold(n_splits=len(subject_data), shuffle=True)

    # Define PCA parameters
    scale = preprocessing.StandardScaler()
    pca = PCA()
    scale_PCA = Pipeline([('norm', scale), ('dimred', pca)])

    if CLASSIFIER == 'LDA':
        model = LinearDiscriminantAnalysis()
    elif CLASSIFIER == 'SVM':
        model = SVC(kernel='linear', C=10)

    accuracies = []
    ss_accuracies = []
    tr_accuracies = []
    subject_numb = []

    i = 0
    # Main training/testing loop
    for train_index, test_index in skf.split(subject_data):
        print("**************FOLD {}*********".format(i + 1))
        print(train_index, test_index)

        train_set = [subject_data[i] for i in train_index]
        test_set = [subject_data[i] for i in test_index]

        # Load each concatenated set in a single batch
        BIO_train = torch.utils.data.ConcatDataset(train_set)
        wholeloader = DataLoader(BIO_train, batch_size=len(BIO_train))
        for batch, label, dtype in tqdm(wholeloader):
            X_train = batch
            y_train = label
            types_train = dtype

        BIO_test = torch.utils.data.ConcatDataset(test_set)
        # batch_size must cover the test set, not the training set
        wholeloader = DataLoader(BIO_test, batch_size=len(BIO_test))
        for batch, label, dtype in tqdm(wholeloader):
            X_test = batch
            y_test = label
            types_test = dtype

        if CLASSIFIER == 'LDA':
            scale_PCA.fit(X_train)
            feats_train_PCA = scale_PCA.transform(X_train)
            feats_test_PCA = scale_PCA.transform(X_test)

            # Number of PCA components needed to explain 95% of the variance
            # (see the sketch after this function for an equivalent formulation)
            pcaexplainedvar = np.cumsum(
                scale_PCA.named_steps['dimred'].explained_variance_ratio_)
            pcanumcomps = min(min(np.where(pcaexplainedvar > 0.95))) + 1

            # Uniform class priors
            unique_modes = np.unique(y_train)
            model.set_params(priors=np.ones(len(unique_modes)) / len(unique_modes))
            model.fit(feats_train_PCA, y_train)
            y_pred = model.predict(feats_test_PCA)
        elif CLASSIFIER == 'SVM':
            scale.fit(X_train)
            feats_train_norm = scale.transform(X_train)
            feats_test_norm = scale.transform(X_test)

            model.fit(feats_train_norm, y_train)
            y_pred = model.predict(feats_test_norm)

        # Append model performance metrics
        correct = (y_pred == np.array(y_test)).sum().item()
        tot = len(y_test)
        steady_state_correct = (np.logical_and(
            y_pred == np.array(y_test), types_test == 1)).sum().item()
        tot_steady_state = (types_test == 1).sum().item()
        transitional_correct = (np.logical_and(
            y_pred == np.array(y_test), types_test == 0)).sum().item()
        tot_transitional = (types_test == 0).sum().item()

        accuracies.append(accuracy_score(y_test, y_pred))

        tot_acc = correct / tot
        ss_acc = (steady_state_correct / tot_steady_state
                  if tot_steady_state != 0 else "No steady state samples used")
        tr_acc = (transitional_correct / tot_transitional
                  if tot_transitional != 0 else "No transitional samples used")

        if tot_steady_state != 0:
            ss_accuracies.append(ss_acc)
        if tot_transitional != 0:
            tr_accuracies.append(tr_acc)

        subject_numb.append(test_index[0])

        print("Total accuracy: {}".format(accuracy_score(y_test, y_pred)))
        print("Total correct: {}, number: {}, accuracy: {}".format(
            correct, tot, tot_acc))
        print("Steady-state correct: {}, number: {}, accuracy: {}".format(
            steady_state_correct, tot_steady_state, ss_acc))
        print("Transitional correct: {}, number: {}, accuracy: {}".format(
            transitional_correct, tot_transitional, tr_acc))

        i += 1

    print('********************SUMMARY*****************************')
    print('Accuracy mean:', np.mean(accuracies), 'Accuracy std:', np.std(accuracies))
    print('SS Accuracy mean:', np.mean(ss_accuracies), 'Accuracy std:', np.std(ss_accuracies))
    print('TR Accuracy mean:', np.mean(tr_accuracies), 'Accuracy std:', np.std(tr_accuracies))

    print('writing...')
    with open(RESULT_NAME, 'w') as f:
        f.write('total ')
        for item in accuracies:
            f.write("%s " % item)
        f.write('\n')
        f.write('steadystate ')
        for item in ss_accuracies:
            f.write("%s " % item)
        f.write('\n')
        f.write('transitional ')
        for item in tr_accuracies:
            f.write("%s " % item)
        f.write('\n')
        f.write('subject_numb ')
        for item in subject_numb:
            f.write("%s " % item)
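# The 95%-variance component count used above,
# min(min(np.where(pcaexplainedvar > 0.95))) + 1, picks the first index where
# the cumulative explained variance crosses the threshold. A sketch of an
# equivalent, more direct formulation on a toy dataset:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

X, _ = load_iris(return_X_y=True)
pca = PCA().fit(X)
cumvar = np.cumsum(pca.explained_variance_ratio_)
pcanumcomps = int(np.argmax(cumvar > 0.95)) + 1  # first index past the threshold
print(pcanumcomps, cumvar[pcanumcomps - 1])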
def run_classifier(mode='bilateral', classifier='LDA', sensor=["imu", "emg", "goin"]):
    MODE = mode
    CLASSIFIER = classifier
    SENSOR = sensor

    sensor_str = '_'.join(SENSOR)
    RESULT_NAME = ('./results/' + CLASSIFIER + '/' + CLASSIFIER + '_' + MODE +
                   '_' + sensor_str + '_subjects_accuracy.txt')
    if not os.path.exists('./results/' + CLASSIFIER):
        os.makedirs('./results/' + CLASSIFIER)

    subjects = ['156', '185', '186', '188', '189',
                '190', '191', '192', '193', '194']
    subject_data = []
    for subject in subjects:
        subject_data.append(
            EnableDataset(subject_list=[subject], model_type=CLASSIFIER,
                          sensors=SENSOR, mode=MODE))

    correct = 0
    steady_state_correct = 0
    tot_steady_state = 0
    transitional_correct = 0
    tot_transitional = 0

    # Define cross-validation parameters: one fold per subject
    # (leave-one-subject-out; see the sketch after this function)
    skf = KFold(n_splits=len(subject_data), shuffle=True)

    scale = preprocessing.StandardScaler()
    pca = PCA()
    scale_PCA = Pipeline([('norm', scale), ('dimred', pca)])

    if CLASSIFIER == 'LDA':
        model = LinearDiscriminantAnalysis()
    elif CLASSIFIER == 'SVM':
        model = SVC(kernel='linear', C=10)

    accuracies = []
    ss_accuracies = []
    tr_accuracies = []
    subject_numb = []

    i = 0
    for train_index, test_index in skf.split(subject_data):
        print("**************FOLD {}*********".format(i + 1))
        print(train_index, test_index)

        train_set = [subject_data[i] for i in train_index]
        test_set = [subject_data[i] for i in test_index]

        # Load each concatenated set in a single batch
        BIO_train = torch.utils.data.ConcatDataset(train_set)
        wholeloader = DataLoader(BIO_train, batch_size=len(BIO_train))
        for batch, label, dtype in tqdm(wholeloader):
            X_train = batch
            y_train = label
            types_train = dtype

        BIO_test = torch.utils.data.ConcatDataset(test_set)
        # batch_size must cover the test set, not the training set
        wholeloader = DataLoader(BIO_test, batch_size=len(BIO_test))
        for batch, label, dtype in tqdm(wholeloader):
            X_test = batch
            y_test = label
            types_test = dtype

        if CLASSIFIER == 'LDA':
            scale_PCA.fit(X_train)
            feats_train_PCA = scale_PCA.transform(X_train)
            feats_test_PCA = scale_PCA.transform(X_test)

            # Number of PCA components needed to explain 95% of the variance
            pcaexplainedvar = np.cumsum(
                scale_PCA.named_steps['dimred'].explained_variance_ratio_)
            pcanumcomps = min(min(np.where(pcaexplainedvar > 0.95))) + 1

            # Uniform class priors
            unique_modes = np.unique(y_train)
            model.set_params(priors=np.ones(len(unique_modes)) / len(unique_modes))
            model.fit(feats_train_PCA, y_train)
            y_pred = model.predict(feats_test_PCA)
        elif CLASSIFIER == 'SVM':
            scale.fit(X_train)
            feats_train_norm = scale.transform(X_train)
            feats_test_norm = scale.transform(X_test)

            model.fit(feats_train_norm, y_train)
            y_pred = model.predict(feats_test_norm)

        correct = (y_pred == np.array(y_test)).sum().item()
        tot = len(y_test)
        steady_state_correct = (np.logical_and(
            y_pred == np.array(y_test), types_test == 1)).sum().item()
        tot_steady_state = (types_test == 1).sum().item()
        transitional_correct = (np.logical_and(
            y_pred == np.array(y_test), types_test == 0)).sum().item()
        tot_transitional = (types_test == 0).sum().item()

        accuracies.append(accuracy_score(y_test, y_pred))

        tot_acc = correct / tot
        ss_acc = (steady_state_correct / tot_steady_state
                  if tot_steady_state != 0 else "No steady state samples used")
        tr_acc = (transitional_correct / tot_transitional
                  if tot_transitional != 0 else "No transitional samples used")

        if tot_steady_state != 0:
            ss_accuracies.append(ss_acc)
        if tot_transitional != 0:
            tr_accuracies.append(tr_acc)

        subject_numb.append(test_index[0])

        print("Total accuracy: {}".format(accuracy_score(y_test, y_pred)))
        print("Total correct: {}, number: {}, accuracy: {}".format(
            correct, tot, tot_acc))
        print("Steady-state correct: {}, number: {}, accuracy: {}".format(
            steady_state_correct, tot_steady_state, ss_acc))
        print("Transitional correct: {}, number: {}, accuracy: {}".format(
            transitional_correct, tot_transitional, tr_acc))

        i += 1

    print('********************SUMMARY*****************************')
    print('Accuracy mean:', np.mean(accuracies), 'Accuracy std:', np.std(accuracies))
    print('SS Accuracy mean:', np.mean(ss_accuracies), 'Accuracy std:', np.std(ss_accuracies))
    print('TR Accuracy mean:', np.mean(tr_accuracies), 'Accuracy std:', np.std(tr_accuracies))

    print('writing...')
    with open(RESULT_NAME, 'w') as f:
        f.write('total ')
        for item in accuracies:
            f.write("%s " % item)
        f.write('\n')
        f.write('steadystate ')
        for item in ss_accuracies:
            f.write("%s " % item)
        f.write('\n')
        f.write('transitional ')
        for item in tr_accuracies:
            f.write("%s " % item)
        f.write('\n')
        f.write('subject_numb ')
        for item in subject_numb:
            f.write("%s " % item)
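# With n_splits equal to the number of subjects, the KFold above reduces to
# leave-one-subject-out cross-validation. A minimal sketch of the same split
# with sklearn's LeaveOneOut (the short subject list is a stand-in for the
# EnableDataset objects):
from sklearn.model_selection import LeaveOneOut

subject_data = ['156', '185', '186']
for train_index, test_index in LeaveOneOut().split(subject_data):
    print('train subjects:', train_index, 'held-out subject:', test_index)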
class LDA(object):

    def __init__(self, solver="svd", shrinkage=None, priors=None,
                 n_components=None, store_covariance=False, tol=1e-4):
        """
        :param solver: string, one of "svd", "lsqr", "eigen". Defaults to
            "svd", which does not compute the covariance matrix and is
            therefore suited to data with many features. "lsqr" is least
            squares and can be combined with shrinkage; "eigen" is eigenvalue
            decomposition and can also be combined with shrinkage.
        :param shrinkage: str or float, optional. Defaults to None; "auto"
            enables automatic shrinkage, and a float in (0, 1) fixes the
            shrinkage parameter.
        :param priors: array, optional, shape (n_classes,). Class priors.
        :param n_components: int, optional. Number of components, default None.
        :param store_covariance: bool, optional. Only used with "svd";
            additionally computes the class covariance matrix.
        :param tol: float, default 1e-4. Threshold used for rank estimation
            in the "svd" solver.
        """
        self.model = LinearDiscriminantAnalysis(
            solver=solver, shrinkage=shrinkage, priors=priors,
            n_components=n_components, store_covariance=store_covariance,
            tol=tol)

    def fit(self, x, y):
        self.model.fit(X=x, y=y)

    def transform(self, x):
        return self.model.transform(X=x)

    def fit_transform(self, x, y):
        return self.model.fit_transform(X=x, y=y)

    def get_params(self, deep=True):
        return self.model.get_params(deep=deep)

    def set_params(self, **params):
        self.model.set_params(**params)

    def decision_function(self, x):
        return self.model.decision_function(X=x)

    def predict(self, x):
        return self.model.predict(X=x)

    def predict_log_proba(self, x):
        return self.model.predict_log_proba(X=x)

    def predict_proba(self, x):
        return self.model.predict_proba(X=x)

    def score(self, x, y, sample_weight=None):
        return self.model.score(X=x, y=y, sample_weight=sample_weight)

    def get_attributes(self):
        # Attribute values are only available after the model has been fitted.
        coef = self.model.coef_              # weight vector(s)
        intercept = self.model.intercept_    # intercept term(s)
        # covariance_ is only stored for solver != 'svd' or store_covariance=True
        covariance = getattr(self.model, 'covariance_', None)
        explained_variance_ratio = self.model.explained_variance_ratio_
        means = self.model.means_            # class means
        priors = self.model.priors_          # class priors, sum to 1, shape (n_classes,)
        scalings = self.model.scalings_      # shape (rank, n_classes - 1); feature scaling
        xbar = self.model.xbar_              # overall mean
        classes = self.model.classes_        # class labels
        return (coef, intercept, covariance, explained_variance_ratio,
                means, priors, scalings, xbar, classes)
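# A short usage sketch for the wrapper above: fit on random toy data (an
# assumption; any (n_samples, n_features) array with integer labels works),
# project to two components and read back the fitted class labels.
import numpy as np

rng = np.random.default_rng(0)
X = rng.random((30, 4))
y = rng.integers(0, 3, 30)            # three classes -> at most 2 components

lda = LDA(n_components=2)
lda.fit(X, y)
X_2d = lda.transform(X)               # shape (30, 2)
print(lda.score(X, y))                # mean accuracy on the training data
attrs = lda.get_attributes()
print(attrs[-1])                      # classes_: array([0, 1, 2])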