def accaracy_measures(model, points, conf_mat=False, roc_curve=False, pre_recall_curve=False):
    """Plot the requested accuracy diagnostics for ``model`` and show the figure.

    Args:
        model: Classifier exposing ``predict`` (always used) and
            ``predict_proba`` (used only for the ROC curve).
        points: Mapping with ``'x-test'`` and ``'y-test'`` entries; it is read
            only for the precision/recall plot.
        conf_mat: When true, draw the confusion matrix.
        roc_curve: When true, draw the ROC curve.
        pre_recall_curve: When true, draw the precision/recall curve.

    NOTE(review): ``x_set`` and ``y_set`` are not parameters of this function;
    they are presumably module-level globals -- confirm, or derive them from
    ``points``.
    """
    # One column per requested matrix/ROC panel plus one spare column.
    x_plot_area = int(conf_mat) + int(roc_curve) + 1

    # Confusion matrix panel.
    if conf_mat:
        # Parenthesised form is valid on both Python 2 and Python 3.
        print(len(y_set))
        conf_matrix = confusion_matrix(y_set, model.predict(x_set))
        viz.plt.subplot(1, x_plot_area, x_plot_area - 1)
        viz.plot_confusion_matrix(conf_matrix, classes=[0, 1], title='Confusion matrix')

    # ROC curve panel.
    if roc_curve:
        # Subplot indices are 1-based. Without the confusion matrix the plain
        # expression ``x_plot_area - 2`` is 0, which matplotlib rejects, so
        # clamp it to the first panel. The two-panel layout is unchanged.
        viz.plt.subplot(1, x_plot_area, max(x_plot_area - 2, 1))
        x, y, _ = ROC_Cruve(y_set, model.predict_proba(x_set)[:, 1])
        viz.plt.plot(x, y)

    # Precision/recall plot, computed from the explicit test split in ``points``.
    if pre_recall_curve:
        viz.plot_recision_recall(points['y-test'], model.predict(points['x-test']))

    viz.plt.show()
def confusion_matrix(mlp, epochs, bs):
    """Plot confusion matrices for the test and train predictions of ``mlp``.

    The predictions come from ``test_mlp_model`` (called with list printing
    and plotting disabled); each set is then drawn against the corresponding
    ``y_test`` / ``y_train`` labels, which are read from the enclosing scope.

    Args:
        mlp: Model handed straight to ``test_mlp_model``.
        epochs: Forwarded to ``test_mlp_model``.
        bs: Currently unused.

    NOTE(review): ``bs`` is never read and the literal 30 is passed as the
    third positional argument instead -- presumably that slot is the batch
    size; confirm against ``test_mlp_model`` before wiring ``bs`` through.
    """
    predictions = test_mlp_model(
        mlp,
        epochs,
        30,
        print_lists=False,
        plot=False,
        get_predictions=True,
    )
    test_prediction, train_prediction = predictions

    # Test split first, then train split.
    plot_confusion_matrix(y_test, test_prediction,
                          "Test data confusion_matrix")
    plot_confusion_matrix(y_train, train_prediction,
                          "Train data confusion_matrix")
def main(num):
    """Benchmark a fixed set of scikit-learn classifiers over random splits.

    The feature matrix and labels are loaded from ``features_vector.pkl``.
    For each of ``num`` rounds a fresh 60/40 train/test split is drawn, every
    classifier is fitted and scored, and the scores are collected per
    classifier name. Confusion-matrix figures are saved for the
    'Decision Tree' and 'Naive Bayes' classifiers in every round. At the end
    the raw scores and the per-classifier mean are printed.

    Args:
        num: Number of random train/test rounds to run.
    """
    # NOTE: unpickling can execute arbitrary code -- only load a trusted file.
    # The ``with`` block closes the handle even when loading fails.
    with open('features_vector.pkl', 'rb') as pkl_file:
        ts_features, y = pickle.load(pkl_file)

    names = [
        "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
        "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
        "Naive Bayes", "QDA"
    ]
    accuracy = {name: [] for name in names}

    for i in range(num):
        # Fresh, unfitted estimators every round; order must match ``names``.
        classifiers = [
            KNeighborsClassifier(3),
            SVC(kernel="linear", C=0.025),
            SVC(gamma=2, C=1),
            GaussianProcessClassifier(1.0 * RBF(1.0)),
            DecisionTreeClassifier(max_depth=5),
            RandomForestClassifier(max_depth=5, n_estimators=10,
                                   max_features=1),
            MLPClassifier(alpha=1, max_iter=1000),
            AdaBoostClassifier(),
            GaussianNB(),
            QuadraticDiscriminantAnalysis()
        ]
        train_x, test_x, train_y, test_y = train_test_split(
            ts_features, y, test_size=0.4)

        for name, clf in zip(names, classifiers):
            clf.fit(train_x, train_y)
            score = clf.score(test_x, test_y)
            if name in ('Decision Tree', 'Naive Bayes'):
                pred_y = clf.predict(test_x)
                plt.figure()
                # NOTE(review): predictions are passed before the true labels
                # here, while test_model() in this file passes the true labels
                # first -- confirm the helper's parameter order; one of the
                # two call sites probably yields a transposed matrix.
                plot_confusion_matrix([pred_y], [test_y], LABELS)
                save(name + '_confusion_matrix_' + str(i) + '.png')
            accuracy[name].append(score)

    print(accuracy)
    for name in names:
        print(name + ': ', np.array(accuracy[name]).mean())
def save_confusion_matrix(data_root, output_root, segmenter, data_subset="val"):
    """Evaluate ``segmenter`` on a dataset split and save its confusion matrix.

    Every ``(image, ground-truth mask)`` pair of the split is run through the
    segmenter; a two-class confusion matrix and an accuracy metric are
    accumulated over all pairs. The resulting figure is written to
    ``output_root`` as both PDF and PNG, with the accuracy embedded in the
    file name.

    Args:
        data_root: Passed to ``SegmentationDataset`` as the data location.
        output_root: Directory the two figure files are written to.
        segmenter: Object exposing ``get_raw_prediction(image)``.
        data_subset: Split name handed to ``SegmentationDataset``.
    """
    samples = SegmentationDataset(data_root, data_subset)
    cm_metric = ConfusionMatrix(num_classes=2, average="precision")
    acc_metric = Accuracy()

    for image, gt_array in samples:
        prediction = segmenter.get_raw_prediction(image)

        # Move the ground truth to the prediction's device and prepend two
        # singleton dimensions.
        gt_tensor = torch.from_numpy(gt_array)
        gt_tensor = gt_tensor.to(prediction.device)
        gt_tensor = gt_tensor.unsqueeze(0).unsqueeze(0)

        pair = (prediction, gt_tensor)
        cm_metric.update(output_transform_confusion_matrix(pair))
        acc_metric.update(output_transform_accuracy(pair))

    matrix = cm_metric.compute()
    accuracy = acc_metric.compute()
    figure = plot_confusion_matrix(matrix)

    filename_base = f"confusion_matrix_acc={accuracy:.6f}"
    for extension in (".pdf", ".png"):
        figure.savefig(os.path.join(output_root, filename_base + extension))
def test_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader``, plot the results and checkpoint.

    Prints the overall accuracy, plots and saves a confusion matrix of the
    collected labels/predictions, and writes the model's ``state_dict`` to
    ``DeepNAR_model.ckpt``.

    Args:
        model: Callable network; its output is arg-maxed over dimension 1.
        test_loader: Iterable yielding ``(inputs, labels)`` batches of any
            batch size.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    y_true = []
    y_pred = []
    correct = 0
    total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.reshape(-1, sequence_length, input_size).to(device)
            labels = labels.reshape(-1).to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # ``.item()`` only works on one-element tensors and therefore
            # breaks for batch sizes above 1; ``.tolist()`` yields the same
            # values for a single sample and also handles larger batches.
            y_true.extend(labels.tolist())
            y_pred.extend(predicted.tolist())

    print('Final accuracy is {} %'.format((correct / total) * 100))
    plot_confusion_matrix([y_true], [y_pred], LABELS)
    save('test_confusion_matrix' + '.png')

    # Save the model checkpoint.
    torch.save(model.state_dict(), 'DeepNAR_model.ckpt')
def _draw_confusion_panel(position, y_true, y_pred, class_names, title,
                          cv_scores, font):
    """Fill panel ``position`` of the 1x4 comparison grid for one classifier.

    Draws the normalised confusion matrix and writes the cross-validation
    scores plus precision / recall / F2 (for the 'Parasitized' class) below it.
    The precision/recall/F2 triple is also printed.
    """
    matrix = confusion_matrix(y_true, y_pred, labels=list(class_names))
    plt.subplot(1, 4, position)
    plot_confusion_matrix(cm1=matrix,
                          classes=class_names,
                          normalize=True,
                          gradientbar=False,
                          title=title)

    formatted_scores = ["{:.2f}".format(score) for score in cv_scores]
    p_r_fscore = precision_recall_fscore_support(y_true,
                                                 y_pred,
                                                 beta=2.0,
                                                 labels=['Parasitized'],
                                                 pos_label='Parasitized',
                                                 average='binary')
    print(p_r_fscore[:3])

    metrics_text = (
        'Precision: {d[0]:.2f}\nRecall: {d[1]:.2f} \nF2 score: {d[2]:.2f} \n'
        .format(d=p_r_fscore[:3]))
    caption = '\nCV Scores:\n' + str(formatted_scores) + '\n' + metrics_text
    # The panel is re-selected while building the arguments so the text is
    # placed on it (in its axes coordinates) even if the plotting helper
    # changed the current axes.
    plt.text(0.01,
             -1,
             caption,
             ha='left',
             va='bottom',
             fontdict=font,
             transform=plt.subplot(1, 4, position).transAxes)


def ML_with_BN_feat(bn_feat_file='../data/factors_n_bn_feat.csv',
                    n_comp=100,
                    plotting=False):
    """Train and compare four classifiers on PCA-reduced bottleneck features.

    Pipeline: load the feature CSV, keep the 'Parasitized' / 'Uninfected'
    rows, split 80/20, reduce the features with PCA, then fit Logistic
    Regression, One-vs-Rest Gaussian Naive Bayes, One-vs-Rest Random Forest
    and AdaBoost. For each model a confusion-matrix panel with CV scores and
    precision / recall / F2 is drawn; finally a ROC curve for the logistic
    regression is plotted.

    Files written (relative to the working directory):
        ../plots/pca_var_vs_ncomp.png, ../plots/pca_var_vs_<n>_ncomp.png,
        ../plots/confusion_matrix_result_1.png, ../plots/ROC_CNN_log_reg.png,
        ../models/trained_PCA.sav, ../models/trained_log_reg.sav,
        ../models/trained_RF.sav, ../models/trained_AdaBoost.sav,
        ../data/y_test_predictions, ../data/roc_data.csv

    Args:
        bn_feat_file: CSV with a 'label' column and feature columns
            'x0' .. 'x2047'.
        n_comp: Number of principal components; values below 50 are raised
            to 50.
        plotting: When true, additionally draw pairwise plots of the first
            11 components via ``caa_plot_pairs``.
    """
    plt.close('all')

    if n_comp < 50:
        n_comp = 50

    # ---- Load the bottleneck features for each image -----------------------
    feat_df = pd.read_csv(bn_feat_file, index_col=0, dtype='unicode')
    print('Data frame shape:', feat_df.shape)

    mask = feat_df.loc[:, 'label'].isin(['Parasitized', 'Uninfected'])
    feat_df = feat_df.loc[mask, :].drop_duplicates()
    # NOTE(review): 7 is presumably the number of non-feature columns -- confirm.
    print('Number of bottleneck features:', feat_df.shape[1] - 7)

    y = feat_df.loc[:, ['label']].values  # column vector, shape (n, 1)
    print(type(y), y.shape)
    print('Number of samples for each label \n',
          feat_df.groupby('label')['label'].count())
    X = feat_df.loc[:, 'x0':'x2047'].astype(float).values

    # Sorted so the confusion-matrix row/column order is the same on every
    # run (iteration order of a set of strings is not stable across runs).
    class_names = sorted(set(feat_df.loc[:, 'label']))

    # Hold out 20% of the data for testing.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # ---- Dimensionality reduction ------------------------------------------
    # First fit a PCA with n_components=None (-> min(n observations,
    # n features)) only to inspect how the explained variance decays.
    print('...running PCA analysis...')
    pca_none = PCA(n_components=None)
    pca_none.fit(X_train)  # only the fitted variance ratios are needed

    explained_variance = pca_none.explained_variance_ratio_
    plt.figure(0)
    plt.plot(explained_variance)
    plt.xlabel('n_components')
    plt.ylabel('variance')
    plt.suptitle('Explained Variance of Principle Components')
    plt.savefig('../plots/pca_var_vs_ncomp.png')

    # Then project the data onto the requested number of components.
    print('...running PCA with', n_comp, 'components')
    pca = PCA(n_components=n_comp)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)

    explained_variance1 = pca.explained_variance_ratio_
    plt.figure(1)
    plt.plot(explained_variance1)
    plt.xlabel('n_components')
    plt.ylabel('variance')
    plt.suptitle('Explained Variance of Principle Components')
    plt.show(block=False)
    plt.savefig('../plots/pca_var_vs_{}_ncomp.png'.format(n_comp))

    # Persist the fitted PCA; ``with`` guarantees the file is closed.
    save_PCA = '../models/trained_PCA.sav'
    with open(save_PCA, 'wb') as pca_file:
        pickle.dump(pca, pca_file)

    if plotting:
        # Pairwise plots of 11 components; this only works with two labels.
        feat_df_ploting = pd.DataFrame({'label': y_train[:, 0]})
        caa_plot_pairs(X_train[:, :11], feat_df_ploting, 'PCA')
        plt.figure(figsize=(16, 24))
        plt.show(block=False)

    # Data frame of the labels plus the first 11 components (diagnostics only).
    pca_DF = pd.DataFrame(X_train[:, :11])
    df_y_train = pd.DataFrame(y_train, columns=['label'])
    df_pca_train = pd.concat([df_y_train, pca_DF], axis=1)
    feature_names = df_pca_train.columns[1:]
    n_comp_pca = pca_DF.shape[1]
    print('n_comp_pca', n_comp_pca)
    print('feature_names', feature_names)
    print('df_pca_train columns', list(df_pca_train.columns))

    plt.close('all')

    # ---- Figure used to compare the confusion matrices ---------------------
    params = {
        'axes.labelsize': 'large',
        'axes.titlesize': 'large',
        'xtick.labelsize': 'medium',
        'ytick.labelsize': 'medium'
    }
    plt.rcParams.update(params)
    fig, _ = plt.subplots(1, 4, sharey=True, figsize=(15, 8.5))
    font = {
        'linespacing': 1.5,
        'color': 'darkred',
        'weight': 'normal',
        'size': 14
    }
    train_labels = df_y_train['label'].values  # 1-D label vector

    # ---- Logistic regression -----------------------------------------------
    from sklearn import linear_model
    log_reg_classifier = linear_model.LogisticRegression(penalty='l2',
                                                         tol=0.0001,
                                                         C=1.0,
                                                         fit_intercept=True,
                                                         intercept_scaling=1,
                                                         class_weight=None,
                                                         random_state=None,
                                                         solver='liblinear',
                                                         max_iter=100,
                                                         multi_class='ovr',
                                                         n_jobs=1)
    log_r = log_reg_classifier.fit(X_train, train_labels)
    y_test_predictions_log_r = log_r.predict(X_test)
    y_predict_prob_log_r = log_r.predict_proba(X_test)

    # Collect every model's predictions and probabilities in one data frame.
    results = pd.DataFrame()
    results['y_test'] = y_test[:, 0]
    results['log_r_pred'] = list(y_test_predictions_log_r)
    results['log_r_prob'] = y_predict_prob_log_r[:, 0]

    # Cross-validation with scikit-learn's default fold count and scorer.
    cv_scores_lr = cross_val_score(estimator=log_r, X=X_train, y=y_train)
    print('Logistic regression cv_scores', cv_scores_lr)

    save_LR = '../models/trained_log_reg.sav'
    with open(save_LR, 'wb') as lr_file:
        pickle.dump(log_reg_classifier, lr_file)

    _draw_confusion_panel(1, y_test, y_test_predictions_log_r, class_names,
                          'Logistic Regression\n', cv_scores_lr, font)

    # ---- One-vs-Rest Gaussian Naive Bayes ----------------------------------
    classifier = OneVsRestClassifier(GaussianNB())
    nbclf = classifier.fit(X_train, train_labels)
    y_test_predictions_nbclf = nbclf.predict(X_test)
    y_predict_prob = nbclf.predict_proba(X_test)

    results['NB_pred'] = list(y_test_predictions_nbclf)
    results['NB_r_prob'] = y_predict_prob[:, 0]

    cv_scores = cross_val_score(classifier, X_train, y_train)
    print('NB cv_scores', cv_scores)

    _draw_confusion_panel(2, y_test, y_test_predictions_nbclf, class_names,
                          'One vs Rest - Naive Bayes\n', cv_scores, font)

    # ---- One-vs-Rest Random Forest -----------------------------------------
    f = n_comp if n_comp < 100 else 100  # max_features, capped at 100
    n = 30  # number of trees
    RFclf = OneVsRestClassifier(
        RandomForestClassifier(n_estimators=n, max_features=f))
    RFclf.fit(X_train, train_labels)
    y_test_predictions_RF = RFclf.predict(X_test)
    y_score_answer_RF = RFclf.predict_proba(X_test)

    results['RF'] = list(y_test_predictions_RF)
    results['RF_prob'] = y_score_answer_RF[:, 0]

    cv_scores_RF = cross_val_score(RFclf, X_train, y_train)
    print('Random Forest cv_scores', cv_scores_RF)

    save_RF = '../models/trained_RF.sav'
    with open(save_RF, 'wb') as rf_file:
        pickle.dump(RFclf, rf_file)

    _draw_confusion_panel(
        3, y_test, y_test_predictions_RF, class_names,
        'Random Forests\nestimators: {0}\n max_features: {1}\n'.format(n, f),
        cv_scores_RF, font)

    # ---- Adaptive boosting -------------------------------------------------
    # http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html
    AdaBoost = AdaBoostClassifier()
    AdaBoost.fit(X_train, y_train)
    y_predAB = AdaBoost.predict(X_test)
    y_predAB_prob = AdaBoost.predict_proba(X_test)

    results['AB_pred'] = list(y_predAB)
    results['AB_prob'] = y_predAB_prob[:, 0]
    results.to_csv('../data/y_test_predictions')

    cv_scores_AB = cross_val_score(AdaBoost, X_train, y_train)
    print('Adaptive Boosting cv_scores', cv_scores_AB)

    save_AdaBoost = '../models/trained_AdaBoost.sav'
    with open(save_AdaBoost, 'wb') as ab_file:
        pickle.dump(AdaBoost, ab_file)

    _draw_confusion_panel(4, y_test, y_predAB, class_names, 'AdaBoost\n',
                          cv_scores_AB, font)

    # ---- Compare mean accuracy of the four models --------------------------
    print('\nLogistic Regression mean accuracy:',
          round(log_reg_classifier.score(X_test, y_test), 4))
    print('One vs Rest - Naive Bayes mean accuracy:',
          round(classifier.score(X_test, y_test), 4))
    print('Random Forest Classifier mean accuracy:',
          round(RFclf.score(X_test, y_test), 4))
    print('Adaptive Boosting Classifier mean accuracy:',
          round(AdaBoost.score(X_test, y_test), 4))

    plt.tight_layout()
    fig.tight_layout()
    plt.savefig('../plots/confusion_matrix_result_1.png')
    plt.show(block=False)

    # ---- ROC curve and AUC for the logistic regression ---------------------
    plt.figure(12)
    # 'Parasitized' becomes the positive class (1).
    y_test = label_binarize(y_test, classes=['Uninfected', 'Parasitized'])
    # NOTE(review): column 0 of predict_proba is presumably the 'Parasitized'
    # probability (scikit-learn orders classes_ lexicographically) -- confirm
    # against log_r.classes_.
    fpr, tpr, thresholds = roc_curve(y_test, y_predict_prob_log_r[:, 0])
    roc_df = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
    roc_df.to_csv('../data/roc_data.csv')

    roc_auc = auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic', fontsize=18)
    plt.plot(fpr,
             tpr,
             lw=2,
             color='#3399ff',
             label='AUC = {0:.2f}'.format(roc_auc))
    plt.plot([0, 1], [0, 1],
             linestyle='--',
             lw=2,
             color='gray',
             label='Chance',
             alpha=.8)
    plt.ylabel('True Positive Rate', fontsize=14)
    plt.xlabel('False Positive Rate', fontsize=14)
    plt.tick_params(axis='both', which='major', labelsize=12)
    plt.legend(loc="lower right")
    plt.tight_layout()
    plt.savefig('../plots/ROC_CNN_log_reg.png')
    plt.show()
    plt.close('all')
    print(
        'If launched from command line use ctrl+z to close all plots and finish'
    )
def main_Core50(conf, run, close_at_the_end=False): # Prepare configurations files conf['solver_file_first_batch'] = conf['solver_file_first_batch'].replace( 'X', conf['model']) conf['solver_file'] = conf['solver_file'].replace('X', conf['model']) conf['init_weights_file'] = conf['init_weights_file'].replace( 'X', conf['model']) conf['tmp_weights_file'] = conf['tmp_weights_file'].replace( 'X', conf['model']) train_filelists = conf['train_filelists'].replace('RUN_X', run) test_filelist = conf['test_filelist'].replace('RUN_X', run) run_on_the_fly = True # If True, tells the train_utils.get_data(...) script not to cache batch data on disk (Path(conf['exp_path']) / 'CM').mkdir(exist_ok=True, parents=True) (Path(conf['exp_path']) / 'EwC').mkdir(exist_ok=True, parents=True) (Path(conf['exp_path']) / 'Syn').mkdir(exist_ok=True, parents=True) if 'brn_past_weight' not in conf or conf['brn_past_weight'] is None: if conf['rehearsal_is_latent']: conf['brn_past_weight'] = 20000 else: conf['brn_past_weight'] = 10000 # To change if needed the network prototxt if conf['rehearsal_is_latent']: solver_param = caffe_pb2.SolverParameter() with open(conf['solver_file']) as f: txtf.Merge(str(f.read()), solver_param) next_batches_net_prototxt_path = Path(solver_param.net) if not next_batches_net_prototxt_path.stem.endswith('b'): print( 'Error dealing with latent rehearsal: invalid net prototxt name!' 
) exit(1) next_batches_net_prototxt_path_orig = next_batches_net_prototxt_path.parent / ( next_batches_net_prototxt_path.stem[:-1] + next_batches_net_prototxt_path.suffix) moving_avg_fraction = 1.0 - (1.0 / conf['brn_past_weight']) train_utils.modify_net_prototxt( str(next_batches_net_prototxt_path_orig), str(next_batches_net_prototxt_path), moving_average_fraction=moving_avg_fraction) if conf['model'] == 'MobileNetV1': rehearsal_layer_mapping_for_mobilenetv1 = { 'data': ([-1, 3, 128, 128], 'conv1'), 'conv2_1/dw': ([-1, 32, 64, 64], 'conv2_1/sep'), #conv2_1 / dw(128, 32, 64, 64) # conv2_1 / sep(128, 64, 64, 64) 'conv2_2/dw': ([-1, 64, 32, 32], 'conv2_2/sep'), #conv2_2 / dw(128, 64, 32, 32) # conv2_2 / sep(128, 128, 32, 32) 'conv3_1/dw': ([-1, 128, 32, 32], 'conv3_1/sep'), #conv3_1 / dw(128, 128, 32, 32) # conv3_1 / sep(128, 128, 32, 32) 'conv3_2/dw': ([-1, 128, 16, 16], 'conv3_2/sep'), #conv3_2 / dw(128, 128, 16, 16) # conv3_2 / sep(128, 256, 16, 16) 'conv4_1/dw': ([-1, 256, 16, 16], 'conv4_1/sep'), #conv4_1 / dw(128, 256, 16, 16) # conv4_1 / sep(128, 256, 16, 16) 'conv4_2/dw': ([-1, 256, 8, 8], 'conv4_2/sep'), #conv4_2 / dw(128, 256, 8, 8) # conv4_2 / sep(128, 512, 8, 8) 'conv5_1/dw': ([-1, 512, 8, 8], 'conv5_1/sep'), #conv5_1 / dw(512, 1, 3, 3) # conv5_1 / sep(512, 512, 1, 1) 'conv5_2/dw': ([-1, 512, 8, 8], 'conv5_2/sep'), #conv5_2 / dw(512, 1, 3, 3) # conv5_2 / sep(512, 512, 1, 1) 'conv5_3/dw': ([-1, 512, 8, 8], 'conv5_3/sep'), #conv5_3 / dw(512, 1, 3, 3) # conv5_3 / sep(512, 512, 1, 1) 'conv5_4/dw': ([-1, 512, 8, 8], 'conv5_4/sep'), #conv5_4 / dw(512, 1, 3, 3) # conv5_4 / sep(512, 512, 1, 1) 'conv5_5/dw': ([-1, 512, 8, 8], 'conv5_5/sep'), #conv5_5 / dw(512, 1, 3, 3) # conv5_5 / sep(512, 512, 1, 1) 'conv5_6/dw': ([-1, 512, 4, 4], 'conv5_6/sep'), #conv5_6 / dw(512, 1, 3, 3) # conv5_6 / sep(1024, 512, 1, 1) 'conv6/dw': ([-1, 1024, 4, 4], 'conv6/sep'), #conv6 / dw(1024, 1, 3, 3) # conv6 / sep(1024, 1024, 1, 1) 'pool6': ([-1, 1024, 1, 1], 'mid_fc7') #avg_pool(1024) 
# mid_fc7(50, 1024, 1, 1)(50, ) } current_mapping = rehearsal_layer_mapping_for_mobilenetv1[ conf['rehearsal_layer']] if 'rehearsal_stop_layer' not in conf or conf[ 'rehearsal_stop_layer'] is None: conf['rehearsal_stop_layer'] = current_mapping[1] rehe_lat_surgery.create_concat_layer_from_net_template( str(next_batches_net_prototxt_path), str(next_batches_net_prototxt_path), conf['rehearsal_layer'], current_mapping[0], current_mapping[1], original_input=21, rehearsal_input=107) else: raise RuntimeError('Unsupported model for latent rehearsal:', conf['model']) # Parse the solver prototxt # for more details see - https://stackoverflow.com/questions/31823898/changing-the-solver-parameters-in-caffe-through-pycaffe if conf['initial_batch'] == 0: print('Solver proto: ', conf['solver_file_first_batch']) solver_param = caffe_pb2.SolverParameter() with open(conf['solver_file_first_batch']) as f: txtf.Merge(str(f.read()), solver_param) net_prototxt = solver_param.net # Obtains the path to the net prototxt print('Net proto: ', net_prototxt) else: print('Solver proto: ', conf['solver_file']) solver_param = caffe_pb2.SolverParameter() with open(conf['solver_file']) as f: txtf.Merge(str(f.read()), solver_param) net_prototxt = solver_param.net # Obtains the path to the net prototxt print('Net proto: ', net_prototxt) # Obtain class labels if conf['class_labels'] != '': # More complex than a simple loadtxt because of the unicode representation in python 3 label_str = np.loadtxt(conf['class_labels'], dtype=bytes, delimiter="\n").astype(str) # Obtain minibatch size from net proto train_minibatch_size, test_minibatch_size = train_utils.extract_minibatch_size_from_prototxt_with_input_layers( net_prototxt) print(' test minibatch size: ', test_minibatch_size) print(' train minibatch size: ', train_minibatch_size) # Load test set print("Recovering Test Set: ", test_filelist, " ...") start = time.time() test_x, test_y = train_utils.get_data(test_filelist, conf['db_path'], conf['exp_path'], 
on_the_fly=run_on_the_fly, verbose=conf['verbose']) assert (test_x.shape[0] == test_y.shape[0]) if conf['num_classes'] < 50: # Checks if we are doing category-based classification test_y = test_y // 5 test_y = test_y.astype(np.float32) test_patterns = test_x.shape[0] test_x, test_y, test_iterat = train_utils.pad_data(test_x, test_y, test_minibatch_size) print(' -> %d patterns of %d classes (%.2f sec.)' % (test_patterns, len(np.unique(test_y)), time.time() - start)) print(' -> %.2f -> %d iterations for full evaluation' % (test_patterns / test_minibatch_size, test_iterat)) # Load training patterns in batches (by now assume the same number in all batches) batch_count = conf['num_batches'] train_patterns = train_utils.count_lines_in_batches( batch_count, train_filelists) train_iterations_per_epoch = np.zeros(batch_count, int) train_iterations = np.zeros(batch_count, int) test_interval_epochs = conf['test_interval_epochs'] test_interval = np.zeros(batch_count, float) for batch in range(batch_count): if conf["rehearsal"] and batch > 0: train_patterns[batch] += conf["rehearsal_memory"] train_iterations_per_epoch[batch] = int( np.ceil(train_patterns[batch] / train_minibatch_size)) test_interval[ batch] = test_interval_epochs * train_iterations_per_epoch[batch] if (batch == 0): train_iterations[batch] = train_iterations_per_epoch[batch] * conf[ 'num_epochs_first_batch'] else: train_iterations[ batch] = train_iterations_per_epoch[batch] * conf['num_epochs'] print("Batch %2d: %d patterns, %d iterations (%d iter. 
per epochs - test every %.1f iter.)" \ % (batch, train_patterns[batch], train_iterations[batch], train_iterations_per_epoch[batch], test_interval[batch])) # Create evaluation points # -> iterations which are boundaries of batches batch_iter = [0] iter = 0 for batch in range(batch_count): iter += train_iterations[batch] batch_iter.append(iter) # Calculates the iterations where the network will be evaluated eval_iters = [ 1 ] # Start with 1 (instead of 0) because the test net is aligned to the train one after solver.step(1) for batch in range(batch_count): start = batch_iter[batch] end = batch_iter[batch + 1] start += test_interval[batch] while start < end: eval_iters.append(int(start)) start += test_interval[batch] eval_iters.append(end) # Iterations which are epochs in the evaluation range epochs_iter = [] for batch in range(batch_count): start = batch_iter[batch] end = batch_iter[batch + 1] start += train_iterations_per_epoch[batch] while start <= end: epochs_iter.append(int(start)) start += train_iterations_per_epoch[batch] prev_train_loss = np.zeros(len(eval_iters)) prev_test_acc = np.zeros(len(eval_iters)) prev_train_acc = np.zeros(len(eval_iters)) prev_exist = filelog.TryLoadPrevTrainingLog(conf['train_log_file'], prev_train_loss, prev_test_acc, prev_train_acc) train_loss = np.copy( prev_train_loss ) # Copying allows to correctly visualize the graph in case we start from initial_batch > 0 test_acc = np.copy(prev_test_acc) train_acc = np.copy(prev_train_acc) epochs_tick = False if batch_count > 30 else True # For better visualization visualization.Plot_Incremental_Training_Init('Incremental Training', eval_iters, epochs_iter, batch_iter, train_loss, test_acc, 5, conf['accuracy_max'], prev_exist, prev_train_loss, prev_test_acc, show_epochs_tick=epochs_tick) filelog.Train_Log_Init(conf['train_log_file']) filelog.Train_LogDetails_Init(conf['train_log_file']) start_train = time.time() eval_idx = 0 # Evaluation iterations counter global_eval_iter = 0 # Global 
iterations counter first_round = True initial_batch = conf['initial_batch'] if initial_batch > 0: # Move forward by skipping unnecessary evaluation global_eval_iter = batch_iter[initial_batch] while eval_iters[eval_idx] < global_eval_iter: eval_idx += 1 eval_idx += 1 for batch in range(initial_batch, batch_count): print( '\nBATCH = {:2d} ----------------------------------------------------' .format(batch)) if batch == 0: solver = caffe.get_solver( conf['solver_file_first_batch'] ) # Load the solver for the first batch and create net(s) if conf['init_weights_file'] != '': solver.net.copy_from(conf['init_weights_file']) print('Network created and Weights loaded from: ', conf['init_weights_file']) # Test solver.share_weights(solver.test_nets[0]) print('Weights shared with Test Net') accuracy, _, pred_y = train_utils.test_network_with_accuracy_layer( solver, test_x, test_y, test_iterat, test_minibatch_size, prediction_level_Model[conf['model']], return_prediction=True) if conf['strategy'] in ['cwr+', 'ar1', 'ar1free']: cwr.zeros_cwr_layer_bias_lr(solver.net, cwr_layers_Model[conf['model']]) class_updates = np.full(conf['num_classes'], conf['initial_class_updates_value'], dtype=np.float32) cons_w = cwr.init_consolidated_weights( solver.net, cwr_layers_Model[conf['model']], conf['num_classes'] ) # allocate space for consolidated weights and initialze to 0 cwr.reset_weights( solver.net, cwr_layers_Model[conf['model']], conf['num_classes'] ) # reset weights to 0 (done here for the first batch to keep initial stats correct) # cwr.reset_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes']) # reset weights to 0 (done here for the first batch to keep initial stats correct) if conf['strategy'] in ['ar1', 'ar1free']: ewcData, synData = syn.create_syn_data( solver.net ) # ewcData stores optimal weights + normalized fisher; trajectory store unnormalized summed grad*deltaW if conf['rehearsal_is_latent']: reha_data_size = solver.net.blobs[ 
conf['rehearsal_layer']].data[0].size rehearsal.allocate_memory(conf['rehearsal_memory'], reha_data_size, 1) else: rehearsal.allocate_memory(conf['rehearsal_memory'], test_x[0].size, 1) elif batch == 1: solver = caffe.get_solver( conf['solver_file']) # load solver and create net if first_round: solver.net.copy_from(conf['init_weights_file']) print('Network created and Weights loaded from: ', conf['init_weights_file']) else: solver.net.copy_from(conf['tmp_weights_file']) print('Network created and Weights loaded from: ', conf['tmp_weights_file']) solver.share_weights(solver.test_nets[0]) if first_round: print('Loading consolidated weights...') class_updates = np.full(conf['num_classes'], conf['initial_class_updates_value'], dtype=np.float32) rand_w, cons_w = cwr.copy_initial_weights( solver.net, cwr_layers_Model[conf['model']], conf['num_classes']) if conf['strategy'] in ['ar1']: ewcData, synData = syn.create_syn_data( solver.net ) # ewcData stores optimal weights + normalized fisher; trajectory store unnormalized summed grad*deltaW if conf['strategy'] in ['cwr+']: cwr.zeros_non_cwr_layers_lr( solver.net, cwr_layers_Model[conf['model']]) # blocca livelli sotto if conf['strategy'] in ['cwr+', 'ar1', 'ar1free']: if 'cwr_lr_mult' in conf.keys() and conf['cwr_lr_mult'] != 1: cwr.zeros_cwr_layer_bias_lr( solver.net, cwr_layers_Model[conf['model']], force_weights_lr_mult=conf['cwr_lr_mult']) else: cwr.zeros_cwr_layer_bias_lr( solver.net, cwr_layers_Model[conf['model']]) cwr.set_brn_past_weight(solver.net, conf['brn_past_weight']) # Initializes some data structures used for reporting stats. 
Executed once (in the first round) if first_round: if batch == 1 and (conf['strategy'] in ['cwr', 'cwr+', 'ar1', 'ar1free']): print('Cannot start from batch 1 in ', conf['strategy'], ' strategy!') sys.exit(0) visualization.PrintNetworkArchitecture(solver.net) # If accuracy layer is defined in the prototxt also in TRAIN mode -> log train accuracy too (not in the plot) try: report_train_accuracy = True err = solver.net.blobs[ 'accuracy'].num # Assume this is stable for prototxt of successive batches except: report_train_accuracy = False first_round = False if conf['compute_param_stats']: param_change = {} param_stats = train_utils.stats_initialize_param(solver.net) # nonzero_activations = train_utils.stats_activations_initialize(solver.net) # Load training data for the current batch # Note that the file lists are provided in the batch_filelists folder current_train_filelist = train_filelists.replace( 'XX', str(batch).zfill(2)) print("Recovering training data: ", current_train_filelist, " ...") batch_x, batch_y = train_utils.get_data(current_train_filelist, conf['db_path'], conf['exp_path'], on_the_fly=run_on_the_fly, verbose=conf['verbose']) print("Done.") if conf['num_classes'] < 50: # Category based classification batch_y = batch_y // 5 batch_t = train_utils.compute_one_hot_vectors(batch_y, conf['num_classes']) # Load patterns from Rehearsal Memory rehe_x, rehe_y = rehearsal.get_samples() rehe_t = train_utils.compute_one_hot_vectors(rehe_y, conf['num_classes']) # Detects how many patterns per class are present in the current batch if batch == 0: classes_in_cur_train = batch_y.astype(np.int) else: classes_in_cur_train = np.concatenate( (batch_y.astype(np.int), rehe_y.astype(np.int))) unique_y, y_freq = np.unique(classes_in_cur_train, return_counts=True) if conf['strategy'] in ['cwr+', 'ar1', 'ar1free' ] and batch > initial_batch: cwr.reset_weights( solver.net, cwr_layers_Model[conf['model']], conf['num_classes']) # Reset weights of CWR layers to 0 # Loads previously 
consolidated weights # This procedure, explained in Fine-Grained Continual Learning (https://arxiv.org/pdf/1907.03799.pdf), # is necessary in the NIC scenario if 'cwr_nic_load_weight' in conf.keys( ) and conf['cwr_nic_load_weight']: cwr.load_weights_nic(solver.net, cwr_layers_Model[conf['model']], unique_y, cons_w) if conf['strategy'] in ['ar1'] and batch > initial_batch: syn.weight_stats(solver.net, batch, ewcData, conf['ewc_clip_to']) solver.net.blobs['ewc'].data[...] = ewcData # Convert labels to float32 batch_y = batch_y.astype(np.float32) assert (batch_x.shape[0] == batch_y.shape[0]) rehe_y = rehe_y.astype(np.float32) avg_train_loss = 0 avg_train_accuracy = 0 avg_count = 0 if conf['strategy'] in ['syn', 'ar1']: syn.init_batch(solver.net, ewcData, synData) reharshal_size = conf[ "rehearsal_memory"] if batch > initial_batch else 0 orig_in_minibatch = np.round( train_minibatch_size * batch_x.shape[0] / (batch_x.shape[0] + reharshal_size)).astype(np.int) reha_in_minibatch = train_minibatch_size - orig_in_minibatch print(' -> Current Batch: %d patterns, External Memory: %d patterns' % (batch_x.shape[0], reharshal_size)) print( ' -> per minibatch (size %d): %d from current batch and %d from external memory' % (train_minibatch_size, orig_in_minibatch, reha_in_minibatch)) # Padding and shuffling batch_x, orig_iters_per_epoch = train_utils.pad_data_single( batch_x, orig_in_minibatch) batch_y, _ = train_utils.pad_data_single(batch_y, orig_in_minibatch) batch_t, _ = train_utils.pad_data_single(batch_t, orig_in_minibatch) batch_x, batch_y, batch_t = train_utils.shuffle_in_unison( (batch_x, batch_y, batch_t), 0) if conf['rehearsal_is_latent']: req_shape = (batch_x.shape[0], ) + solver.net.blobs[ conf['rehearsal_layer']].data.shape[1:] latent_batch_x = np.zeros(req_shape, dtype=np.float32) # Padding and shuffling of rehasal patterns reha_iters_per_epoch = 0 if reharshal_size > 0: rehe_x, reha_iters_per_epoch = train_utils.pad_data_single( rehe_x, reha_in_minibatch) rehe_y, 
_ = train_utils.pad_data_single(rehe_y, reha_in_minibatch) rehe_t, _ = train_utils.pad_data_single(rehe_t, reha_in_minibatch) rehe_x, rehe_y, rehe_t = train_utils.shuffle_in_unison( (rehe_x, rehe_y, rehe_t), 0) # shuffle print( ' -> iterations per epoch (with padding): %d, %d (initial %d)' % (orig_iters_per_epoch, reha_iters_per_epoch, train_iterations_per_epoch[batch])) # The main solver loop (per batch) it = 0 while it < train_iterations[batch]: # The following part is pretty much straight-forward # The current batch is split in minibatches (which size was previously detected by looking at the net prototxt) # The minibatch is loaded in blobs 'data', 'data_reha', 'label' and 'target' it_mod_orig = it % orig_iters_per_epoch orig_start = it_mod_orig * orig_in_minibatch orig_end = (it_mod_orig + 1) * orig_in_minibatch if conf['rehearsal_is_latent']: solver.net.blobs['data'].data[ ...] = batch_x[orig_start:orig_end] else: solver.net.blobs['data'].data[:orig_in_minibatch] = batch_x[ orig_start:orig_end] # Provide data to input layers (new patterns) solver.net.blobs['label'].data[:orig_in_minibatch] = batch_y[ orig_start:orig_end] solver.net.blobs['target'].data[:orig_in_minibatch] = batch_t[ orig_start:orig_end] # Provide data to input layers (reharsal patterns) if reharshal_size > 0: it_mod_reha = it % reha_iters_per_epoch reha_start = it_mod_reha * reha_in_minibatch reha_end = (it_mod_reha + 1) * reha_in_minibatch if conf['rehearsal_is_latent']: solver.net.blobs['data_reha'].data[ ...] 
= rehe_x[reha_start:reha_end] else: solver.net.blobs['data'].data[orig_in_minibatch:] = rehe_x[ reha_start:reha_end] solver.net.blobs['label'].data[orig_in_minibatch:] = rehe_y[ reha_start:reha_end] solver.net.blobs['target'].data[orig_in_minibatch:] = rehe_t[ reha_start:reha_end] if conf['strategy'] in ['ar1']: syn.pre_update(solver.net, ewcData, synData) # Explicit (net.step(1)) solver.net.clear_param_diffs() solver.net.forward() # start=None, end=None if batch > 0 and conf['strategy'] in ['cwr+', 'cwr']: solver.net.backward( end='mid_fc7' ) # In CWR+ we stop the backward step at the CWR layer else: if batch > 0 and 'rehearsal_stop_layer' in conf.keys( ) and conf['rehearsal_stop_layer'] is not None: # When using latent replay we stop the backward step at the latent rehearsal layer solver.net.backward(end=conf['rehearsal_stop_layer']) else: solver.net.backward() if conf['rehearsal_is_latent']: # Save latent features of new patterns (only during the first epoch) if batch > 0 and it < orig_iters_per_epoch: latent_batch_x[orig_start:orig_end] = solver.net.blobs[ conf['rehearsal_layer']].data # Weights update solver.apply_update() if conf['strategy'] == 'ar1': syn.post_update(solver.net, ewcData, synData, cwr_layers_Model[conf['model']]) print('+', end='', flush=True) global_eval_iter += 1 avg_count += 1 avg_train_loss += solver.net.blobs['loss'].data if report_train_accuracy: avg_train_accuracy += solver.net.blobs['accuracy'].data if global_eval_iter == eval_iters[eval_idx]: # Evaluation point if avg_count > 0: avg_train_loss /= avg_count avg_train_accuracy /= avg_count train_loss[eval_idx] = avg_train_loss print('\nIter {:>4}'.format(it + 1), '({:>4})'.format(global_eval_iter), ': Train Loss = {:.5f}'.format(avg_train_loss), end='', flush=True) if report_train_accuracy: train_acc[eval_idx] = avg_train_accuracy print(' Train Accuracy = {:.5f}%'.format( avg_train_accuracy * 100), end='', flush=True) compute_confusion_matrix = True if ( conf['confusion_matrix'] and it 
== train_iterations[batch] - 1) else False # last batch iter # The following lines are executed only if this is the last iteration for the current batch if conf['strategy'] in [ 'cwr+', 'ar1', 'ar1free' ] and it == train_iterations[batch] - 1: cwr.consolidate_weights_cwr_plus( solver.net, cwr_layers_Model[conf['model']], unique_y, y_freq, class_updates, cons_w) class_updates[unique_y] += y_freq print(class_updates) cwr.load_weights( solver.net, cwr_layers_Model[conf['model']], conf['num_classes'], cons_w) # Load consolidated weights for testing accuracy, _, pred_y = train_utils.test_network_with_accuracy_layer( solver, test_x, test_y, test_iterat, test_minibatch_size, prediction_level_Model[conf['model']], return_prediction=compute_confusion_matrix) test_acc[eval_idx] = accuracy * 100 print(' Test Accuracy = {:.5f}%'.format(accuracy * 100)) # Batch(Re)Norm Stats train_utils.print_bn_stats(solver.net) visualization.Plot_Incremental_Training_Update( eval_idx, eval_iters, train_loss, test_acc) filelog.Train_Log_Update(conf['train_log_file'], eval_iters[eval_idx], accuracy, avg_train_loss, report_train_accuracy, avg_train_accuracy) avg_train_loss = 0 avg_train_accuracy = 0 avg_count = 0 eval_idx += 1 # Next eval it += 1 # Next iter # Current batch training concluded if conf['strategy'] in ['ar1']: syn.update_ewc_data(solver.net, ewcData, synData, batch, conf['ewc_clip_to'], c=conf['ewc_w']) if conf['save_ewc_histograms']: visualization.EwcHistograms(ewcData, 100, save_as=conf['exp_path'] + 'Syn/F_' + str(batch) + '.png') if conf['rehearsal_is_latent']: if batch == 0: reha_it = 0 while reha_it < orig_iters_per_epoch: orig_start = reha_it * orig_in_minibatch orig_end = (reha_it + 1) * orig_in_minibatch solver.net.blobs['data'].data[ ...] 
= batch_x[orig_start:orig_end] solver.net.forward() latent_batch_x[orig_start:orig_end] = solver.net.blobs[ conf['rehearsal_layer']].data reha_it += 1 rehearsal.update_memory(latent_batch_x, batch_y.astype(np.int), batch) else: rehearsal.update_memory(batch_x, batch_y.astype(np.int), batch) if compute_confusion_matrix: # Computes the confusion matrix and logs + plots it cnf_matrix = confusion_matrix(test_y, pred_y, range(conf['num_classes'])) if batch == 0: prev_class_accuracies = np.zeros(conf['num_classes']) else: prev_class_accuracies = current_class_accuracies current_class_accuracies = np.diagonal( cnf_matrix) / cnf_matrix.sum(axis=1) deltas = current_class_accuracies - prev_class_accuracies classes_in_batch = set(batch_y.astype(np.int)) classes_non_in_batch = set(range( conf['num_classes'])) - classes_in_batch mean_class_in_batch = np.mean(deltas[list(classes_in_batch)]) std_class_in_batch = np.std(deltas[list(classes_in_batch)]) mean_class_non_in_batch = np.mean( deltas[list(classes_non_in_batch)]) std_class_non_in_batch = np.std(deltas[list(classes_non_in_batch)]) print( 'InBatch -> mean = %.2f%% std = %.2f%%, OutBatch -> mean = %.2f%% std = %.2f%%' % (mean_class_in_batch * 100, std_class_in_batch * 100, mean_class_non_in_batch * 100, std_class_non_in_batch * 100)) filelog.Train_LogDetails_Update(conf['train_log_file'], batch, mean_class_in_batch, std_class_in_batch, mean_class_non_in_batch, std_class_non_in_batch) visualization.plot_confusion_matrix( cnf_matrix, normalize=True, title='CM after batch: ' + str(batch), save_as=conf['exp_path'] + 'CM/CM_' + str(batch) + '.png') if conf['compute_param_stats']: train_utils.stats_compute_param_change_and_update_prev( solver.net, param_stats, batch, param_change) if batch == 0: solver.net.save(conf['tmp_weights_file']) print('Weights saved to: ', conf['tmp_weights_file']) del solver print('Training Time: %.2f sec' % (time.time() - start_train)) if conf['compute_param_stats']: stats_normalization = True 
train_utils.stats_normalize(solver.net, param_stats, batch_count, param_change, stats_normalization) visualization.Plot3d_param_stats(solver.net, param_change, batch_count, stats_normalization) filelog.Train_Log_End(conf['train_log_file']) filelog.Train_LogDetails_End(conf['train_log_file']) visualization.Plot_Incremental_Training_End(close=close_at_the_end)
def main():
    """Compare an SVM on the UCI HAR data with and without RFECV feature selection.

    Steps, in order:
      1. Parse the train/test feature and label files with ``commonFunc.parseFile``.
      2. Load a previously saved feature mask from ``SVM-features-mask.out`` or,
         when that file does not exist, run ``RFECV`` with an SVC estimator and
         save the resulting mask.
      3. Train and evaluate an SVC on the selected features, then on all
         features, printing the three scores returned by
         ``commonFunc.checkAccuracy`` and plotting both confusion matrices.

    Takes no arguments and returns nothing; all results are printed or plotted.
    """
    banner = '*' * 20

    # --- Load the data ---
    print(banner, "程序开始-读取数据", banner)
    X_Train = commonFunc.parseFile('../UCI HAR Dataset/train/X_train.txt')
    Y_Train = commonFunc.parseFile(
        '../UCI HAR Dataset/train/y_train.txt').flatten()
    X_Test = commonFunc.parseFile('../UCI HAR Dataset/test/X_test.txt')
    Y_Test = commonFunc.parseFile(
        '../UCI HAR Dataset/test/y_test.txt').flatten()
    activity_labels = [
        'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS',
        'SITTING', 'STANDING', 'LAYING',
    ]
    print("数据读取完成~\n")

    # --- Parameter setup ---
    print(banner, "设置参数表", banner)
    # Every SVC in this function is built from the same keyword arguments.
    svc_params = {
        'kernel': 'linear',
        'degree': 0.91,
        'C': 3,
        'coef0': 0,
        'gamma': 'scale',
        'probability': False,
    }
    print("SVM:")
    print("kernel:{0} \t C:{1}".format(svc_params['kernel'], svc_params['C']))
    print("RFE:")
    rfe_step = 5
    rfe_min_features = 300
    rfe_cv_folds = 20
    rfe_jobs = 6
    print(
        "estimate:SVM \t step:{0} \t min_feature_to_select:{1} \t CrossValidation:{2} \t CPUChannel:{3} \n"
        .format(rfe_step, rfe_min_features, rfe_cv_folds, rfe_jobs))

    # --- Feature selection (cached on disk) ---
    print(banner, "读取特征文件", banner)
    mask_path = "SVM-features-mask.out"
    if os.path.exists(mask_path):
        print("存在特征文件,开始读取...")
        mask = np.loadtxt(mask_path) == 1
        print("读取完成,准备显示...")
        print("特征选择数量: {0}".format(sum(mask == 1)))
    else:
        print("特征文件不存在~")
        print("开始特征选择...")
        started = perf_counter()
        rfe_estimator = svm.SVC(**svc_params)
        selector = RFECV(rfe_estimator,
                         step=rfe_step,
                         min_features_to_select=rfe_min_features,
                         cv=rfe_cv_folds,
                         n_jobs=rfe_jobs)
        selector = selector.fit(X_Train, Y_Train)
        mask = selector.get_support()
        print("特征选择完成!")
        print("用时 {0:.2f}mins".format((perf_counter() - started) / 60))
        print("特征选择数量: {0}".format(sum(mask == 1)))
        np.savetxt(mask_path, mask, fmt='%d')

    # --- Plot the mask as a one-row image across the top half of the figure ---
    plt.figure(figsize=(14, 14))
    plt.subplot(2, 2, (1, 2))
    plt.imshow(mask.reshape(1, -1), cmap='tab20c_r')
    plt.title("Feature Selected: {0}".format(sum(mask == 1)),
              fontsize=14,
              y=2.5)
    plt.ylim([-5, 5])
    plt.xlabel("Feature Index(Deeper Color means Selected)", fontsize=10)
    # The figure is intentionally not shown here: the two confusion matrices
    # below are drawn into the same figure before the final plt.show().
    print('\n')

    # --- Results on the selected features ---
    print(banner, "特征选择后的数据结果", banner)
    selected_model = svm.SVC(**svc_params)
    selected_model.fit(X_Train[:, mask], Y_Train)
    selected_pred = selected_model.predict(X_Test[:, mask])
    selected_scores = commonFunc.checkAccuracy(Y_Test, selected_pred)
    prec_selected, rec_selected, f_score_selected = selected_scores
    print("训练结果:")
    print("准确率:{0}\n召回率:{1}\nF1度量:{2}".format(prec_selected,
                                                rec_selected,
                                                f_score_selected))

    # Confusion matrix (bottom-left panel)
    plt.subplot(2, 2, 3)
    selected_cm = commonFunc.createConfusionMatrix(selected_pred, Y_Test)
    plot_confusion_matrix(selected_cm,
                          activity_labels,
                          normalize=False,
                          title='Selected_F Confusion matrix')
    print('\n')

    # --- Results on the original (unselected) features ---
    print(banner, "特征选择前的数据结果", banner)
    full_model = svm.SVC(**svc_params)
    full_model.fit(X_Train, Y_Train)
    full_pred = full_model.predict(X_Test)
    prec, rec, f_score = commonFunc.checkAccuracy(Y_Test, full_pred)
    print("训练结果:")
    print("准确率:{0}\n召回率:{1}\nF1度量:{2}".format(prec, rec, f_score))

    # Confusion matrix (bottom-right panel)
    plt.subplot(2, 2, 4)
    full_cm = commonFunc.createConfusionMatrix(full_pred, Y_Test)
    plot_confusion_matrix(full_cm,
                          activity_labels,
                          normalize=False,
                          title='All_F Confusion matrix')
    plt.show()
feed_dict = { model.input_x: test_features, model.input_y: one_hot_test_labels } predictions = sess.run( model.predictions, feed_dict) recall, precision, f1, confusion_matrix = visualization.calculate_cm( pred_vals=predictions, true_vals=test_labels, classes=list(range(n_labels))) visualization.plot_confusion_matrix( cm=confusion_matrix, classes=list(range(n_labels)), path=f'figures/{name}/', name=f'step-{i}.png', normalize=True) metrics[i] = [ confusion_matrix, recall, precision, f1 ] path = f'metrics/{metric_path}/' if not os.path.exists(path): os.makedirs(path) with open(f'{path}{metric_name}.p', 'wb') as bin_file:
def training_model(num):
    """Train ``TEST_NUM`` BiRNN models and return the best one by dev accuracy.

    For every trial a fresh model is trained for ``num_epochs`` epochs; after
    each epoch the last minibatch loss is recorded and the model is evaluated
    on the dev set. A ``KeyboardInterrupt`` during a trial prints ``'Stop!'``
    and the trial is recorded with whatever was collected so far.

    Args:
        num: Zero-based index of this run; only used in the printed message
            and in the names of the saved figures.

    Returns:
        A tuple ``(max_accuracy, best_model, test_loader)`` where
        ``max_accuracy`` is the highest final-epoch dev accuracy (in percent)
        across trials, ``best_model`` is the model of the first trial reaching
        it, and ``test_loader`` is the test ``DataLoader`` built in the last
        trial.
    """
    # Load the dataset
    X_training, Y_training, X_dev, Y_dev, X_test, Y_test = data_load()
    print(len(X_training))

    modellist = []
    loss_histories = []
    accuracy_histories = []
    true_label_runs = []
    pred_label_runs = []

    # Bound up front so a trial interrupted before its first epoch still has
    # values to record below.
    training_losses = []
    test_accuracies = []
    y_true = []
    y_pred = []

    for trial in range(TEST_NUM):
        try:
            # Create the model
            model = BiRNN(input_size, hidden_size, num_layers,
                          num_classes).to(device)

            # Loss and optimizer
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
            training_losses = []
            test_accuracies = []

            # Data loaders (dev/test use the DataLoader default batch size)
            train_loader = DataLoader(TensorDataset(X_training, Y_training),
                                      batch_size=batch_size,
                                      shuffle=True)
            dev_loader = DataLoader(TensorDataset(X_dev, Y_dev))
            test_loader = DataLoader(TensorDataset(X_test, Y_test))

            for epoch in range(num_epochs):
                # Train for one epoch
                for batch_inputs, batch_labels in train_loader:
                    batch_inputs = batch_inputs.reshape(
                        -1, sequence_length, input_size).to(device)
                    batch_labels = batch_labels.reshape(-1).to(device)

                    # Forward pass
                    training_loss = criterion(model(batch_inputs),
                                              batch_labels)

                    # Backward and optimize
                    optimizer.zero_grad()
                    training_loss.backward()
                    optimizer.step()

                if (epoch + 1) % 2 == 0:
                    print('Test [{}/{}], Epoch [{}/{}], Loss: {:.4f}'.format(
                        trial + 1, TEST_NUM, epoch + 1, num_epochs,
                        training_loss.item()))

                # Record the loss of the last minibatch of this epoch
                training_losses.append(training_loss.item())

                # Evaluate on the dev set
                with torch.no_grad():
                    y_true = []
                    y_pred = []
                    correct = 0
                    total = 0
                    for dev_inputs, dev_labels in dev_loader:
                        dev_inputs = dev_inputs.reshape(
                            -1, sequence_length, input_size).to(device)
                        dev_labels = dev_labels.reshape(-1).to(device)
                        dev_outputs = model(dev_inputs)
                        _, predicted = torch.max(dev_outputs.data, 1)
                        total += dev_labels.size(0)
                        correct += (predicted == dev_labels).sum().item()
                        y_true.append(dev_labels.item())
                        y_pred.append(predicted.item())
                    test_accuracies.append(correct / total)
        except KeyboardInterrupt:
            print('Stop!')

        modellist.append(model)
        loss_histories.append(training_losses)
        accuracy_histories.append(test_accuracies)
        true_label_runs.append(y_true)
        pred_label_runs.append(y_pred)

    # Final-epoch dev accuracy of every trial, in percent
    accuracy = [history[-1] * 100 for history in accuracy_histories]
    max_accuracy = max(accuracy)
    print('Dev accuracy of the No.{} model on dev action samples: {} %'.format(
        num + 1, max_accuracy))

    # Save the loss/accuracy curves and the confusion matrix figure
    variance_and_bias_analysis(loss_histories, accuracy_histories)
    save('trials' + str(num + 1) + '_loss_accuracy' + '.png')
    plot_confusion_matrix(true_label_runs, pred_label_runs, LABELS)
    save('trials_' + str(num + 1) + '_confusion_matrix' + '.png')

    best_index = accuracy.index(max_accuracy)
    return max_accuracy, modellist[best_index], test_loader
def log_confusion_matrix(tb_logger, epoch, data_subset, metrics):
    """Plot the confusion matrix stored in ``metrics`` and log it as a figure.

    Args:
        tb_logger: Object exposing a ``writer`` with an ``add_figure`` method.
        epoch: Value passed as ``global_step`` for the logged figure.
        data_subset: Name appended to the ``"confusion matrix/"`` tag.
        metrics: Mapping whose ``"confusion matrix"`` entry supports
            ``.cpu().numpy()``.
    """
    matrix = metrics["confusion matrix"].cpu().numpy()
    figure = plot_confusion_matrix(matrix)
    writer = tb_logger.writer
    writer.add_figure(
        f"confusion matrix/{data_subset}",
        figure,
        global_step=epoch,
    )
def main_Core50(conf, run, close_at_the_end=False):
    """Run incremental (batch-by-batch) training on CORe50 with a Caffe solver.

    The function loads the test set once, then for every training batch from
    ``conf['initial_batch']`` to ``conf['num_batches'] - 1`` it loads the
    batch data, applies the strategy-specific preparation selected by
    ``conf['strategy']`` (values checked here: 'naive', 'lwf', 'ewc', 'syn',
    'cwr', 'cwr+', 'ar1'), runs the solver loop and evaluates the network at
    the precomputed evaluation iterations. Progress is reported through
    ``visualization`` and ``filelog``.

    Args:
        conf: Configuration dict. Note that the entries
            'solver_file_first_batch', 'solver_file', 'init_weights_file' and
            'tmp_weights_file' are rewritten in place ('X' is replaced with
            ``conf['model']``).
        run: String substituted for 'RUN_X' in the train/test file-list paths.
        close_at_the_end: Forwarded as ``close`` to
            ``visualization.Plot_Incremental_Training_End``.

    Side effects:
        Creates the 'CM', 'EwC' and 'Syn' directories under
        ``conf['exp_path']``, saves the network weights after batch 0 to
        ``conf['tmp_weights_file']`` and calls ``sys.exit(0)`` when asked to
        start from batch 1 with a strategy that needs batch 0.
    """
    # Prepare configuration files
    conf['solver_file_first_batch'] = conf['solver_file_first_batch'].replace('X', conf['model'])
    conf['solver_file'] = conf['solver_file'].replace('X', conf['model'])
    conf['init_weights_file'] = conf['init_weights_file'].replace('X', conf['model'])
    conf['tmp_weights_file'] = conf['tmp_weights_file'].replace('X', conf['model'])
    train_filelists = conf['train_filelists'].replace('RUN_X', run)
    test_filelist = conf['test_filelist'].replace('RUN_X', run)

    # Always read single files (slower, but saves disk space).
    run_on_the_fly = True

    (Path(conf['exp_path']) / 'CM').mkdir(exist_ok=True, parents=True)
    (Path(conf['exp_path']) / 'EwC').mkdir(exist_ok=True, parents=True)
    (Path(conf['exp_path']) / 'Syn').mkdir(exist_ok=True, parents=True)

    # Parse the solver prototxt
    # for more details see - https://stackoverflow.com/questions/31823898/changing-the-solver-parameters-in-caffe-through-pycaffe
    print('Solver proto: ', conf['solver_file_first_batch'])
    solver_param = caffe_pb2.SolverParameter()
    with open(conf['solver_file_first_batch']) as f:
        txtf.Merge(str(f.read()), solver_param)
    net_prototxt = solver_param.net  # Path to the net prototxt
    print('Net proto: ', net_prototxt)

    # Obtain class labels
    if conf['class_labels'] != '':
        # More complex than a simple loadtxt because of the unicode representation in python 3
        # NOTE(review): label_str is not read again inside this function.
        label_str = np.loadtxt(conf['class_labels'], dtype=bytes, delimiter="\n").astype(str)

    # Obtain minibatch size from net proto
    train_minibatch_size, test_minibatch_size = \
        train_utils.extract_minibatch_size_from_prototxt_with_input_layers(net_prototxt)
    print(' test minibatch size: ', test_minibatch_size)
    print(' train minibatch size: ', train_minibatch_size)

    # Is the network using target vectors (besides the labels)?
    need_target = train_utils.net_use_target_vectors(net_prototxt)

    # Load test set
    print("Recovering Test Set: ", test_filelist, " ...")
    start = time.time()
    test_x, test_y = train_utils.get_data(
        test_filelist, conf['db_path'], conf['exp_path'],
        on_the_fly=run_on_the_fly, verbose=conf['verbose'])
    assert (test_x.shape[0] == test_y.shape[0])
    if conf['num_classes'] == 10:  # Checks if we are doing category-based classification
        test_y = test_y // 5
    test_y = test_y.astype(np.float32)
    test_patterns = test_x.shape[0]
    test_x, test_y, test_iterat = train_utils.pad_data(test_x, test_y, test_minibatch_size)
    print(' -> %d patterns of %d classes (%.2f sec.)'
          % (test_patterns, len(np.unique(test_y)), time.time() - start))
    print(' -> %.2f -> %d iterations for full evaluation'
          % (test_patterns / test_minibatch_size, test_iterat))

    # Load training patterns in batches (by now assume the same number in all batches)
    batch_count = conf['num_batches']
    train_patterns = train_utils.count_lines_in_batches(batch_count, train_filelists)
    train_iterations_per_epoch = np.zeros(batch_count, int)
    train_iterations = np.zeros(batch_count, int)
    test_interval_epochs = conf['test_interval_epochs']
    test_interval = np.zeros(batch_count, float)
    for batch in range(batch_count):
        train_iterations_per_epoch[batch] = int(np.ceil(train_patterns[batch] / train_minibatch_size))
        test_interval[batch] = test_interval_epochs * train_iterations_per_epoch[batch]
        if batch == 0:
            train_iterations[batch] = train_iterations_per_epoch[batch] * conf['num_epochs_first_batch']
        else:
            train_iterations[batch] = train_iterations_per_epoch[batch] * conf['num_epochs']
        print("Batch %2d: %d patterns, %d iterations (%d iter. per epochs - test every %.1f iter.)"
              % (batch, train_patterns[batch], train_iterations[batch],
                 train_iterations_per_epoch[batch], test_interval[batch]))

    # Create evaluation points
    # -> iterations which are boundaries of batches
    batch_iter = [0]
    cumulative_iters = 0  # (was named `iter`, which shadowed the builtin)
    for batch in range(batch_count):
        cumulative_iters += train_iterations[batch]
        batch_iter.append(cumulative_iters)

    # Calculates the iterations where the network will be evaluated
    # Start with 1 (instead of 0) because the test net is aligned to the train one after solver.step(1)
    eval_iters = [1]
    for batch in range(batch_count):
        start = batch_iter[batch]
        end = batch_iter[batch + 1]
        start += test_interval[batch]
        while start < end:
            eval_iters.append(int(start))
            start += test_interval[batch]
        eval_iters.append(end)

    # Iterations which are epochs in the evaluation range
    epochs_iter = []
    for batch in range(batch_count):
        start = batch_iter[batch]
        end = batch_iter[batch + 1]
        start += train_iterations_per_epoch[batch]
        while start <= end:
            epochs_iter.append(int(start))
            start += train_iterations_per_epoch[batch]

    prev_train_loss = np.zeros(len(eval_iters))
    prev_test_acc = np.zeros(len(eval_iters))
    prev_exist = filelog.TryLoadPrevTrainingLog(conf['train_log_file'], prev_train_loss, prev_test_acc)
    # Copying allows to correctly visualize the graph in case we start from initial_batch > 0
    train_loss = np.copy(prev_train_loss)
    test_acc = np.copy(prev_test_acc)
    train_acc = np.zeros(len(eval_iters))

    epochs_tick = False if batch_count > 30 else True  # For better visualization
    visualization.Plot_Incremental_Training_Init(
        'Incremental Training', eval_iters, epochs_iter, batch_iter,
        train_loss, test_acc, 5, conf['accuracy_max'],
        prev_exist, prev_train_loss, prev_test_acc,
        show_epochs_tick=epochs_tick)
    filelog.Train_Log_Init(conf['train_log_file'])
    filelog.Train_LogDetails_Init(conf['train_log_file'])

    start_train = time.time()
    eval_idx = 0  # Evaluation iterations counter
    global_eval_iter = 0  # Global iterations counter
    first_round = True
    initial_batch = conf['initial_batch']
    # Per-class accuracies of the previous confusion matrix; None until the
    # first one is computed, so that starting from initial_batch > 0 does not
    # read an unassigned variable.
    current_class_accuracies = None

    if initial_batch > 0:
        # Move forward by skipping unnecessary evaluation
        global_eval_iter = batch_iter[initial_batch]
        while eval_iters[eval_idx] < global_eval_iter:
            eval_idx += 1
        eval_idx += 1

    for batch in range(initial_batch, batch_count):
        print('\nBATCH = {:2d} ----------------------------------------------------'.format(batch))

        if batch == 0:
            # Load the solver for the first batch and create net(s)
            solver = caffe.get_solver(conf['solver_file_first_batch'])
            if conf['init_weights_file'] != '':
                solver.net.copy_from(conf['init_weights_file'])
                print('Network created and Weights loaded from: ', conf['init_weights_file'])
                # Test
                solver.test_nets[0].copy_from(conf['init_weights_file'])
                accuracy, _, pred_y = train_utils.test_network_with_accuracy_layer(
                    solver, test_x, test_y, test_iterat, test_minibatch_size,
                    prediction_level_Model[conf['model']], return_prediction=True)
                # BatchNorm Stats
                train_utils.print_bn_stats(solver.net)

            if conf['strategy'] in ['cwr+', 'ar1']:
                cwr.zeros_cwr_layer_bias_lr(solver.net, cwr_layers_Model[conf['model']])
                class_updates = np.zeros(conf['num_classes'], dtype=np.float32)
                # Allocate space for consolidated weights and initialize them to 0
                cons_w = cwr.init_consolidated_weights(
                    solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])
                # Reset cwr weights to 0 (done here for the first batch to keep initial stats correct)
                cwr.reset_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])

            if conf['strategy'] == 'cwr' or conf['dynamic_head_expansion'] == True:
                class_updates = np.zeros(conf['num_classes'], dtype=np.float32)
                # Random values for cwr layers (since they do not exist in pretrained models)
                rand_w, cons_w = cwr.copy_initial_weights(
                    solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])

            if conf['strategy'] in ['syn', 'ar1']:
                # ewcData stores optimal weights + normalized fisher;
                # trajectory stores unnormalized summed grad*deltaW
                ewcData, synData = syn.create_syn_data(solver.net)

        elif batch == 1:
            # Load the solver for the next batches and create net(s)
            solver = caffe.get_solver(conf['solver_file'])
            solver.net.copy_from(conf['tmp_weights_file'])
            print('Network created and Weights loaded from: ', conf['tmp_weights_file'])

            if conf['strategy'] in ['cwr', 'cwr+']:
                # In CWR we freeze every layer except the CWR one(s)
                cwr.zeros_non_cwr_layers_lr(solver.net, cwr_layers_Model[conf['model']])

            # By providing a cwr_lr_mult multiplier we can use a different Learning Rate
            # for CWR and non-CWR cwr_layers_Model
            # Note that a similar result can be achieved by manually editing the net prototxt
            if conf['strategy'] in ['cwr+', 'ar1']:
                if 'cwr_lr_mult' in conf.keys() and conf['cwr_lr_mult'] != 1:
                    cwr.zeros_cwr_layer_bias_lr(
                        solver.net, cwr_layers_Model[conf['model']],
                        force_weights_lr_mult=conf['cwr_lr_mult'])
                else:
                    cwr.zeros_cwr_layer_bias_lr(solver.net, cwr_layers_Model[conf['model']])

            cwr.set_brn_past_weight(solver.net, 10000)

        # Initializes some data structures used for reporting stats.
        # Executed once (in the first round)
        if first_round:
            if batch == 1 and (conf['strategy'] in ['ewc', 'cwr', 'cwr+', 'syn', 'ar1']):
                print('Cannot start from batch 1 in ', conf['strategy'], ' strategy!')
                sys.exit(0)
            visualization.PrintNetworkArchitecture(solver.net)
            # If an accuracy layer is defined in the prototxt also in TRAIN mode
            # -> log also train accuracy (not in the plot).
            # Probing the blob is the check; assume this is stable for the
            # prototxt of successive batches.
            try:
                report_train_accuracy = True
                _ = solver.net.blobs['accuracy'].num
            except Exception:
                report_train_accuracy = False
            first_round = False
            if conf['compute_param_stats']:
                param_change = {}
                param_stats = train_utils.stats_initialize_param(solver.net)

        # Load training data for the current batch
        # Note that the file lists are provided in the batch_filelists folder
        current_train_filelist = train_filelists.replace('XX', str(batch).zfill(2))
        print("Recovering training data: ", current_train_filelist, " ...")
        load_start = time.time()
        train_x, train_y = train_utils.get_data(
            current_train_filelist, conf['db_path'], conf['exp_path'],
            on_the_fly=run_on_the_fly, verbose=conf['verbose'])
        print("Done.")
        if conf['num_classes'] == 10:  # Category based classification
            train_y = train_y // 5

        # If target values (e.g. one hot vectors) are needed we need to create
        # them from numerical class labels
        if need_target:
            target_y = train_utils.compute_one_hot_vectors(train_y, conf['num_classes'])
            train_x, tmp_iters = train_utils.pad_data_single(train_x, train_minibatch_size)
            train_y, _ = train_utils.pad_data_single(train_y, train_minibatch_size)
            target_y, _ = train_utils.pad_data_single(target_y, train_minibatch_size)

            if batch > 0 and conf['strategy'] == 'lwf':
                if conf['lwf_weight'] > 0:
                    weight_old = conf['lwf_weight']
                else:
                    # Fraction of patterns seen before this batch, linearly
                    # remapped from [x_min, x_max] to [y_min, y_max] (not clipped).
                    weight_old = 1 - (train_patterns[batch] / np.sum(train_patterns[0:batch + 1]))
                    x_min = 2.0 / 3.0
                    x_max = 0.9
                    y_min = 0.45
                    y_max = 0.60
                    weight_old = y_min + (weight_old - x_min) * (y_max - y_min) / (x_max - x_min)
                print('Lwf Past Weight: %.2f' % (weight_old))
                target_y = lwf.update_target_vectors(
                    solver, train_x, train_y, conf['num_classes'],
                    train_iterations_per_epoch[batch], train_minibatch_size, weight_old)

            if conf['dynamic_head_expansion'] == True:
                train_utils.dynamic_head_expansion(
                    solver.net, cwr_layers_Model[conf['model']],
                    conf['num_classes'], train_y, rand_w)

            if conf['strategy'] == 'cwr' and batch > initial_batch:
                # Reset net weights
                cwr.load_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'], rand_w)

            if conf['strategy'] in ['cwr+', 'ar1'] and batch > initial_batch:
                # Reset weights of CWR layers to 0 (maximal head approach!)
                cwr.reset_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])
                # Loads previously consolidated weights
                # This procedure, explained in the paper, is necessary in the NIC scenario
                if 'cwr_nic_load_weight' in conf.keys() and conf['cwr_nic_load_weight']:
                    cwr.load_weights_nic(solver.net, cwr_layers_Model[conf['model']], train_y, cons_w)

            train_x, train_y, target_y = train_utils.shuffle_in_unison((train_x, train_y, target_y), 0)

            if conf['strategy'] in ['ewc', 'syn', 'ar1'] and batch > initial_batch:
                # Makes ewc info available to the network for successive training
                # The 'ewc' blob will be used by our C++ code (see the provided custom "sgd_solver.cpp")
                solver.net.blobs['ewc'].data[...] = ewcData
        else:
            # TODO: review branch (is it necessary?)
            train_x, tmp_iters = train_utils.pad_data_single(train_x, train_minibatch_size)
            train_y, _ = train_utils.pad_data_single(train_y, train_minibatch_size)
            train_x, train_y = train_utils.shuffle_in_unison((train_x, train_y), 0)
            # apply temporal coherence strategy to modify labels
            if batch > 0 and conf['strategy'] != 'naive':
                train_x, train_y = train_utils.predict_labels_temporal_coherence(
                    solver, train_x, train_y, conf['num_classes'],
                    train_iterations_per_epoch[batch], train_minibatch_size,
                    conf['strategy'], 0.80)
                # ATTENTION, if patterns have been removed do padding again!

        print(' -> %d patterns (of %d classes) after padding and shuffling (%.2f sec.)'
              % (train_x.shape[0], len(np.unique(train_y)), time.time() - load_start))
        assert (train_iterations[batch] >= tmp_iters)

        # convert labels to float32
        train_y = train_y.astype(np.float32)
        assert (train_x.shape[0] == train_y.shape[0])

        # training
        avg_train_loss = 0
        avg_train_accuracy = 0
        avg_count = 0

        if conf['strategy'] in ['syn', 'ar1']:
            syn.init_batch(solver.net, ewcData, synData)

        # The main solver loop (per batch)
        it = 0
        while it < train_iterations[batch]:
            # The current batch is split in minibatches (which size was previously
            # detected by looking at the net prototxt).
            # The minibatch is loaded in blobs 'data', 'label' and 'target';
            # a step(1) is executed (which executes forward + backward + weights update)
            it_mod = it % train_iterations_per_epoch[batch]
            start = it_mod * train_minibatch_size
            end = (it_mod + 1) * train_minibatch_size

            if conf['verbose']:
                avgl = avga = 0
                if avg_count > 0:
                    avgl = avg_train_loss / avg_count
                print('Iter {:>4}'.format(it + 1), '({:>4})'.format(global_eval_iter),
                      ': Train Loss = {:.5f}'.format(avgl), end='', flush=True)
                if report_train_accuracy:
                    if avg_count > 0:
                        avga = avg_train_accuracy / avg_count
                    print(' Train Accuracy = {:.5f}%'.format(avga * 100), flush=True)
            else:
                print('+', end='', flush=True)

            # Provide data to input layers
            solver.net.blobs['data'].data[...] = train_x[start:end]
            solver.net.blobs['label'].data[...] = train_y[start:end]
            if need_target:
                solver.net.blobs['target'].data[...] = target_y[start:end]

            if conf['strategy'] in ['syn', 'ar1']:
                syn.pre_update(solver.net, ewcData, synData)

            # SGD by Caffe
            if conf['strategy'] in ['cwr+', 'cwr'] and batch > initial_batch:
                # Explicit step so that the backward pass can stop at 'mid_fc7'
                solver.net.clear_param_diffs()
                solver.net.forward()  # start=None, end=None
                solver.net.backward(end='mid_fc7')
                solver.apply_update()
            else:
                solver.step(1)

            if conf['strategy'] == 'syn':
                syn.post_update(solver.net, ewcData, synData)
            if conf['strategy'] == 'ar1':
                syn.post_update(solver.net, ewcData, synData, cwr_layers_Model[conf['model']])

            global_eval_iter += 1
            avg_count += 1
            avg_train_loss += solver.net.blobs['loss'].data
            if report_train_accuracy:
                avg_train_accuracy += solver.net.blobs['accuracy'].data

            # Early stopping (a.k.a. Limited); enable by removing "_" on demand
            if (conf['strategy'] == '_syn' and avg_count > 0
                    and avg_train_loss / avg_count < syn.target_train_loss_accuracy_per_batch(batch)):
                it = train_iterations[batch] - 1  # skip to last iter
                global_eval_iter = eval_iters[eval_idx]  # enable evaluation point now

            if global_eval_iter == eval_iters[eval_idx]:
                # Evaluation point
                if avg_count > 0:
                    avg_train_loss /= avg_count
                    avg_train_accuracy /= avg_count
                train_loss[eval_idx] = avg_train_loss
                print('\nIter {:>4}'.format(it + 1), '({:>4})'.format(global_eval_iter),
                      ': Train Loss = {:.5f}'.format(avg_train_loss), end='', flush=True)
                if report_train_accuracy:
                    train_acc[eval_idx] = avg_train_accuracy
                    print(' Train Accuracy = {:.5f}%'.format(avg_train_accuracy * 100),
                          end='', flush=True)

                # Only on the last iteration of the batch
                compute_confusion_matrix = True if (
                    conf['confusion_matrix'] and it == train_iterations[batch] - 1) else False

                # The following lines are executed only if this is the last
                # iteration for the current batch
                if conf['strategy'] in ['cwr', 'cwr+', 'ar1'] and it == train_iterations[batch] - 1:
                    # Builtin int replaces the np.int alias, which was removed
                    # in NumPy 1.24; the resulting dtype is the same.
                    if conf['strategy'] == 'cwr':
                        batch_weight = conf['cwr_batch0_weight'] if batch == initial_batch else 1
                        cwr._consolidate_weights_cwr(
                            solver.net, cwr_layers_Model[conf['model']], train_y,
                            cons_w, batch_weight, class_updates=class_updates)
                        class_updates[train_y.astype(int)] += 1  # Increase weights of trained classes
                    else:
                        unique_y, y_freq = np.unique(train_y.astype(int), return_counts=True)
                        cwr.consolidate_weights_cwr_plus(
                            solver.net, cwr_layers_Model[conf['model']],
                            unique_y, y_freq, class_updates, cons_w)
                        class_updates[unique_y] += y_freq
                    # Load consolidated weights for testing
                    cwr.load_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'], cons_w)

                accuracy, _, pred_y = train_utils.test_network_with_accuracy_layer(
                    solver, test_x, test_y, test_iterat, test_minibatch_size,
                    prediction_level_Model[conf['model']],
                    return_prediction=compute_confusion_matrix)
                test_acc[eval_idx] = accuracy * 100
                print(' Test Accuracy = {:.5f}%'.format(accuracy * 100))

                # Batch(Re)Norm Stats
                train_utils.print_bn_stats(solver.net)

                visualization.Plot_Incremental_Training_Update(eval_idx, eval_iters, train_loss, test_acc)
                filelog.Train_Log_Update(
                    conf['train_log_file'], eval_iters[eval_idx], accuracy,
                    avg_train_loss, report_train_accuracy, avg_train_accuracy)

                avg_train_loss = 0
                avg_train_accuracy = 0
                avg_count = 0
                eval_idx += 1  # next eval

            it += 1  # next iter

        # Current batch training concluded
        if conf['strategy'] == 'ewc':
            if batch == initial_batch:
                # ewcData stores optimal weights + normalized fisher;
                # fisher stores unnormalized summed fisher
                ewcData, fisher = ewc.create_ewc_data(solver.net)
            print("Computing Fisher Information and Storing Optimal Weights...")
            ewc.update_ewc_data(
                ewcData, fisher, solver.net, train_x, train_y, target_y,
                train_iterations_per_epoch[batch], train_minibatch_size, batch,
                conf['ewc_clip_to'], conf['ewc_w'])
            print("Done.")
            if conf['save_ewc_histograms']:
                visualization.EwcHistograms(
                    ewcData, 100,
                    save_as=conf['exp_path'] + 'EwC/F_' + str(batch) + '.png')

        if conf['strategy'] in ['syn', 'ar1']:
            syn.update_ewc_data(solver.net, ewcData, synData, batch, conf['ewc_clip_to'])
            if conf['save_ewc_histograms']:
                visualization.EwcHistograms(
                    ewcData, 100,
                    save_as=conf['exp_path'] + 'Syn/F_' + str(batch) + '.png')

        if compute_confusion_matrix:
            # Computes the confusion matrix and logs + plots it
            cnf_matrix = confusion_matrix(test_y, pred_y)
            # No earlier matrix exists on batch 0, nor on the first processed
            # batch when training starts from initial_batch > 0.
            if batch == 0 or current_class_accuracies is None:
                prev_class_accuracies = np.zeros(conf['num_classes'])
            else:
                prev_class_accuracies = current_class_accuracies
            current_class_accuracies = np.diagonal(cnf_matrix) / cnf_matrix.sum(axis=1)
            deltas = current_class_accuracies - prev_class_accuracies
            classes_in_batch = set(train_y.astype(int))
            classes_non_in_batch = set(range(conf['num_classes'])) - classes_in_batch
            mean_class_in_batch = np.mean(deltas[list(classes_in_batch)])
            std_class_in_batch = np.std(deltas[list(classes_in_batch)])
            mean_class_non_in_batch = np.mean(deltas[list(classes_non_in_batch)])
            std_class_non_in_batch = np.std(deltas[list(classes_non_in_batch)])
            print('InBatch -> mean = %.2f%% std = %.2f%%, OutBatch -> mean = %.2f%% std = %.2f%%'
                  % (mean_class_in_batch * 100, std_class_in_batch * 100,
                     mean_class_non_in_batch * 100, std_class_non_in_batch * 100))
            filelog.Train_LogDetails_Update(
                conf['train_log_file'], batch,
                mean_class_in_batch, std_class_in_batch,
                mean_class_non_in_batch, std_class_non_in_batch)
            visualization.plot_confusion_matrix(
                cnf_matrix, normalize=True,
                title='CM after batch: ' + str(batch),
                save_as=conf['exp_path'] + 'CM/CM_' + str(batch) + '.png')

        if conf['compute_param_stats']:
            train_utils.stats_compute_param_change_and_update_prev(
                solver.net, param_stats, batch, param_change)

        if batch == 0:
            solver.net.save(conf['tmp_weights_file'])
            print('Weights saved to: ', conf['tmp_weights_file'])
            # The first-batch solver is replaced at batch 1. Keep it when no
            # further batch follows, because the parameter statistics computed
            # after the loop still read solver.net.
            if batch + 1 < batch_count:
                del solver

    print('Training Time: %.2f sec' % (time.time() - start_train))

    if conf['compute_param_stats']:
        stats_normalization = True
        train_utils.stats_normalize(
            solver.net, param_stats, batch_count, param_change, stats_normalization)
        visualization.Plot3d_param_stats(
            solver.net, param_change, batch_count, stats_normalization)

    filelog.Train_Log_End(conf['train_log_file'])
    filelog.Train_LogDetails_End(conf['train_log_file'])
    visualization.Plot_Incremental_Training_End(close=close_at_the_end)
# NOTE(review): this fragment is the tail of an evaluation loop whose `for`
# headers are not visible here, so the statements are laid out flat.
# Presumably the per-split report belongs to an inner loop over splits and the
# averaging to an outer loop over `range_value` -- confirm and restore that
# nesting when merging.

# Per-split report: accuracy plus total, BOW and classification timings.
print("Accuracy for split", split, ":", accuracy,
      "Total Time: ", class_time - start,
      ". BOW Time: ", bow_time - start,
      ". Classification Time: ", class_time - bow_time)
split += 1

# Average over the splits. Each mean goes into the list that matches its name:
# the original appended the accuracy mean to `time_list` and the time mean to
# `accuracy_list`, which handed swapped series to the plot below.
time_list.append(np.average(splits_time))
accuracy_list.append(np.average(splits_accuracy))

# test_images_filenames = open_pkl('test_images_filenames.dat')
# test_labels = open_pkl('test_labels.dat')

# Plot accuracy against time for every value in `range_value`.
plot_accuracy_vs_time(range_value, accuracy_list, time_list,
                      feature_name='Number of SIFT scales', title="DSIFT")

# The same label list is passed to both calls below so the matrix and the
# plotted class names are built from one ordering.
unique_labels = list(set(y_test))

# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, predicted_labels, labels=unique_labels)

# Plot normalized confusion matrix
np.set_printoptions(precision=2)
plot_confusion_matrix(cnf_matrix, classes=unique_labels, normalize=True,
                      title='Normalized confusion matrix')