def print_matrix(predictions):
    """Plot the normalized confusion matrix of ``predictions``.

    The reference labels come from the module-level ``clusters_true2``.
    Class names are the strings ``"0" .. str(max_label)`` where
    ``max_label`` is the largest label found in either the reference labels
    or the predictions. ``DATASET_PATH + DATASET_NAME`` and
    ``should_save=True`` are forwarded to ``plot_confusion_matrix``.

    :param predictions: predicted integer labels
    """
    every_label = np.concatenate([clusters_true2, predictions])
    label_count = 1 + np.max(every_label)
    class_names = np.array(list(map(str, range(label_count))))

    plot_confusion_matrix(
        clusters_true2,
        predictions,
        classes=class_names,
        normalize=True,
        title='Normalized confusion matrix',
        path=DATASET_PATH + DATASET_NAME,
        should_save=True,
    )
def evaluate(self, X, y):
    """
    Evaluates the trained crf model and prints a report.

    Printed, in order: micro-averaged precision/recall/F-score, token
    accuracy, sentence level accuracy and per-class precision/recall/F-score.
    Finally the confusion matrix is handed to ``plot_confusion_matrix``.

    :param X: test data: [[(word, pos), (word, pos)], [...]]
    :param y: test labels
    :return: None, everything is printed or plotted
    """
    predicted_sequences = self.crf.predict(X)
    sentence_accuracy = self.evaluate_sentence(y, predicted_sequences)

    gold = flatten(y)
    predicted = flatten(predicted_sequences)

    # Each section is computed only when it is about to be printed.
    sections = (
        ("F1 score:",
         lambda: sklearn.metrics.precision_recall_fscore_support(
             gold, predicted, average='micro')),
        ("Accuracy:",
         lambda: sklearn.metrics.accuracy_score(gold, predicted)),
        ("Sentence level accuracy:",
         lambda: sentence_accuracy),
        ("F1 score per class:",
         lambda: sklearn.metrics.precision_recall_fscore_support(
             gold, predicted)),
    )
    for heading, compute in sections:
        print(heading)
        print(compute())
        print()

    print("Confusion matrix:")
    cfm = sklearn.metrics.confusion_matrix(gold, predicted)
    plot_confusion_matrix(cfm, np.unique(gold))
def get_results(clf, data, fit_time, feature_set, clf_name='',
                save_confusion=False, chunk_size=128):
    """Predict the test split in chunks and report timing and accuracy.

    :param clf: fitted classifier exposing ``predict``
    :param data: mapping with ``data['test']['X']`` and ``data['test']['y']``
    :param fit_time: training time in seconds, only used for printing
    :param feature_set: name used in the saved confusion-plot path
    :param clf_name: classifier name used in output and in the plot path
    :param save_confusion: when true, the confusion matrix is passed to
        ``u.plot_confusion_matrix`` together with a path and title
    :param chunk_size: number of test rows passed to ``predict`` per call
    :return: dict with the keys ``'testing_time'`` and ``'accuracy'``
    """
    results = {}
    test_X = data['test']['X']
    test_y = data['test']['y']

    t0 = time.time()
    # Collect per-chunk predictions and join them once: repeated np.append
    # copies the whole array each round and forces float64 on the labels.
    chunks = [
        np.ravel(clf.predict(test_X[start:start + chunk_size]))
        for start in range(0, len(test_X), chunk_size)
    ]
    predicted = np.concatenate(chunks) if chunks else np.array([])
    t1 = time.time()

    cm = metrics.confusion_matrix(test_y, predicted)
    results['testing_time'] = t1 - t0
    results['accuracy'] = metrics.accuracy_score(test_y, predicted)

    print("classifier: %s" % clf_name)
    print("training time: %0.4fs" % fit_time)
    print("testing time: %0.4fs" % results['testing_time'])
    print("accuracy: %0.4f" % results['accuracy'])
    print("confusion matrix:\n%s" % cm)

    if save_confusion:
        path = './confusion_plots/%s_%s' % (clf_name, feature_set)
        title = '%s (accuracy: %0.2f)' % (clf_name, results['accuracy'])
        u.plot_confusion_matrix(cm, path, title=title)
    return results
def performance_summary(y_test, y_predicted, output, y_mapping=None,
                        y_labels=None):
    """Print summary scores, store the classification report, plot confusion.

    Accuracy and macro-averaged precision/recall/F1 are printed as a dict.
    The detailed report is printed and written to
    ``<output>/classification_report.txt``; the confusion matrix is passed to
    ``utils.plot_confusion_matrix`` with ``<output>/confusion_matrix.pdf``.

    :param y_test: true labels
    :param y_predicted: predicted labels
    :param output: directory receiving the report and the plot
    :param y_mapping: optional callable applied to every label before the
        detailed report and the plot are produced
    :param y_labels: forwarded as ``labels`` to the plotting helper
    """
    scores = {
        'Accuracy': accuracy_score(y_test, y_predicted),
        'Precision': precision_score(y_test, y_predicted, average='macro'),
        'Recall': recall_score(y_test, y_predicted, average='macro'),
        'F1': f1_score(y_test, y_predicted, average='macro'),
    }
    print(scores)

    print(' - Detailed classification report:')
    if y_mapping is not None:
        y_test = [y_mapping(label) for label in y_test]
        y_predicted = [y_mapping(label) for label in y_predicted]
    detailed_report = classification_report(y_test, y_predicted)
    print(detailed_report)

    report_path = os.path.join(output, 'classification_report.txt')
    with open(report_path, 'w') as report_file:
        report_file.write(detailed_report)

    print(' - Saving confusion matrix')
    figure_path = os.path.join(output, 'confusion_matrix.pdf')
    utils.plot_confusion_matrix(y_test, y_predicted, labels=y_labels,
                                output=figure_path)
def eval_fn(self, loader, device, train=False, confusion_m=False,
            criterion=None):
    """
    Evaluation method

    :param loader: data loader for either training or testing set
    :param device: torch device
    :param train: boolean to indicate if training or test set is used
        (not read by this method)
    :param confusion_m: when true, a normalized confusion matrix is plotted
        for every batch
    :param criterion: optional loss; when given, its value is averaged
    :return: tuple (average top-1 score, average loss)
    """
    loss_meter = AvgrageMeter()
    score_meter = AvgrageMeter()
    self.eval()

    progress = tqdm(loader)
    with torch.no_grad():
        for images, labels in progress:
            images, labels = images.to(device), labels.to(device)
            batch_size = images.size(0)

            outputs = self(images)
            top1, _ = accuracy(outputs, labels, topk=(1, 5))
            score_meter.update(top1.item(), batch_size)

            if criterion:
                batch_loss = criterion(outputs, labels)
                loss_meter.update(batch_loss.data, batch_size)

            if confusion_m:
                # Plot confusion matrix from the top-1 class of each sample
                top1_classes = outputs.topk(1, 1, True, True)[1]
                plot_confusion_matrix(labels.cpu(), top1_classes.cpu(),
                                      normalize=True,
                                      title='Confusion matrix')

            progress.set_description(
                '(=> Test) Score: {:.4f}'.format(score_meter.avg))

    return score_meter.avg, loss_meter.avg
def evaluate(self, X_val, y_val):
    """Evaluate the saved model on the validation set and export results.

    Loads the model from ``self.model_file``, prints validation loss and
    accuracy, writes the row-normalized confusion matrix to ``cm_val.csv``
    and saves a percentage plot (``cm_percent_val.png``) and a count plot
    (``cm_val.png``). All files go to ``self.base_dir + self.model_dir``.

    :param X_val: validation inputs
    :param y_val: validation targets
    """
    model = load_model(self.model_file)
    scores = model.evaluate(X_val, y_val)
    val_loss, val_acc = scores[0], scores[1]
    print('val_loss: {}, val_acc: {}'.format(val_loss, val_acc))

    y_true, y_pred = get_predictions_and_labels(model, X_val, y_val)
    cm = confusion_matrix(y_true, y_pred)
    # Divide every row by its total so rows hold per-class fractions.
    cm_percent = cm.astype('float') / cm.sum(axis=1, keepdims=True)

    table = pd.DataFrame(cm_percent, index=self.EMOTIONS,
                         columns=self.EMOTIONS)
    table.index.name = 'Actual'
    table.columns.name = 'Predicted'
    table.to_csv(self.base_dir + self.model_dir + 'cm_val.csv',
                 float_format='%.4f')

    # plot percentage confusion matrix
    plt.subplots()
    plot_confusion_matrix(cm_percent, class_names=self.EMOTIONS)
    plt.savefig(self.base_dir + self.model_dir + 'cm_percent_val.png',
                format='png')

    # plot normal confusion matrix
    plt.subplots()
    plot_confusion_matrix(cm, float_display='.0f', class_names=self.EMOTIONS)
    plt.savefig(self.base_dir + self.model_dir + 'cm_val.png', format='png')

    plt.show()
def do_viterbi(self):
    """Run ``smarthouse`` with the options chosen in the UI and show results.

    The dataset ("A" or "B") is always passed; ``test_days`` and
    ``n_samples`` are only passed when their radio button is checked. The
    classification report and a normalized confusion matrix are plotted and
    the accuracy and both sequences are written to the text widgets.
    """
    parameters = {"dataset": "A" if self.a_radio.isChecked() else "B"}
    if self.split_radio.isChecked():
        parameters["test_days"] = self.days_spin.value()
    if self.sampling_radio.isChecked():
        parameters["n_samples"] = self.samples_spin.value()

    sample, predicted, accuracy = smarthouse(**parameters)

    plot_classification_report(sample, predicted)
    plt.figure(2)
    # One class name per integer label from 0 up to the largest sampled one.
    class_names = [str(label) for label in range(max(sample) + 1)]
    plot_confusion_matrix(sample, predicted, class_names, normalize=True)

    sample_text, predicted_text = self.format_sequences(sample, predicted)
    self.accuracy_value_label.setText(f"{accuracy*100:.3f}")
    self.sample_textbrowser.setText(sample_text)
    self.predicted_textbrowser.setText(predicted_text)
    plt.show()
def compare_model(self, X_val, y_val):
    """Evaluate every saved LSTM model found under ``self.base_dir``.

    For each directory entry whose name contains ``'LSTM'`` the file
    ``LSTM.h5`` is loaded, loss and accuracy are printed, and a percentage
    and a count confusion-matrix plot are saved into that folder.

    :param X_val: evaluation inputs
    :param y_val: evaluation targets
    """
    filename = 'LSTM.h5'
    for folder in os.listdir(self.base_dir):
        if 'LSTM' not in folder:
            continue
        model_folder = os.path.join(self.base_dir, folder)

        model = load_model(os.path.join(model_folder, filename))
        scores = model.evaluate(X_val, y_val)
        print('model: {}, val_loss: {}, val_acc: {}'.format(
            folder, scores[0], scores[1]))

        y_true, y_pred = get_predictions_and_labels(model, X_val, y_val)
        cm = confusion_matrix(y_true, y_pred)
        # Divide every row by its total so rows hold per-class fractions.
        cm_percent = cm.astype('float') / cm.sum(axis=1, keepdims=True)

        # plot percentage confusion matrix
        plt.subplots()
        plot_confusion_matrix(cm_percent, class_names=self.EMOTIONS)
        plt.savefig(os.path.join(model_folder, 'cm_percent_test.png'),
                    format='png')

        # plot normal confusion matrix
        plt.subplots()
        plot_confusion_matrix(cm, float_display='.0f',
                              class_names=self.EMOTIONS)
        plt.savefig(os.path.join(model_folder, 'cm_test.png'), format='png')
def main_pipeline():
    """Fit a scaled L1 logistic regression on undersampled data and plot it.

    The pipeline is fitted on the undersampled set returned by
    ``generate_undersample_rus`` and then used to predict the full source
    data. The recall of class 1 is printed and the raw confusion matrix is
    plotted.
    """
    pipe = Pipeline([
        ('scale', StandardScaler()),
        # 'liblinear' is named explicitly: it supports the L1 penalty, while
        # the default solver of current scikit-learn releases rejects it.
        ('classify', LogisticRegression(C=0.01, penalty='l1',
                                        solver='liblinear')),
    ])

    data = load_data()
    X, y = get_X_y(data)
    X_undersample, y_undersample = generate_undersample_rus(X, y)
    pipe.fit(X_undersample, y_undersample)

    # predict on source data
    y_pred = pipe.predict(X)

    # Compute confusion matrix
    cnf_matrix = confusion_matrix(y, y_pred)
    np.set_printoptions(precision=2)
    # Row 1 holds the true class-1 samples: recall = TP / (FN + TP).
    print("Recall metric in the testing dataset: ",
          cnf_matrix[1, 1] / (cnf_matrix[1, 0] + cnf_matrix[1, 1]))

    # Plot non-normalized confusion matrix
    class_names = [0, 1]
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names,
                          title='Confusion matrix')
    plt.show()
    print('plt.show')
def model(clf, Xtrain, Xtest, ytrain, ytest, modelname):
    """
    Fit ``clf`` and print/plot its results on the test split.

    Prints the recall of class 1, plots the confusion matrix, prints the
    TP/FP/FN cells and finally the classification report.

    :param clf: estimator
    :param Xtrain: training features
    :param Xtest: test features
    :param ytrain: training labels, also the source of the plotted classes
    :param ytest: test labels
    :param modelname: name used in the printed recall line and plot title
    """
    clf.fit(Xtrain, ytrain)
    pred = clf.predict(Xtest)
    cm = confusion_matrix(ytest, pred)

    banner = '############################'
    print(banner)
    print("the recall for", modelname, ' is:',
          cm[1, 1] / (cm[1, 1] + cm[1, 0]))
    print(banner)

    fig, ax = plt.subplots()
    u.plot_confusion_matrix(cm, classes=np.unique(ytrain), ax=ax,
                            title='resampled' + modelname)
    plt.show()

    # Rows index the true class and columns the predicted class.
    cells = (
        ("TP: ", (1, 1), "exceptional events transaction predicted exception"),
        ("FP: ", (0, 1), "normal events predicted exception"),
        ("FN: ", (1, 0), "exceptional events predicted normal"),
    )
    for tag, position, meaning in cells:
        print(tag, cm[position], meaning)

    print(
        "\n----------Classification Report------------------------------------"
    )
    print(classification_report(ytest, pred))
def visualize(m_test, x_test, y_test, model, variant=None):
    """Plot the confusion matrix of ``model`` on the test set.

    Both ``y_test`` and the model output are reduced with ``argmax`` over
    axis 1 before the confusion matrix is built. The plot name is ``'RNN'``,
    extended by ``'_' + variant`` when a variant is given.

    :param m_test: first model input
    :param x_test: second model input
    :param y_test: targets, one row per sample
    :param model: model exposing ``predict``
    :param variant: optional suffix for the plot name
    """
    # viz accuracy
    print('predicting test set...')
    class_scores = model.predict([m_test, x_test], batch_size=48)

    true_classes = y_test.argmax(axis=1)
    predicted_classes = class_scores.argmax(axis=1)
    conf_mat = confusion_matrix(true_classes, predicted_classes)

    name = 'RNN' if variant is None else 'RNN' + '_' + variant
    plot_confusion_matrix(conf_mat, RARITIES, name)
def predict(**kwargs):
    """Run ``smarthouse`` and report its predictions.

    Prints the classification report and plots a normalized confusion
    matrix whose class names are ``"0" .. str(max(truth))``.

    :param kwargs: forwarded unchanged to ``smarthouse``
    """
    truth, predicted, _accuracy = smarthouse(**kwargs)
    print(sklearn.metrics.classification_report(truth, predicted))

    # Labels run from 0 to max(truth) inclusive, so the range needs "+ 1";
    # without it the highest class had no name.
    class_names = [str(label) for label in range(max(truth) + 1)]
    plot_confusion_matrix(truth, predicted, class_names, normalize=True)
def val(netG_A2B, netG_B2A, netD_A, netD_B, netGaze):
    """Validate the B->A generator together with the gaze network.

    Each validation batch is mapped through ``netG_B2A`` and classified by
    ``netGaze``. Predictions and targets are accumulated, the accuracy is
    taken from ``plot_confusion_matrix`` and appended to ``logs.txt`` in
    ``args.output_dir``. When the accuracy beats the global
    ``best_accuracy``, all five networks are saved and ``best_accuracy`` is
    updated.

    :return: validation accuracy as returned by ``plot_confusion_matrix``
    """
    global best_accuracy
    netG_B2A.eval()
    netGaze.eval()

    pred_all = np.array([], dtype='int64')
    target_all = np.array([], dtype='int64')
    dataset = val_loader.dataset
    total_images = len(dataset)

    for idx, (data, target) in enumerate(val_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data[:, :args.nc, :, :]), Variable(target)

        # do the forward pass
        data = gaze2gan(data, dataset.mean[0:args.nc], dataset.std[0:args.nc])
        fake_data = netG_B2A(data)
        fake_data = gan2gaze(fake_data, dataset.mean[0:args.nc],
                             dataset.std[0:args.nc])
        scores = netGaze(fake_data.repeat(1, int(3 / args.nc), 1, 1))[0]
        scores = scores.view(-1, args.num_classes)
        # got the indices of the maximum, match them
        pred = scores.data.max(1)[1]

        images_done = min(args.batch_size * (idx + 1), total_images)
        print('Done with image {} out of {}...'.format(images_done,
                                                       total_images))

        pred_all = np.append(pred_all, pred.cpu().numpy())
        target_all = np.append(target_all, target.cpu().numpy())

    val_accuracy, _ = plot_confusion_matrix(target_all, pred_all,
                                            merged_activity_classes)

    print("\n------------------------")
    print("Validation accuracy = {:.2f}%\n------------------------".format(
        val_accuracy))
    with open(os.path.join(args.output_dir, "logs.txt"), "a") as f:
        f.write("\n------------------------\n")
        f.write(
            "Validation accuracy = {:.2f}%\n------------------------\n".format(
                val_accuracy))

    # now save the model if it has better accuracy than the best model seen
    # so far
    if val_accuracy > best_accuracy:
        checkpoints = (
            (netG_A2B, 'netG_A2B.pth'),
            (netG_B2A, 'netG_B2A.pth'),
            (netD_A, 'netD_A.pth'),
            (netD_B, 'netD_B.pth'),
            (netGaze, 'netGaze.pth'),
        )
        for network, file_name in checkpoints:
            torch.save(network.state_dict(),
                       os.path.join(args.output_dir, file_name))
        best_accuracy, _ = plot_confusion_matrix(target_all, pred_all,
                                                 merged_activity_classes,
                                                 args.output_dir)

    return val_accuracy
def evaluate_model(config, model, X_test, Y_test, savedir, combination):
    """Save confusion-matrix and ROC figures for ``model`` on the test data.

    The class list is the 15 minor classes when
    ``config['training-mode']['subclasses']`` equals 1, otherwise the 4
    major classes. Two files are written to ``savedir``:
    ``cm_<combination>`` (raw and normalized confusion matrix side by side)
    and ``roc_<combination>``.

    :param config: configuration mapping, see above
    :param model: model exposing ``predict``
    :param X_test: test inputs; ``shape[2]`` is the feature count
    :param Y_test: test targets; ``shape[2]`` is the target count
    :param savedir: directory receiving both figures
    :param combination: suffix for both file names
    """
    features = X_test.shape[2]
    targets = Y_test.shape[2]
    major_classes = ['idle', 'stop', 'go', 'clear']
    minor_classes = [
        "idle", "stop_both-static", "stop_both-dynamic", "stop_left-static",
        "stop_left-dynamic", "stop_right-static", "stop_right-dynamic",
        "clear_left-static", "clear_right-static", "go_both-static",
        "go_both-dynamic", "go_left-static", "go_left-dynamic",
        "go_right-static", "go_right-dynamic"
    ]

    predictions = model.predict(X_test)

    # Collapse the leading axes once; every helper below takes 2-D input.
    flat_x = X_test.reshape(-1, features)
    flat_y = Y_test.reshape(-1, targets)
    flat_pred = predictions.reshape(-1, targets)

    if int(config['training-mode']['subclasses']) == 1:
        classes_tcg = minor_classes
        predictions_bin = utils.binarize_predictions(flat_x, flat_y,
                                                     flat_pred,
                                                     subclasses=True)
    else:
        classes_tcg = major_classes
        predictions_bin = utils.binarize_predictions(flat_x, flat_y,
                                                     flat_pred)

    cnf_matrix = confusion_matrix(predictions_bin[1], predictions_bin[2],
                                  labels=classes_tcg)
    np.set_printoptions(precision=2)

    plt.figure(figsize=(30, 12))
    plt.subplot(121)
    utils.plot_confusion_matrix(cnf_matrix, classes=classes_tcg,
                                title='Confusion matrix, without normalization')
    # The figure must stay open between the two panels: closing it here threw
    # away the left panel and the figure size before anything was saved.
    plt.subplot(122)
    utils.plot_confusion_matrix(cnf_matrix, classes=classes_tcg,
                                normalize=True,
                                title='Confusion matrix with normalization')
    plt.suptitle("confusion matrix")
    plt.subplots_adjust(top=0.88)
    plt.savefig(os.path.join(savedir, 'cm' + '_' + combination))
    # Close only after saving so the ROC plot cannot land on these axes.
    plt.close('all')

    unpadded_seq = utils.delete_pading(flat_x, flat_y, flat_pred)
    utils.plot_roc_multiclass(unpadded_seq[1], unpadded_seq[2], targets,
                              classes_tcg)
    plt.savefig(os.path.join(savedir, 'roc' + '_' + combination))
    plt.close('all')
def _train_party_classifier(self, force: bool = False): """ Trains classifier learning to predict political party from moral relevance weight vectors. :param force: Trains and overwrites classifier even if already available. :return: """ pp_model_path = "data/party_predictor.pkl" pp_predictor = None # Build model predicting moral values for word. if force or not os.path.isfile(pp_model_path): df = self._users_df.sample(frac=1) df.mv_scores = df.mv_scores.values / df.num_words.values df.loc[df.party == "Libertarians", "party"] = "Republican Party" class_names = ["Republican Party", "Democratic Party"] x = np.asarray([np.asarray(x) for x in df.mv_scores.values]) le = preprocessing.LabelEncoder() le.fit(class_names) y = le.transform(df.party.values) for train_index, test_index in StratifiedShuffleSplit( n_splits=1, test_size=0.5).split(x, y): x_train, x_test = x[train_index], x[test_index] y_train, y_test = y[train_index], y[test_index] pp_predictor = xgb.XGBClassifier(objective='binary:logistic', colsample_bytree=0.7, learning_rate=0.05, n_estimators=6000, n_jobs=0, nthread=0) pp_predictor.fit(x_train, y_train) pickle.dump(pp_predictor, open(pp_model_path, "wb")) y_pred = pp_predictor.predict(x_test) print( classification_report(y_test, y_pred, target_names=class_names)) utils.plot_precision_recall_curve(y_test, y_pred) utils.plot_roc_curve(y_test, y_pred, 2) utils.plot_confusion_matrix( y_test, y_pred, ["Republican Party", "Democratic Party"], title="Confusion Matrix") # scores = cross_val_score(pp_predictor, x, y, cv=20, scoring='f1_macro') # print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) # Load built model. else: pp_predictor = pickle.load(open( pp_model_path, "rb")) # pd.read_pickle(path=pp_model_path) return pp_predictor
def make_confusion_matrix(self):
    """Plot the confusion matrix of the out-of-fold predictions.

    True labels are the ``label`` column of
    ``<ROOT>/input/<raw_dirname>/train.csv``; predictions are the ``pred``
    column of ``oof_preds.csv`` in ``self.val_preds_path``. The matrix is
    passed to ``plot_confusion_matrix`` with ``self.WORK_DIR`` as save path.
    """
    truth_frame = pd.read_csv(
        osp.join(self.ROOT, "input", self.raw_dirname, "train.csv"))
    y_true = truth_frame["label"].values

    oof_frame = pd.read_csv(osp.join(self.val_preds_path, "oof_preds.csv"))
    y_pred = oof_frame["pred"].values

    plot_confusion_matrix(cm=confusion_matrix(y_true, y_pred),
                          classes=self.classes,
                          save_path=self.WORK_DIR)
def evaluate_by_frame_state_level(predictions_idx, true_idx, stateList,
                                  model_name):
    """Print frame-level accuracy and plot the state confusion matrix.

    :param predictions_idx: predicted state index per frame
    :param true_idx: true state index per frame
    :param stateList: list of states; only its length is passed on
    :param model_name: forwarded to ``plot_confusion_matrix``
    :return: accuracy as a fraction (printed as a percentage)
    """
    frame_accuracy = accuracy_score(predictions_idx, true_idx)
    print('Frame-by-frame at the state level: ', frame_accuracy * 100, '%')

    # plot confusion matrix
    state_confusion = confusion_matrix(true_idx, predictions_idx)
    plot_title = 'Confusion matrix for frame-by-frame at state level'
    plot_confusion_matrix(state_confusion, len(stateList), model_name,
                          plot_title)

    return frame_accuracy
def test_vgg_dataset():
    """Build a confusion matrix for the 'ucm' validation images with VGG16N.

    When a checkpoint exists under ``dataset['checkpoint_path']`` it is
    restored, every image below ``<data_path>/validation/<class>/`` is
    resized and classified, and the counts are accumulated per
    (true class, predicted class). The matrix is then plotted and written
    to the file ``ucm_vgg_confusion_matrix``.

    NOTE(review): the source had no indentation; plotting and saving are
    placed inside the checkpoint branch here. Confirm against the original.
    """
    global dataset_config
    dataset = dataset_config['ucm']

    # Graph nodes that read one image file and resize it.
    img_path = tf.placeholder(tf.string)
    raw_bytes = tf.read_file(img_path)
    img = tf.image.decode_image(raw_bytes, channels=3)
    img2 = tf.image.resize_nearest_neighbor([img],
                                            [config.IMG_H, config.IMG_W])

    x = tf.placeholder(tf.float32, shape=[1, config.IMG_W, config.IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[1, config.N_CLASSES])
    logits = VGG.VGG16N(x, config.N_CLASSES, False)
    predict = tf.argmax(logits, 1)

    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(dataset['checkpoint_path'])
    n_class = dataset['n_class']
    matrix_confusion = np.zeros((n_class, n_class))

    if ckpt and ckpt.model_checkpoint_path:
        # The step number is the text after the last '-' of the file name.
        checkpoint_file = ckpt.model_checkpoint_path.split('/')[-1]
        global_step = checkpoint_file.split('-')[-1]
        print('step: ', global_step)

        with tf.Session() as sess:
            saver.restore(sess, ckpt.model_checkpoint_path)
            val_data_path = os.path.join(dataset['data_path'], 'validation')
            for val_class_name in os.listdir(val_data_path):
                class_path = os.path.join(val_data_path, val_class_name)
                class_index = dataset['class2label'][val_class_name]
                for val_img_name in os.listdir(class_path):
                    val_img_path = os.path.join(class_path, val_img_name)
                    resized = sess.run(img2,
                                       feed_dict={img_path: val_img_path})
                    pre = sess.run(predict, feed_dict={x: resized})
                    print(class_index, pre)
                    matrix_confusion[class_index, pre] += 1

        utils.plot_confusion_matrix(matrix_confusion,
                                    normalize=False,
                                    target_names=config.ucm_class,
                                    title="Confusion Matrix")
        np.savetxt('ucm_vgg_confusion_matrix', matrix_confusion)
def confusion_matrix(self, normalize=False):
    """
    Plots a confusion matrix of the model

    Predictions are the argmax over axis 1 of
    ``self.model.predict_generator(self.test_batches)``; ground truth and
    class names are read from ``self.test_batches``.

    :param normalize: forwarded to ``utils.plot_confusion_matrix``
    """
    class_scores = self.model.predict_generator(self.test_batches)
    predicted = np.argmax(class_scores, axis=1)

    ground_truth = self.test_batches.classes
    class_names = list(self.test_batches.class_indices)
    utils.plot_confusion_matrix(ground_truth, predicted, class_names,
                                normalize=normalize)
def evaluate(X_test, y_test):
    """Evaluate the saved MLP and save its percentage confusion matrix.

    Loads ``../MLP/mlp.h5``, prints loss and accuracy, and plots the
    row-normalized confusion matrix with the class names Cat/Dog. The figure
    is written to ``../MLP/cm_percent_val.png`` and then shown.

    :param X_test: evaluation inputs
    :param y_test: evaluation labels
    """
    # evaluate the model with validation set
    model = load_model('../MLP/mlp.h5')
    scores = model.evaluate(X_test, y_test)
    val_loss, val_acc = scores[0], scores[1]
    print('val_loss: {}, val_acc: {}'.format(val_loss, val_acc))

    predicted_classes = model.predict_classes(X_test)
    cm = confusion_matrix(y_test, predicted_classes)
    # Divide every row by its total so rows hold per-class fractions.
    cm_percent = cm.astype('float') / cm.sum(axis=1, keepdims=True)

    # plot percentage confusion matrix
    plot_confusion_matrix(cm_percent, class_names=['Cat', 'Dog'])
    plt.savefig('../MLP/cm_percent_val.png', format='png')
    plt.show()
def knn_classify(X, y, neighbors=1, test_size=0.3, plot_conf_matrix=True):
    """Fit a kd-tree k-NN classifier on a fixed split and optionally plot it.

    The split uses ``random_state=0``. Nothing is returned; the only output
    is the optional confusion-matrix plot.

    :param X: feature matrix
    :param y: labels
    :param neighbors: number of neighbours
    :param test_size: forwarded to ``train_test_split``
    :param plot_conf_matrix: when true, plot the test-set confusion matrix
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=0, test_size=test_size)

    classifier = KNeighborsClassifier(n_neighbors=neighbors,
                                      algorithm='kd_tree')
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    if not plot_conf_matrix:
        return
    title = "KNN Classification with N={0}".format(neighbors)
    plot_confusion_matrix(y_test, y_pred, title)
def evaluate(model, o_idx, output_dict=True, plot_matrix=False):
    """Score ``model`` on the module-level ``X_test`` / ``y_test``.

    The classification report ignores every token whose true tag id equals
    ``o_idx``. With ``plot_matrix`` set, each mispredicted token is written
    to ``error_path`` as ``"<word> <true tag> <predicted tag>"`` and a raw
    and a normalized confusion matrix are plotted over all tokens.

    :param model: callable as ``model(sample[0], sample[1])`` returning
        ``(score, tag_sequence)``
    :param o_idx: tag id excluded from the report
    :param output_dict: forwarded to ``classification_report``
    :param plot_matrix: enable the error dump and the plots
    :return: the result of ``classification_report``
    """
    y_true = []
    y_pred = []
    with torch.no_grad():
        for i, sample in enumerate(X_test):
            y_true.extend(tag_to_ix[tag] for tag in y_test[i])
            _, tag_seq = model(sample[0], sample[1])
            y_pred.extend(tag_seq)
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    exclude_o_idx = np.where(y_true != o_idx)
    y_pred_without_o_class = [id_to_tag[t] for t in y_pred[exclude_o_idx]]
    y_without_o_class = [id_to_tag[t] for t in y_true[exclude_o_idx]]
    perf = classification_report(y_without_o_class,
                                 y_pred_without_o_class,
                                 output_dict=output_dict,
                                 labels=labels)

    if plot_matrix:
        np.set_printoptions(precision=2)
        # ``position`` walks the flat tag arrays in step with the tokens.
        position = 0
        # The context manager closes the file even if a lookup below raises.
        with open(error_path, 'w') as error_file:
            for sample in X_test:
                for token in sample[0]:
                    if y_true[position] != y_pred[position]:
                        error_file.write(id2word[token[4].item()] + " " +
                                         id_to_tag[y_true[position]] + " " +
                                         id_to_tag[y_pred[position]] + "\n")
                    position += 1

        utils.plot_confusion_matrix(y_true, y_pred,
                                    np.array(not_removed_label))
        utils.plot_confusion_matrix(y_true, y_pred,
                                    np.array(not_removed_label),
                                    normalize=True)
        plt.show()

    return perf
def evaluate(model, o_idx, X_test, y_test, output_dict=True,
             plot_matrix=False):
    """Score ``model`` on ``X_test`` / ``y_test``.

    The classification report ignores every token whose true tag id equals
    ``o_idx``. With ``plot_matrix`` set, each mispredicted word is written
    to ``error_path`` as ``"<word> <true tag> <predicted tag>"`` and a raw
    and a normalized confusion matrix are plotted over all tokens.

    :param model: callable returning one row of class scores per token
    :param o_idx: tag id excluded from the report
    :param X_test: iterable of texts, each an iterable of words
    :param y_test: tag sequences aligned with ``X_test``
    :param output_dict: forwarded to ``classification_report``
    :param plot_matrix: enable the error dump and the plots
    :return: the result of ``classification_report``
    """
    y_true = []
    y_pred = []
    with torch.no_grad():
        for i, text in enumerate(X_test):
            y_true.extend(tag_to_ix[tag] for tag in y_test[i])
            _, pred_Y = torch.max(model(text), 1)
            # Plain ints instead of 0-d tensors: building the array below
            # then also works when the model output lives on a GPU.
            y_pred.extend(pred_Y.tolist())
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    exclude_o_idx = np.where(y_true != o_idx)
    y_pred_without_o_class = [id_to_tag[t] for t in y_pred[exclude_o_idx]]
    y_without_o_class = [id_to_tag[t] for t in y_true[exclude_o_idx]]
    perf = classification_report(y_without_o_class,
                                 y_pred_without_o_class,
                                 output_dict=output_dict,
                                 labels=labels)

    if plot_matrix:
        np.set_printoptions(precision=2)
        # ``position`` walks the flat tag arrays in step with the words.
        position = 0
        # The context manager closes the file even if a lookup below raises.
        with open(error_path, 'w') as error_file:
            for text in X_test:
                for word in text:
                    if y_true[position] != y_pred[position]:
                        error_file.write(word + " " +
                                         id_to_tag[y_true[position]] + " " +
                                         id_to_tag[y_pred[position]] + "\n")
                    position += 1

        utils.plot_confusion_matrix(y_true, y_pred,
                                    np.array(not_removed_label))
        utils.plot_confusion_matrix(y_true, y_pred,
                                    np.array(not_removed_label),
                                    normalize=True)
        plt.show()

    return perf
def plot_confusion_matrix_figure(self, dirname, predict, targets, mods):
    """Plot the confusion matrix of the prediction results.

    Args:
        dirname (str): folder in which the figure is stored
        predict (2-D array, (length, probability)): predictions produced by
            the network
        targets (1-D array or 2-D one-hot array): corresponding true labels
        mods (1-D array): true class names, str
    """
    confusion = util.generate_confusion_matrix(predict, targets, mods)
    # Create the target folder before the plot helper receives it.
    util.ensure_dir(dirname)
    util.plot_confusion_matrix(confusion, dirname, mods)
    print("Figure 'Confusion Matrix' generated successfully")
def plot_report(fig_name, plot_cm=False):
    """
    plot the comparison result using different ML methods

    Every method is trained through ``utils.train`` on the train/valid split
    built from ``log/peps mini``; the per-method reports are collected in one
    data frame and drawn as a bar chart saved to
    ``<dir_fig>/<fig_name>.png``.

    :param fig_name: saved figure name
    :param plot_cm: whether to plot confusion matrix result; each plot is
        saved as ``<dir_fig>/methods/<method>.png``
    :return: None, figures are written to disk
    """
    log_dir = "log/peps mini"
    pattern = r'(internal|access|lock)\\\d{1,2}.csv$'
    pattern_valid = r'(3|6|9|12).csv$'
    utils.construct_set(log_dir, pattern, pattern_valid)
    X_train, X_valid, y_train, y_valid = utils.load_train_valid()

    # Each method paired with the hyper-parameters handed to utils.train.
    method_params = [
        ("Logistic", None),
        ("LDA", None),
        ("QDA", None),
        ("KNN", {"n_neighbors": 10}),
        ("SVM", {"C": 0.25, "gamma": 0.5}),
        ("RF", {"max_features": 2, "n_estimators": 100}),
        ("GBM", {"n_estimators": 400, "max_depth": 3}),
        ("MLP", {"hidden_layer_sizes": (16, 8)}),
    ]

    methods_dir = dir_fig + '/methods/'
    df_report = pd.DataFrame()
    for method, param in method_params:
        cm, report_temp, classes = utils.train(X_train, X_valid, y_train,
                                               y_valid, method=method,
                                               param=param)
        # NOTE(review): DataFrame.append no longer exists in pandas 2.x.
        df_report = df_report.append(report_temp, ignore_index=True)
        if plot_cm:
            plt.figure()
            utils.plot_confusion_matrix(cm, classes, normalize=True)
            plt.title(method)
            if not os.path.exists(methods_dir):
                os.makedirs(methods_dir)
            plt.savefig(methods_dir + method + '.png')

    df_report.set_index('method', inplace=True)
    df_report.plot(kind='bar', rot=0, figsize=(16, 6), ylim=(0.6, 1))
    plt.title(fig_name)
    if not os.path.exists(dir_fig):
        os.makedirs(dir_fig)
    plt.savefig(dir_fig + '/' + fig_name + '.png')
def test_on_fer_test_set(fer_path, model_type="CustomVGG"):
    """Evaluate a saved model on the rows of the FER csv attributed to test.

    Prints load time, accuracy, the average probability of the right class,
    the evaluation duration and the grouped-class accuracy, then plots both
    confusion matrices returned by ``evaluate``.

    :param fer_path: path of the FER csv; needs an ``attribution`` column
    :param model_type: "CustomVGG", "DenseSIFTHybrid" or "SIFTHybrid";
        selects the model and the batch preprocessing
    :raises Exception: when the csv has no ``attribution`` column
    """
    load_started = time()
    fer = pd.read_csv(fer_path)
    if "attribution" not in fer:
        raise Exception(
            "Fer not split between train/val/test. Please run split_fer script."
        )
    is_test_row = fer["attribution"] == "test"
    fer_test = fer[is_test_row].reset_index()
    model = load_model(model_type=model_type)
    print("Loaded fer test set and model in {}s".format(
        round(time() - load_started, 2)))

    eval_started = time()

    def preprocess_batch(pixelstring_batch, emotions_batch, DEVICE):
        # Choose the preprocessing that matches the model type; an unknown
        # type yields None.
        if model_type == "CustomVGG":
            preprocess = preprocess_batch_custom_vgg
        elif model_type == "DenseSIFTHybrid":
            preprocess = preprocess_batch_dense_sift_hybrid
        elif model_type == "SIFTHybrid":
            preprocess = preprocess_batch_sift_hybrid
        else:
            return None
        return preprocess(pixelstring_batch, emotions_batch, DEVICE, False,
                          config["loss_mode"])

    use_descriptors = model_type in ("DenseSIFTHybrid", "SIFTHybrid")
    # we don't care about the test loss value here.
    dummy_weights = torch.FloatTensor([1] * len(config["catslist"])).to(
        DEVICE)

    proba, _, acc, cm1, cm2, acc_fact = evaluate(
        model,
        fer_test,
        preprocess_batch,
        dummy_weights,
        DEVICE,
        compute_cm=True,
        use_descriptors=use_descriptors)

    print("FINAL ACCURACY: {}".format(acc))
    print("Average predicted proba for right class: {}".format(proba))
    print("Duration on {} test faces: {}s".format(
        len(fer_test), round(time() - eval_started, 2)))
    print("Accuracy with grouped classes : {}".format(acc_fact))
    print("Close the confusion matrices to end the script.")

    plot_confusion_matrix(cm1, config["catslist"])
    plot_confusion_matrix(cm2, ["bad", "good", "surprise", "neutral"])
def evaluate(validation_generator):
    """Evaluate the saved CNN on the cats/dogs validation generator.

    True labels are rebuilt from the number of files in the two validation
    folders: 0 for every cat, then 1 for every dog. The row-normalized
    confusion matrix is plotted, saved to ``../CNN/cm_percent_val.png`` and
    shown.

    NOTE(review): this label order presumably requires the generator to
    yield all cats before all dogs, unshuffled. Confirm at the call site.

    :param validation_generator: generator passed to the keras model
    """
    # evaluate the model with validation set
    n_cats = len(os.listdir('../data/validation/cats/'))
    n_dogs = len(os.listdir('../data/validation/dogs/'))
    y_true = np.array([0] * n_cats + [1] * n_dogs)

    model = load_model('../CNN/cnn.h5')
    # summary() prints by itself and returns None; wrapping it in print()
    # added a stray "None" line.
    model.summary()

    scores = model.evaluate_generator(validation_generator)
    print('val_loss: {}, val_acc: {}'.format(scores[0], scores[1]))

    y_pred = get_predictions(model, validation_generator)
    cm = confusion_matrix(y_true, y_pred)
    # Divide every row by its total so rows hold per-class fractions.
    cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    # plot percentage confusion matrix
    plot_confusion_matrix(cm_percent, class_names=['Cat', 'Dog'])
    # Saved beside the CNN model; the former '../MLP/' target overwrote the
    # plot written by the MLP evaluation.
    plt.savefig('../CNN/cm_percent_val.png', format='png')
    plt.show()
def test_utils(figs=False):
    """Smoke-test ``utils.label2name`` and optionally the confusion plot.

    Uses 21 fixed predictions (the labels 0..6 three times) and 21 fixed
    targets, maps both to label names and prints the two results.

    :param figs: when true, also call ``utils.plot_confusion_matrix``
    """
    label_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
    predicted = list(range(7)) * 3
    target = [
        0, 0, 2, 3, 4, 5, 6,
        0, 1, 1, 3, 4, 5, 6,
        0, 1, 2, 2, 4, 5, 6,
    ]

    if figs:
        utils.plot_confusion_matrix(predicted, target, label_names)

    predicted_names, target_names = utils.label2name(predicted, target,
                                                     label_names)
    print(predicted_names)
    print(target_names)
def test_model(model, labels, data_name='sk_eigenjoint_nor_528',
               valid_segment_idx=650):
    """Evaluate ``model`` on the validation segments of one feature set.

    Files ``<data_dir>/<data_name>/*.npy`` are enumerated in ``glob`` order;
    only those with an index of at least ``valid_segment_idx`` are
    evaluated. For each one the prediction is plotted, the argmax over
    axis 2 is taken as the frame label and the Jaccard score is recorded.
    A classification report is printed and the row-normalized confusion
    matrix is plotted.

    NOTE(review): ``labels[index]`` is matched to files by ``glob`` order,
    which is not guaranteed to be sorted. Confirm how ``labels`` was built.

    :param model: model exposing ``predict_on_batch``
    :param labels: per-file label sequences, indexed like the file list
    :param data_name: sub-directory of ``data_dir`` holding the ``.npy`` files
    :param valid_segment_idx: index of the first validation file
    :return: tuple (list of per-file Jaccard scores, normalized confusion
        matrix)
    :raises ValueError: when no file reaches ``valid_segment_idx``
    """
    data = data_dir + '/' + data_name
    file_paths = glob(data + "/*.npy")
    losses = []
    ground_ys = []
    pred_ys = []

    for index, file_path in enumerate(file_paths):
        if index < valid_segment_idx:
            continue
        # Call form prints the same text on Python 2 and Python 3.
        print('Predict ' + file_path)
        valid_x = np.load(file_path)
        # Add a leading batch axis of size 1; positional shape arguments work
        # on every NumPy version.
        valid_x = valid_x.reshape(1, valid_x.shape[0], valid_x.shape[1])
        valid_y = labels[index]

        pred_y = model.predict_on_batch(valid_x)
        file_name = str(index - valid_segment_idx)
        plot_fig(pred_y, valid_y, file_name, save_flag=True)

        pred_y = np.argmax(pred_y, axis=2).ravel()
        ground_ys.append(valid_y)
        pred_ys.append(pred_y)
        losses.append(eval_jaccard(valid_y, pred_y))

    if not ground_ys:
        raise ValueError(
            'no validation segment found: %d files, valid_segment_idx=%d' %
            (len(file_paths), valid_segment_idx))

    ground_ys = np.concatenate(ground_ys)
    pred_ys = np.concatenate(pred_ys)
    print(classification_report(ground_ys, pred_ys))

    cnf_matrix = confusion_matrix(ground_ys, pred_ys)
    np.set_printoptions(precision=2)
    # Divide every row by its total so rows hold per-class fractions.
    cnf_matrix = cnf_matrix.astype('float') / cnf_matrix.sum(
        axis=1)[:, np.newaxis]

    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=range(21),
                          normalize=True,
                          title='Normalized confusion matrix')
    plt.show()

    return losses, cnf_matrix
def eval(logdir):
    """Evaluate Net1 on one test batch and write summaries to ``logdir``.

    The latest checkpoint in ``logdir`` is restored when one exists. The
    first batch of the test data flow is run through the predictor; loss,
    accuracy and a phoneme confusion-matrix summary are written with a
    ``tf.summary.FileWriter``.

    :param logdir: directory holding checkpoints and receiving summaries
    """
    # Load graph
    net = Net1()

    # dataflow
    test_flow = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    pred_conf = PredictConfig(model=net,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names())
    ckpt = tf.train.latest_checkpoint(logdir)
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)
    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(test_flow().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    _, idx2phn = load_vocab()
    true_phns = [idx2phn[index] for index in y_ppg_1d]
    predicted_phns = [idx2phn[index] for index in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(true_phns, predicted_phns, phns)

    writer = tf.summary.FileWriter(logdir)
    for summary in (summ_loss, summ_acc, summ_cm):
        writer.add_summary(summary)
    writer.close()
def test(model, X_test, y_test, enc):
    """Plot the confusion matrix and print the accuracy on the test data.

    Predicted class ids are the argmax over axis 1 of the model output. The
    confusion matrix compares ``y_test`` with the decoded predictions using
    ``enc.classes_`` as labels; the accuracy compares encoded ``y_test``
    with the predicted ids.

    :param model: model exposing ``predict``
    :param X_test: test inputs
    :param y_test: test labels in their original (decoded) form
    :param enc: fitted encoder with ``classes_``, ``transform`` and
        ``inverse_transform``
    """
    class_scores = model.predict(X_test)
    predicted_ids = np.argmax(class_scores, axis=1)
    predicted_labels = enc.inverse_transform(predicted_ids)

    cm = metrics.confusion_matrix(y_true=y_test,
                                  y_pred=predicted_labels,
                                  labels=enc.classes_)
    plt.figure()
    utils.plot_confusion_matrix(cm, enc.classes_, normalize=False)
    plt.show()

    test_acc = metrics.accuracy_score(enc.transform(y_test), predicted_ids)
    print("Accuracy: {}".format(test_acc))
def main(arguments):
    """Train the SVM on the breast cancer dataset and print test results.

    Features are standardized, labels 0 are mapped to -1, and the data is
    split 70/30 with stratification. Both partitions are cut down to a
    multiple of ``BATCH_SIZE`` before training.

    NOTE(review): scikit-learn documents target 0 as 'malignant', so the
    class name order below may be inverted. Confirm what
    ``utils.plot_confusion_matrix`` expects.

    :param arguments: parsed arguments providing ``svm_c``, ``num_epochs``,
        ``log_path`` and ``result_path``
    """
    dataset = datasets.load_breast_cancer()

    # standardize the features
    features = StandardScaler().fit_transform(dataset.data)
    num_features = features.shape[1]

    # transform the labels to {-1, +1}
    labels = dataset.target
    labels[labels == 0] = -1

    # split the dataset to 70/30 partition: 70% train, 30% test
    train_features, test_features, train_labels, test_labels = \
        train_test_split(features, labels, test_size=0.3, stratify=labels)

    # slice the dataset as per the batch size: drop the incomplete last batch
    usable_train = train_features.shape[0] - (
        train_features.shape[0] % BATCH_SIZE)
    usable_test = test_features.shape[0] - (
        test_features.shape[0] % BATCH_SIZE)
    train_features = train_features[:usable_train]
    train_labels = train_labels[:usable_train]
    test_features = test_features[:usable_test]
    test_labels = test_labels[:usable_test]

    # instantiate the SVM class
    model = SVM(alpha=LEARNING_RATE,
                batch_size=BATCH_SIZE,
                svm_c=arguments.svm_c,
                num_classes=NUM_CLASSES,
                num_features=num_features)

    # train the instantiated model
    model.train(epochs=arguments.num_epochs,
                log_path=arguments.log_path,
                train_data=[train_features, train_labels],
                train_size=train_features.shape[0],
                validation_data=[test_features, test_labels],
                validation_size=test_features.shape[0],
                result_path=arguments.result_path)

    test_conf, test_accuracy = utils.plot_confusion_matrix(
        phase='testing',
        path=arguments.result_path,
        class_names=['benign', 'malignant'])

    report_lines = (
        ('True negatives : {}', test_conf[0][0]),
        ('False negatives : {}', test_conf[1][0]),
        ('True positives : {}', test_conf[1][1]),
        ('False positives : {}', test_conf[0][1]),
        ('Testing accuracy : {}', test_accuracy),
    )
    for template, value in report_lines:
        print(template.format(value))
num_classes = len(y.unique())

# Collect the predictions and the true classes of all folds in one list each
y_pred_total = []
y_test_total = []

k_fold = cross_validation.StratifiedKFold(y, n_folds=10, indices=True)
for train_indices, test_indices in k_fold:
    X_train, y_train = X.iloc[train_indices], y.iloc[train_indices]
    X_test, y_test = X.iloc[test_indices], y.iloc[test_indices]

    # Fit the model on this fold's training part and predict its test part
    clf = tree.DecisionTreeClassifier(criterion='entropy')
    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    y_pred_total.extend(y_pred.tolist())
    y_test_total.extend(y_test.tolist())

# Per-class scores over the pooled predictions of all folds
precision = precision_score(y_test_total, y_pred_total, average=None)
recall = recall_score(y_test_total, y_pred_total, average=None)
f_score = f1_score(y_test_total, y_pred_total, average=None)

plot_confusion_matrix(y_test_total, y_pred_total,
                      'Decision Tree Classifier', normed=True)
recalls[label][median], label='%s vs rest' % genre_list[label]) plot_roc(roc_scores[label][median], desc, tprs[label][median], fprs[label][median], label='%s vs rest' % genre_list[label]) all_pr_scores = np.asarray(pr_scores.values()).flatten() summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores)) print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary) return np.mean(train_errors), np.mean(test_errors), np.asarray(cms) def create_model(): from sklearn.linear_model.logistic import LogisticRegression clf = LogisticRegression() return clf if __name__ == "__main__": X, y = read_fft(genre_list) train_avg, test_avg, cms = train_model( create_model, X, y, "Log Reg FFT", plot=True) cm_avg = np.mean(cms, axis=0) cm_norm = cm_avg / np.sum(cm_avg, axis=0) plot_confusion_matrix(cm_norm, genre_list, "fft", "Confusion matrix of an FFT based classifier")
name = 'model%d.png' % np.random.randint(10e8)
path = '/tmp/%s' % name

# Render the model architecture into an image file; nothing is shown yet
plot(model, to_file=path, show_shapes=True, show_layer_names=True)

# Load and display the rendered image (increase both figsize dimensions if
# the image comes out too small)
plt.figure(figsize=(8, 120))
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Callback that saves the weights whenever the monitored value improves
checkpointer = ModelCheckpoint(filepath='seq_example_best_weights.hdf5',
                               verbose=1,
                               save_best_only=True)
history = model.fit(X_train,
                    y_train,
                    batch_size=BATCH_SIZE,
                    nb_epoch=3,
                    verbose=1,
                    shuffle=True,
                    validation_split=0.2,
                    callbacks=[checkpointer])

# The prediction holds one probability per class; keep the most probable one
y = np.argmax(model.predict(X_test, batch_size=BATCH_SIZE, verbose=1),
              axis=-1)
# Convert the one-hot encoded y_test to class labels as well
y_test = np.argmax(y_test, axis=-1)

print('Test accuracy:', accuracy_score(y_test, y))
print('Classification report:', classification_report(y_test, y))

plt.figure()
plot_confusion_matrix(confusion_matrix(y_test, y), labels=range(4))
plt.show()
def do_classify(self, name):
    """Train via the wrapped classifier object and plot its confusion matrix.

    :param name: value forwarded unchanged to ``train_model``.
    """
    # train_model returns three values; only the confusion matrices are
    # used here.
    _, _, fold_cms = self._classify_obj.train_model(name, plot=True)

    # Element-wise mean over the first axis, then divide by the sums taken
    # along axis 0 of that mean.
    mean_cm = np.mean(fold_cms, axis=0)
    axis0_totals = np.sum(mean_cm, axis=0)
    normalized_cm = mean_cm / axis0_totals

    # The plot name and description come from the classifier object, not
    # from the ``name`` argument.
    plot_name, plot_desc = self._classify_obj.get_way_name()
    plot_confusion_matrix(
        normalized_cm,
        AbstractSoundClassifyBase.genre_list,
        plot_name,
        plot_desc,
    )
desc = "%s %s" % (name, genre_list[label]) plot_roc(roc_scores[label][median], desc, tprs[label][median], fprs[label][median], label='%s vs rest' % genre_list[label]) all_pr_scores = np.asarray(pr_scores.values()).flatten() summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores)) print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary) return np.mean(train_errors), np.mean(test_errors), np.asarray(cms) def create_model(): from sklearn.linear_model.logistic import LogisticRegression clf = LogisticRegression() return clf if __name__ == "__main__": X, y = read_ceps(genre_list) train_avg, test_avg, cms = train_model( create_model, X, y, "Log Reg CEPS", plot=True) cm_avg = np.mean(cms, axis=0) cm_norm = cm_avg / np.sum(cm_avg, axis=0) plot_confusion_matrix(cm_norm, genre_list, "ceps", "Confusion matrix of a CEPS based classifier")
plot_roc_curves(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label]) all_pr_scores = np.asarray(pr_scores.values()).flatten() summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores)) #print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary) #save the trained model to disk joblib.dump(clf, 'saved_model/SVMFFT.pkl') return np.mean(train_errors), np.mean(test_errors), np.asarray(cms) if __name__ == "__main__": start = timeit.default_timer() print print " Starting classification \n" print " Classification running ... \n" X, y = read_fft(genre_list) print X,y train_avg, test_avg, cms = train_model(X, y, "fft", plot=True) cm_avg = np.mean(cms, axis=0) cm_norm = cm_avg / np.sum(cm_avg, axis=0) print " Classification finished \n" stop = timeit.default_timer() print " Total time taken (s) = ", (stop - start) print "\n Plotting confusion matrix ... \n" plot_confusion_matrix(cm_norm, genre_list, "fft","SVM FFT classifier - Confusion matrix") print " All Done\n" print " See plots in 'graphs' directory \n"
plot_roc_curves(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label]) all_pr_scores = np.asarray(pr_scores.values()).flatten() summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores)) #print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary) #save the trained model to disk joblib.dump(clf, 'saved_model/model_ceps.pkl') return np.mean(train_errors), np.mean(test_errors), np.asarray(cms) if __name__ == "__main__": start = timeit.default_timer() print print " Starting classification \n" print " Classification running ... \n" X, y = read_fft(genre_list) print X,y train_avg, test_avg, cms = train_model(X, y, "ceps", plot=True) cm_avg = np.mean(cms, axis=0) cm_norm = cm_avg / np.sum(cm_avg, axis=0) print " Classification finished \n" stop = timeit.default_timer() print " Total time taken (s) = ", (stop - start) print "\n Plotting confusion matrix ... \n" plot_confusion_matrix(cm_norm, genre_list, "ceps","CEPS classifier - Confusion matrix") print " All Done\n" print " See plots in 'graphs' directory \n"