def my_training(data_name, model_name, tag):
    """Train a video or audio model on *data_name*, save it, and report test metrics.

    Parameters
    ----------
    data_name : str
        Path to the CSV dataset; each row is feature values followed by a label.
    model_name : str
        Path under which the trained model is saved.
    tag : str
        Either "video" or "audio"; selects the fitting routine and names the
        confusion-matrix image written under ``image/``.
    """
    # Load the train/test split.
    train_x, train_y, test_x, test_y = load_data(data_name, training=True)

    # Infer feature dimensionality from the first CSV row (last column is the
    # label).  The with-block closes the file; the original's explicit
    # op.close() inside the block was redundant and the for/break loop is
    # replaced by next().
    with open(data_name) as op:
        first_row = next(csv.reader(op))
        dim = len(first_row) - 1

    # Fit the model appropriate for the modality.
    if tag == "video":
        model = fit_video(train_x, train_y, dim)
    else:
        model = fit_audio(train_x, train_y, dim)

    # Persist the trained model.
    saving_model(model, model_name)

    # Plot the confusion matrix for the saved model.
    confusion_matrix.get_confusion_matrix(
        data_name, model_name, "image/" + tag + "_confusion_matrix.png")

    # Evaluate on the held-out test set and report loss/accuracy.
    score = model.evaluate(test_x, test_y)
    print(model_name, " complete the training")
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
# Render a row-normalised 30-class confusion matrix and save it as test.png.
import os, sys
import matplotlib
matplotlib.use('Agg')  # headless backend: we only write an image file
import numpy as np
import matplotlib.pyplot as plt
from confusion_matrix import get_confusion_matrix

# Raw count matrix built from the class list and the classification results.
counts = get_confusion_matrix('/scratch/16824/data/testlist_class.txt',
                              'cls_results.txt', 30)

# Normalise every row to sum to 1 (float promotion via the 0.0 addition).
row_totals = np.sum(counts, axis=1)
conf_mat = (0.0 + counts) / row_totals[:, np.newaxis]

# Heat-map with tick marks every 5 classes.
plt.matshow(conf_mat)
plt.xticks(np.arange(0, 30, 5))
plt.yticks(np.arange(0, 30, 5))
plt.savefig("test.png")
# NOTE(review): this chunk starts mid-expression — the opening
# `Dense(hidden_size,` of the first layer, and the `if` matching the
# `else:` below, are outside this view, so the fragment is not
# independently parseable.  Python 2 / old Keras API (`input=`/`output=`,
# `nb_epoch`, bare `print`).
                 activation='relu')(inp)  # First hidden ReLU layer
    hidden_2 = Dense(hidden_size, activation='relu')(hidden_1)  # Second hidden ReLU layer
    out = Dense(num_classes, activation='softmax')(hidden_2)  # Output softmax layer
    # To define a functional-API model, just specify its input and output layers.
    model = Model(
        input=inp,
        output=out
    )
    model.compile(
        loss='categorical_crossentropy',  # using the cross-entropy loss function
        optimizer='adam',                 # using the Adam optimiser
        metrics=['accuracy'])             # reporting the accuracy
    model.fit(
        train_x, train_y,  # Train the model using the training set...
        batch_size=batch_size,
        nb_epoch=num_epochs,
        verbose=1,
        validation_split=0.08)  # ...holding out 8% of the data for validation
    print "\n"
else:
    # Skip training: restore a previously saved model instead.
    model = load_model(saved_model_name)
# Evaluate the trained model on the test set!
print model.evaluate(test_x, test_y, verbose=1)
confusion_matrix.get_confusion_matrix(model, test_x, test_y)
model.save(saved_model_name)
def _similarity_rows(preds, x, response, book, response_vecs, response_links,
                     book_vecs, book_lookup, book_idx):
    """Build one feature row per sample: [prediction, (response sim), (book sim)].

    Cosine similarities are appended in a fixed order (response first, then
    book), matching the original train/test loops.  Vector arguments that
    correspond to a disabled flag may be None.
    """
    rows = []
    for i in range(len(preds)):
        row = [preds[i]]
        if response:
            row.append(cosine_similarity(
                [x[i]], [response_vecs[response_links[i]]])[0][0])
        if book:
            row.append(cosine_similarity(
                [x[i]], [book_vecs[book_lookup[book_idx[i]]]])[0][0])
        rows.append(row)
    return rows


def use_similarities(response, book, vect, x_train, x_test, pred_train,
                     pred_test, class_train, class_test, book_idx_train,
                     book_idx_test, response_link_train, response_link_test,
                     use_bert, class_names):
    """Train and evaluate a random forest on per-sample predictions augmented
    with cosine similarities to response and/or book embeddings.

    Parameters mirror the calling evaluation pipeline: `response`/`book`
    toggle which similarity features are used, `use_bert` switches between
    BERT and TF-IDF embeddings, and `vect` is the fitted TF-IDF vectorizer.
    Prints a classification report and renders a confusion matrix.
    """
    # FIX: only load the (expensive) BERT model when it is actually needed;
    # the original instantiated it unconditionally.
    bert_model = (SentenceTransformer('bert-base-nli-mean-tokens')
                  if use_bert else None)

    book_vecs = book_lookup = None
    if book:
        if use_bert:
            book_vecs = get_bert_books(bert_model)
        else:
            book_vecs = get_tfidf_books(vect)
        book_lookup = get_book_dict()
        book_idx_train = np.array(book_idx_train)
        book_idx_test = np.array(book_idx_test)

    response_vecs = None
    if response:
        if use_bert:
            response_vecs = get_response_bert_dict(bert_model)
        else:
            response_vecs = get_response_tfidf_dict(vect)
        response_link_train = np.array(response_link_train)
        response_link_test = np.array(response_link_test)

    # Build feature rows for both splits with the shared helper (the original
    # duplicated the same three-branch loop for train and test).
    rf_arr_train = _similarity_rows(pred_train, x_train, response, book,
                                    response_vecs, response_link_train,
                                    book_vecs, book_lookup, book_idx_train)
    rf_arr_test = _similarity_rows(pred_test, x_test, response, book,
                                   response_vecs, response_link_test,
                                   book_vecs, book_lookup, book_idx_test)

    # Train random forest on the augmented features.
    clf_rf = RandomForestClassifier(max_depth=10,
                                    random_state=0,
                                    n_estimators=10)
    clf_rf.fit(rf_arr_train, class_train)

    # Make predictions.
    rf_pred = clf_rf.predict(rf_arr_test)

    # Evaluation.
    print(
        'Classification report - similarities were used ---------------------------'
    )
    print(
        metrics.classification_report(class_test,
                                      rf_pred,
                                      digits=3,
                                      zero_division=0))
    get_confusion_matrix(class_test, rf_pred, class_names)
# NOTE(review): fragment of a larger report-building routine — `fig`, the
# trace_* objects, `data`, `offline`, and `participant_id` are defined
# outside this view.

# Axis 4 uses a categorical order supplied by the participant's data.
fig['layout']['yaxis4'].update(title='Abstract Phase',
                               categoryorder='array',
                               categoryarray=data.abstract_phase_keys,
                               tickangle=-45)
fig['layout']['yaxis5'].update(title='Accuracy')
# Log scale for latency — presumably it spans orders of magnitude; TODO confirm.
fig['layout']['yaxis6'].update(title='Latency', type='log')

# Add traces (subplot rows 1..6, single column).
fig.append_trace(trace_human, 1, 1)
fig.append_trace(trace_computer, 1, 1)
fig.append_trace(trace_agreement, 2, 1)
fig.append_trace(trace_agreement_latency_mask, 2, 1)
fig.append_trace(trace_phase, 3, 1)
fig.append_trace(trace_abstract_phase, 4, 1)
fig.append_trace(trace_running_accuracy, 5, 1)
fig.append_trace(trace_latency, 6, 1)
fig.append_trace(trace_latency_threshold, 6, 1)

# Render plot to an HTML file without opening a browser.
offline.plot(fig, filename=participant_id + '.html', auto_open=False)

# Append confusion matrix: render to a temporary PNG, inline it into the
# HTML as a base64 data URI, then remove the PNG.
get_confusion_matrix(participant_id + '_confusion_matrix.png',
                     data.confusion_matrix,
                     data.confusion_matrix_labels)
# NOTE(review): str.encode('base64') works only on Python 2 (on Python 3
# this raises — the base64 module would be needed); the file object opened
# here is also never closed explicitly.
data_uri = open(participant_id + '_confusion_matrix.png',
                'rb').read().encode('base64').replace('\n', '')
img_tag = '<img src="data:image/png;base64,%s">' % data_uri
os.remove(participant_id + '_confusion_matrix.png')
with open(participant_id + '.html', "a") as f:
    # Seek 14 bytes back from EOF — presumably to overwrite the closing
    # "</body></html>" before appending the image tag; TODO confirm.
    f.seek(-14, 2)
    f.write(img_tag)
# Windows-specific: "start" opens the report in the default browser.
os.system("start " + participant_id + '.html')
# print('Confusion matrix --------------------------------') # print(metrics.confusion_matrix(class_test, predictions)) print( 'Classification report (TF-IDF + MLP) ---------------------------------' ) print( metrics.classification_report(class_test, predictions, digits=3, zero_division=0)) # Confusion matrix class_names = [class_dict[x] for x in list(set(class_test))] get_confusion_matrix(class_test, predictions, class_names) # ============================================================================================================= # --- Similarities ----------------------------------------------------------- if use_response_similarity or use_book_similarity: print("--- SIMILARITIES ---") pred_train, pred_test = get_predictions(clf, x_train, x_test) prob_train, prob_test = get_probabilities(clf, x_train, x_test) # pred_train = class_train # print(class_test) # print(pred_test) if use_bert:
# print(true_classes) # print(predicted_classes) # print(f"Validation Loss Epoch: {epoch_loss}") # print(f"Test Accuracy: {epoch_accu}") print('Classification report (DistilBERT) ---------------------------') print( metrics.classification_report(true_classes, predicted_classes, digits=3, zero_division=0)) # Confusion matrix class_names = [class_dict[x] for x in list(set(class_test))] get_confusion_matrix(true_classes, predicted_classes, class_names) # --- Similarities ----------------------------------------------------------- if use_response_similarity or use_book_similarity: print("--- SIMILARITIES ---") pred_train = class_train pred_test = predicted_classes tfidf_vectorizer = None if len(class_test) % 2 != 0: class_test = class_test[:-1] if use_bert: print("Use BERT embeddings for similarity") bert_model = SentenceTransformer('bert-base-nli-mean-tokens')
# NOTE(review): fragment (Python 2 — bare `print` statements) of an MNIST
# evaluation script; `tfDeep`, `mnist`, `input_data`, `eval_perf_multi`,
# and `get_confusion_matrix` are defined outside this view.

tfDeep.save()
# Class probabilities over the full training set.
probs = tfDeep.eval(mnist.train.images)
# recover the predicted classes Y (threshold on the class-1 probability;
# only meaningful for the binary case — TODO confirm, C below is 10).
my_predictions = probs[:, 1] > 0.5
my_predictions = my_predictions.flatten()
# graph the decision surface
bbox = (np.min(input_data, axis=0), np.max(input_data, axis=0))  # axis limits of the graph
C = 10  # number of classes
possible_labels = np.array(range(C))
# Confusion matrix from argmax predictions vs. argmax one-hot labels.
mat = get_confusion_matrix(np.argmax(probs, axis=1),
                           np.argmax(mnist.train.labels, axis=1),
                           possible_labels,
                           True,
                           format_length=10)
accuracy, prec, rec = eval_perf_multi(mat)
# Per-class average-precision computations, kept for reference:
# AP_c0 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,0].argsort()],0)
# AP_c1 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,1].argsort()],1)
# AP_c2 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,2].argsort()],2)
# AP_c3 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,3].argsort()],3)
# AP_c4 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,4].argsort()],4)
# AP_c5 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,5].argsort()],5)
# AP_c6 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,6].argsort()],6)
# AP_c7 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,7].argsort()],7)
# AP_c8 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,8].argsort()],8)
# AP_c9 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,9].argsort()],9)
print
print "Accuracy"
"""Plot a row-normalised 101-class confusion matrix from a result file.

Saves a heat-map PNG named after the input file and computes the per-row
top-5 confusion ranking.
"""
import os  # FIX: os.path.basename is used below but os was never imported
import argparse

import numpy as np
import matplotlib.pyplot as plt

from confusion_matrix import get_confusion_matrix

parser = argparse.ArgumentParser()
parser.add_argument('filename',
                    help='result file with label and first 5 predicted class')
args = parser.parse_args()

# 'filename' is a required positional, so this fallback only fires when an
# empty string is passed explicitly.
if args.filename:
    file_name = args.filename
else:
    file_name = 'object/obj_res_real.txt'

class_num = 101  # number of classes in the result file

# Raw count matrix, then normalise each row to sum to 1.
conf_mat = get_confusion_matrix(file_name, class_num)
sum_per_row = np.sum(conf_mat, axis=1)
conf_mat = (0.0 + conf_mat) / sum_per_row[:, np.newaxis]

plt.matshow(conf_mat)
plt.xticks(np.arange(0, 101, 5))
plt.yticks(np.arange(0, 101, 5))
# Output PNG named after the input file, written to the CWD.
plt.savefig(os.path.basename(file_name).strip().split('.')[0] + ".png")

# Per-row top-5 confusion targets; take indices before the in-place sort
# reorders conf_mat.
ind_ranking = conf_mat.argsort()[:, -1:-6:-1]
conf_mat.sort()
print("--- Evaluation time: %s seconds ---" % (time.time() - start_time)) # print(idxs) true_vals = true_vals.tolist() # print(true_vals) print('Classification report (BERT) ---------------------------') print( metrics.classification_report(true_vals, idxs, digits=3, zero_division=0)) # Confusion matrix class_names = [class_dict[x] for x in list(set(class_test))] get_confusion_matrix(true_vals, idxs, class_names) # --- Similarities ----------------------------------------------------------- if use_response_similarity or use_book_similarity: print("--- SIMILARITIES ---") pred_train = class_train pred_test = idxs tfidf_vectorizer = None if len(class_test) % 2 != 0: class_test = class_test[:-1] if use_bert: print("Use BERT embeddings for similarity")
"""Plot a row-normalised 101-class confusion matrix from a result file.

Saves a heat-map PNG named after the input file and computes the per-row
top-5 confusion ranking (indices and values).
"""
import os  # FIX: os.path.basename is used below but os was never imported
import argparse

import numpy as np  # FIX: np.* is used below but numpy was never imported
import matplotlib.pyplot as plt

from confusion_matrix import get_confusion_matrix

parser = argparse.ArgumentParser()
parser.add_argument('filename',
                    help='result file with label and first 5 predicted class')
args = parser.parse_args()

# 'filename' is a required positional, so this fallback only fires when an
# empty string is passed explicitly.
if args.filename:
    file_name = args.filename
else:
    file_name = 'object/obj_res_real.txt'

class_num = 101  # number of classes in the result file

# Raw count matrix, then normalise each row to sum to 1.
conf_mat = get_confusion_matrix(file_name, class_num)
sum_per_row = np.sum(conf_mat, axis=1)
conf_mat = (0.0 + conf_mat) / sum_per_row[:, np.newaxis]

plt.matshow(conf_mat)
plt.xticks(np.arange(0, 101, 5))
plt.yticks(np.arange(0, 101, 5))
# Output PNG named after the input file, written to the CWD.
plt.savefig(os.path.basename(file_name).strip().split('.')[0] + ".png")

# Per-row top-5 confusion targets; take indices before the in-place sort
# reorders conf_mat, then read the sorted values off the tail.
ind_ranking = conf_mat.argsort()[:, -1:-6:-1]
conf_mat.sort()
value_ranking = conf_mat[:, -1:-6:-1]