# Example #1
def my_training(data_name, model_name, tag):
    """Train a video or audio model on a CSV dataset, save it, and report test metrics.

    Parameters
    ----------
    data_name : str
        Path to the CSV dataset; each row is a feature vector followed by one
        label column (so the feature dimension is row length minus one).
    model_name : str
        Path/name under which the trained model is saved.
    tag : str
        "video" selects fit_video(); any other value selects fit_audio().
        Also used to name the confusion-matrix image.
    """
    # Load the dataset (train/test split is handled by load_data).
    train_x, train_y, test_x, test_y = load_data(data_name, training=True)

    # Infer the feature dimensionality from the first CSV row.
    # The original code looped with an immediate break and then called
    # op.close() redundantly inside the `with` block; next() reads one row.
    with open(data_name) as op:
        dim = len(next(csv.reader(op))) - 1

    # Train the model appropriate for the modality.
    if tag == "video":
        model = fit_video(train_x, train_y, dim)
    else:
        model = fit_audio(train_x, train_y, dim)

    # Persist the trained model.
    saving_model(model, model_name)

    # Draw the confusion matrix to image/<tag>_confusion_matrix.png.
    confusion_matrix.get_confusion_matrix(
        data_name, model_name, "image/" + tag + "_confusion_matrix.png")

    # Evaluate on the held-out test set.
    score = model.evaluate(test_x, test_y)

    # Report results.
    print(model_name, " complete the training")
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
# Build a row-normalized 30-class confusion matrix and save it as a heatmap.
import os, sys
import matplotlib
matplotlib.use('Agg')  # headless backend; must be set before importing pyplot
import numpy as np
import matplotlib.pyplot as plt
from confusion_matrix import get_confusion_matrix

conf_mat = get_confusion_matrix('/scratch/16824/data/testlist_class.txt', 'cls_results.txt', 30)

# Convert raw counts into per-row fractions so each true class sums to 1.
# (Adding 0.0 forces float promotion before the division.)
row_totals = np.sum(conf_mat, axis=1)
conf_mat = (0.0 + conf_mat) / row_totals[:, np.newaxis]

# Render the matrix with tick marks every 5 classes and write it to disk.
plt.matshow(conf_mat)
plt.xticks(np.arange(0, 30, 5))
plt.yticks(np.arange(0, 30, 5))

plt.savefig("test.png")

# Example #3
                     activation='relu')(inp)  # First hidden ReLU layer
    hidden_2 = Dense(hidden_size,
                     activation='relu')(hidden_1)  # Second hidden ReLU layer
    out = Dense(num_classes,
                activation='softmax')(hidden_2)  # Output softmax layer

    model = Model(
        input=inp, output=out
    )  # To define a model, just specify its input and output layers

    model.compile(
        loss='categorical_crossentropy',  # using the cross-entropy loss function
        optimizer='adam',  # using the Adam optimiser
        metrics=['accuracy'])  # reporting the accuracy

    model.fit(
        train_x,
        train_y,  # Train the model using the training set...
        batch_size=batch_size,
        nb_epoch=num_epochs,
        verbose=1,
        validation_split=0.08)  # ...holding out 10% of the data for validation
    print "\n"
else:
    model = load_model(saved_model_name)

print model.evaluate(test_x, test_y,
                     verbose=1)  # Evaluate the trained model on the test set!
confusion_matrix.get_confusion_matrix(model, test_x, test_y)
model.save(saved_model_name)
# Example #4
def use_similarities(response, book, vect, x_train, x_test, pred_train,
                     pred_test, class_train, class_test, book_idx_train,
                     book_idx_test, response_link_train, response_link_test,
                     use_bert, class_names):
    """Train and evaluate a random forest on prediction + similarity features.

    For every sample the feature row is [prediction, response_similarity?,
    book_similarity?], where each optional cosine similarity is included
    according to the *response* / *book* flags. Prints a classification
    report and draws a confusion matrix for the random-forest predictions.

    Parameters
    ----------
    response, book : bool
        Which similarity features to compute. If both are false, the feature
        lists stay empty (original behaviour preserved).
    vect : fitted vectorizer used for TF-IDF lookups when use_bert is false.
    x_train, x_test : per-sample embedding vectors.
    pred_train, pred_test : base-classifier predictions per sample.
    class_train, class_test : gold labels for train/test.
    book_idx_train, book_idx_test : per-sample book indices.
    response_link_train, response_link_test : per-sample response keys.
    use_bert : bool
        Use sentence-BERT embeddings instead of TF-IDF vectors.
    class_names : labels forwarded to get_confusion_matrix().
    """
    # Load the (expensive) sentence-BERT model only when it will actually be
    # used; the original constructed it unconditionally.
    bert_model = (SentenceTransformer('bert-base-nli-mean-tokens')
                  if use_bert and (response or book) else None)

    if book:
        tfidf_books = (get_bert_books(bert_model) if use_bert
                       else get_tfidf_books(vect))
        book_dict = get_book_dict()
        book_idx_train = np.array(book_idx_train)
        book_idx_test = np.array(book_idx_test)

    if response:
        response_tfidf_dict = (get_response_bert_dict(bert_model) if use_bert
                               else get_response_tfidf_dict(vect))
        response_link_train = np.array(response_link_train)
        response_link_test = np.array(response_link_test)

    def _features(x, preds, links, book_idx):
        # Build [pred, resp_sim?, book_sim?] rows for one data split.
        rows = []
        if not (response and book or response or book):
            return rows  # neither flag set: no feature rows (as before)
        for i in range(len(preds)):
            row = [preds[i]]
            if response:
                row.append(cosine_similarity(
                    [x[i]], [response_tfidf_dict[links[i]]])[0][0])
            if book:
                row.append(cosine_similarity(
                    [x[i]], [tfidf_books[book_dict[book_idx[i]]]])[0][0])
            rows.append(row)
        return rows

    rf_arr_train = _features(x_train, pred_train,
                             response_link_train, book_idx_train)
    rf_arr_test = _features(x_test, pred_test,
                            response_link_test, book_idx_test)

    clf_rf = RandomForestClassifier(max_depth=10,
                                    random_state=0,
                                    n_estimators=10)

    # Train random forest on the augmented features.
    clf_rf.fit(rf_arr_train, class_train)

    # Make predictions on the test split.
    rf_pred = clf_rf.predict(rf_arr_test)

    # Evaluation.
    print(
        'Classification report - similarities were used ---------------------------'
    )
    print(
        metrics.classification_report(class_test,
                                      rf_pred,
                                      digits=3,
                                      zero_division=0))

    get_confusion_matrix(class_test, rf_pred, class_names)
# Configure the remaining subplot axes. `fig`, `data`, the `trace_*` objects,
# and `participant_id` are defined earlier in the file (outside this view).
fig['layout']['yaxis4'].update(title='Abstract Phase', categoryorder='array', categoryarray=data.abstract_phase_keys,
                               tickangle=-45)
fig['layout']['yaxis5'].update(title='Accuracy')
fig['layout']['yaxis6'].update(title='Latency', type='log')  # log scale for latency

# Add traces to the stacked subplots (row, col).
fig.append_trace(trace_human, 1, 1)
fig.append_trace(trace_computer, 1, 1)
fig.append_trace(trace_agreement, 2, 1)
fig.append_trace(trace_agreement_latency_mask, 2, 1)
fig.append_trace(trace_phase, 3, 1)
fig.append_trace(trace_abstract_phase, 4, 1)
fig.append_trace(trace_running_accuracy, 5, 1)
fig.append_trace(trace_latency, 6, 1)
fig.append_trace(trace_latency_threshold, 6, 1)

# Render plot to <participant_id>.html without opening a browser.
offline.plot(fig, filename=participant_id + '.html', auto_open=False)

# Append confusion matrix image inline (base64) and delete the temp PNG.
get_confusion_matrix(participant_id + '_confusion_matrix.png', data.confusion_matrix, data.confusion_matrix_labels)
# NOTE(review): str.encode('base64') exists only in Python 2; under Python 3
# this needs base64.b64encode on bytes — confirm target interpreter.
data_uri = open(participant_id + '_confusion_matrix.png', 'rb').read().encode('base64').replace('\n', '')
img_tag = '<img src="data:image/png;base64,%s">' % data_uri
os.remove(participant_id + '_confusion_matrix.png')

# NOTE(review): seeking in append ("a") mode is unreliable — writes always go
# to the end regardless of seek, and relative seeks on text files raise in
# Python 3. The intent appears to be inserting the <img> before the final 14
# characters (presumably the closing </body></html>) — verify.
with open(participant_id + '.html', "a") as f:
    f.seek(-14, 2)
    f.write(img_tag)

# Open the finished report with the Windows "start" shell command.
os.system("start "+participant_id + '.html')
# Example #6
    # print('Confusion matrix --------------------------------')
    # print(metrics.confusion_matrix(class_test, predictions))

    print(
        'Classification report (TF-IDF + MLP) ---------------------------------'
    )
    print(
        metrics.classification_report(class_test,
                                      predictions,
                                      digits=3,
                                      zero_division=0))

    # Confusion matrix
    class_names = [class_dict[x] for x in list(set(class_test))]
    get_confusion_matrix(class_test, predictions, class_names)

    # =============================================================================================================

    # --- Similarities -----------------------------------------------------------
    if use_response_similarity or use_book_similarity:
        print("--- SIMILARITIES ---")

        pred_train, pred_test = get_predictions(clf, x_train, x_test)
        prob_train, prob_test = get_probabilities(clf, x_train, x_test)
        # pred_train = class_train

        # print(class_test)
        # print(pred_test)

        if use_bert:
# Example #7
    # print(true_classes)
    # print(predicted_classes)

    # print(f"Validation Loss Epoch: {epoch_loss}")
    # print(f"Test Accuracy: {epoch_accu}")

    print('Classification report (DistilBERT) ---------------------------')
    print(
        metrics.classification_report(true_classes,
                                      predicted_classes,
                                      digits=3,
                                      zero_division=0))

    # Confusion matrix
    class_names = [class_dict[x] for x in list(set(class_test))]
    get_confusion_matrix(true_classes, predicted_classes, class_names)

    # --- Similarities -----------------------------------------------------------
    if use_response_similarity or use_book_similarity:
        print("--- SIMILARITIES ---")

        pred_train = class_train
        pred_test = predicted_classes
        tfidf_vectorizer = None

        if len(class_test) % 2 != 0:
            class_test = class_test[:-1]

        if use_bert:
            print("Use BERT embeddings for similarity")
            bert_model = SentenceTransformer('bert-base-nli-mean-tokens')
# Example #8
# Save the trained network. `tfDeep`, `mnist`, `input_data`, and
# eval_perf_multi are defined earlier in the file (outside this view).
tfDeep.save()

# Class probabilities for every training image.
probs = tfDeep.eval(mnist.train.images)
# recover the predicted classes Y
# NOTE(review): thresholding column 1 yields a binary decision, while the
# confusion matrix below uses a 10-way argmax — confirm my_predictions is
# still used anywhere downstream.
my_predictions = probs[:, 1] > 0.5
my_predictions = my_predictions.flatten()

# graph the decision surface
bbox = (np.min(input_data, axis=0), np.max(input_data, axis=0))

#axis limits of the graph
C = 10  # number of MNIST classes
possible_labels = np.array(range(C))
# Confusion matrix of predicted vs. true classes (pretty-printed, width 10).
mat = get_confusion_matrix(np.argmax(probs, axis=1),
                           np.argmax(mnist.train.labels, axis=1),
                           possible_labels,
                           True,
                           format_length=10)
accuracy, prec, rec = eval_perf_multi(mat)
# AP_c0 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,0].argsort()],0)
# AP_c1 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,1].argsort()],1)
# AP_c2 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,2].argsort()],2)
# AP_c3 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,3].argsort()],3)
# AP_c4 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,4].argsort()],4)
# AP_c5 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,5].argsort()],5)
# AP_c6 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,6].argsort()],6)
# AP_c7 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,7].argsort()],7)
# AP_c8 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,8].argsort()],8)
# AP_c9 = eval_AP(np.argmax(mnist.train.labels,axis=1)[probs[:,9].argsort()],9)
# NOTE(review): Python 2 print statements — this snippet targets Python 2.
print
print "Accuracy"
# Plot a row-normalized 101-class confusion matrix for a result file given on
# the command line, and rank the most-confused classes per row.
import os  # was missing: os.path.basename is used below
import numpy as np
import matplotlib.pyplot as plt
from confusion_matrix import get_confusion_matrix
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('filename', help = 'result file with label and first 5 predicted class')
args = parser.parse_args()

# 'filename' is a required positional, so the fallback only fires for "".
if args.filename:
    file_name = args.filename
else:
    file_name = 'object/obj_res_real.txt'

class_num = 101
conf_mat = get_confusion_matrix(file_name, class_num)

# Convert raw counts into per-row fractions (0.0 + forces float promotion).
sum_per_row = np.sum(conf_mat, axis=1)
conf_mat = (0.0 + conf_mat) / sum_per_row[:, np.newaxis]

# Save the heatmap, named after the input file's basename.
plt.matshow(conf_mat)
plt.xticks(np.arange(0, 101, 5))
plt.yticks(np.arange(0, 101, 5))

plt.savefig(os.path.basename(file_name).strip().split('.')[0] + ".png")

# Indices of the five largest entries per row; then sort values in place.
ind_ranking = conf_mat.argsort()[:, -1:-6:-1]
conf_mat.sort()
# Example #10
    print("--- Evaluation time: %s seconds ---" % (time.time() - start_time))

    # print(idxs)
    true_vals = true_vals.tolist()
    # print(true_vals)

    print('Classification report (BERT) ---------------------------')
    print(
        metrics.classification_report(true_vals,
                                      idxs,
                                      digits=3,
                                      zero_division=0))

    # Confusion matrix
    class_names = [class_dict[x] for x in list(set(class_test))]
    get_confusion_matrix(true_vals, idxs, class_names)

    # --- Similarities -----------------------------------------------------------

    if use_response_similarity or use_book_similarity:
        print("--- SIMILARITIES ---")

        pred_train = class_train
        pred_test = idxs
        tfidf_vectorizer = None

        if len(class_test) % 2 != 0:
            class_test = class_test[:-1]

        if use_bert:
            print("Use BERT embeddings for similarity")
# Example #11
# Plot a row-normalized 101-class confusion matrix for a result file given on
# the command line, and extract the top-5 confusion indices/values per row.
import os  # was missing: os.path.basename is used below
import numpy as np  # was missing: np is used below
import matplotlib.pyplot as plt
from confusion_matrix import get_confusion_matrix
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('filename',
                    help='result file with label and first 5 predicted class')
args = parser.parse_args()

# 'filename' is a required positional, so the fallback only fires for "".
if args.filename:
    file_name = args.filename
else:
    file_name = 'object/obj_res_real.txt'

class_num = 101
conf_mat = get_confusion_matrix(file_name, class_num)

# Convert raw counts into per-row fractions (0.0 + forces float promotion).
sum_per_row = np.sum(conf_mat, axis=1)
conf_mat = (0.0 + conf_mat) / sum_per_row[:, np.newaxis]

# Save the heatmap, named after the input file's basename.
plt.matshow(conf_mat)
plt.xticks(np.arange(0, 101, 5))
plt.yticks(np.arange(0, 101, 5))

plt.savefig(os.path.basename(file_name).strip().split('.')[0] + ".png")

# Top-5 most-confused class indices per row, then the matching values after
# an in-place row sort.
ind_ranking = conf_mat.argsort()[:, -1:-6:-1]
conf_mat.sort()
value_ranking = conf_mat[:, -1:-6:-1]