def visualize_attention(wts, x_test_pad, word_to_id, filename):
    """Write an HTML attention visualization for a set of test samples.

    The attention tensor is summed over dimension 1 and handed, together
    with the space-joined words of every sample, to ``createHTML``.

    Args:
        wts: torch tensor of attention weights. Only ``torch.sum(wts, 1)``
            is used; presumably shaped (samples, hops, tokens) -- TODO
            confirm against the model that produces it.
        x_test_pad: iterable of samples, each an iterable of word ids.
        word_to_id: mapping from word to id; inverted here to decode ids.
        filename: forwarded unchanged to ``createHTML``.

    Returns:
        None.
    """
    wts_add = torch.sum(wts, 1)
    # detach() + cpu() keep the numpy conversion working for tensors that
    # carry gradients or live on another device; on plain CPU tensors this
    # is equivalent to the former ``.data.numpy()``.
    wts_add_list = wts_add.detach().cpu().numpy().tolist()

    id_to_word = {v: k for k, v in word_to_id.items()}
    # An id missing from the vocabulary used to produce ``None`` and make
    # ``str.join`` raise TypeError; render a placeholder token instead.
    unknown_token = "<unk>"
    text = [
        " ".join(id_to_word.get(i, unknown_token) for i in test)
        for test in x_test_pad
    ]

    createHTML(text, wts_add_list, filename)
    print("Attention visualization created for {} samples".format(len(x_test_pad)))
# Example #2
# 0
def visualize_attention(attention_model, wts, x_test_pad, word_to_id,
                        word_to_word, y_test, filename, top_k=6,
                        page_size=20):
    """Classify every test sample, score the predictions and write HTML pages.

    Each sample is fed to ``attention_model`` on its own. The classes are
    ranked by descending score, the best ``top_k`` are kept, and the sample
    counts as correct when the true label is ranked first (``correct``) or
    appears anywhere in the kept predictions (``correct2``). The results
    are then passed to ``createHTML`` in pages of ``page_size`` samples.

    Args:
        attention_model: callable model returning ``(predictions, _)``; its
            ``batch_size`` and ``hidden_state`` attributes are overwritten
            before every call.
        wts: torch tensor of attention weights; only ``torch.sum(wts, 1)``
            is used.
        x_test_pad: sequence of numpy arrays of word ids, one per sample.
        word_to_id: mapping from vocabulary key to word id.
        word_to_word: mapping from vocabulary key to the word shown in the
            visualization.
        y_test: true class per sample, indexed by sample position.
        filename: prefix of the generated HTML file names.
        top_k: number of ranked predictions kept per sample. The default
            of 6 reproduces the previous hard-coded cut-off.
        page_size: number of samples per HTML file (previously fixed at 20).

    Returns:
        Tuple ``(correct, correct2, result)`` where ``result`` holds one
        ``[true_label, best_prediction, kept_predictions]`` entry per sample.
    """
    print(filename, "{} samples".format(len(x_test_pad)))

    labels = load_label_data(data_params['labels_csv'])
    # len() rather than truthiness: ``labels`` may be an array-like whose
    # truth value is ambiguous.
    has_labels = len(labels) > 0

    def label_of(class_id):
        # Readable label when a label table is available, else the raw id.
        return labels[class_id] if has_labels else str(class_id)

    # detach() + cpu() keep the numpy conversion working for tensors that
    # carry gradients or live on another device.
    wts_add_list = torch.sum(wts, 1).detach().cpu().numpy().tolist()
    id_to_word = {v: word_to_word[k] for k, v in word_to_id.items()}
    # Placeholder for ids missing from the vocabulary; ``None`` would make
    # ``str.join`` raise TypeError.
    unknown_token = "<unk>"

    result = []
    text = []
    correct = 0
    correct2 = 0
    for n, test in enumerate(x_test_pad):
        # The model is run one sample at a time with a fresh hidden state.
        attention_model.batch_size = 1
        attention_model.hidden_state = attention_model.init_hidden()
        x_test_var = Variable(torch.from_numpy(test).type(torch.LongTensor))
        y_test_pred, _ = attention_model(x_test_var)

        # Rank class indices by descending score. Sorting (index, score)
        # pairs keeps classes with tied scores, which a dict keyed by score
        # would silently merge; ties are ordered by descending index.
        scores = y_test_pred[0].detach().cpu().numpy().tolist()
        ranked = sorted(enumerate(scores),
                        key=lambda pair: (pair[1], pair[0]),
                        reverse=True)
        pred = [label_of(class_id) for class_id, _ in ranked[:top_k]]

        truth = label_of(y_test[n])
        if truth == pred[0]:
            correct += 1
        if truth in pred:
            correct2 += 1

        text.append(" ".join(id_to_word.get(i, unknown_token) for i in test))
        result.append([truth, pred[0], pred])

    # Write the results to HTML, page_size samples per file. The file name
    # carries the 1-based first index and the nominal last index of the
    # page, even when the final page is shorter.
    for start in range(0, len(result), page_size):
        stop = start + page_size
        createHTML(result[start:stop], text[start:stop],
                   wts_add_list[start:stop],
                   filename + '_' + str(start + 1) + '_' + str(stop) + '.html')

    return (correct, correct2, result)
# Example #3
# 0
# Predict every test item with `bst` and accumulate the outputs.
# `y_pred`, `y_pred_roc` and `y_test_prob` are appended to here and must
# already exist; they are initialised outside the visible part of the file.
# NOTE(review): `for i in x_test` followed by `x_test[i]` implies x_test is
# keyed by the values it yields (e.g. a dict) -- confirm.
for i in x_test:
    np_sentence = np.asarray(x_test[i])
    np_labels = np.asarray(y_test[i])  # NOTE(review): unused in the visible code
    dtest = xgb.DMatrix(np_sentence)
    labels = bst.predict(dtest)
    #print(labels)
    # Flat list of every prediction, used for the ROC AUC below.
    y_pred_roc.extend(labels)
    #print(labels)
    # Per-item list of predictions, used for the HTML output and the metrics.
    y_pred.append([item for item in labels])
    y_test_prob.append(y_test[i])
    #print(y_test[i])
#y_pred_tensor = torch.FloatTensor()
#visualize_attention(y_pred_tensor,,)
text_flat = []
scores_flat = []  # NOTE(review): never filled or read in the visible code

# One space-joined string per test sentence, for the HTML visualization.
for test in corpus.test.words:
    text_flat.append(" ".join(test))
# NOTE(review): presumably an interactive debugging breakpoint (IPython
# embed) -- execution pauses here until the shell is closed; confirm it
# is still wanted.
embed()
attention_visualization.createHTML(text_flat, y_pred, "res/xgboost.html")

# Binarise the flat predictions at 0.5.
y_pred_roc = np.array([1 if i >= 0.5 else 0 for i in y_pred_roc])
# Flatten the per-item targets and binarise them the same way.
y_test_roc = []
for i in y_test_prob:
    y_test_roc.extend(i)
y_test_roc = np.array([1 if i >= 0.5 else 0 for i in y_test_roc])
match_M(y_pred, y_test_prob)
topK(y_pred, y_test_prob)
# NOTE(review): y_pred_roc holds thresholded 0/1 values at this point, so
# the score is computed from hard decisions rather than the raw
# predictions -- confirm this is intended.
print(roc_auc_score(y_test_roc, y_pred_roc))

# NOTE(review): second interactive breakpoint, see the note on the first.
embed()