Пример #1
0
def post_process(predict_file):
    """Load pickled predictions, align them with test entities, and report F1.

    Args:
        predict_file: File name (relative to ../result/) of the pickled
            prediction array.

    Returns:
        Tuple ``(new_y, true_y)`` — the matched predicted labels and the
        gold labels loaded from the 'tests_y' pickle.
    """
    test_entity = load_pkl('test_doc_entity_list')
    true_y = load_pkl('tests_y')
    # Context manager guarantees the file handle is closed even if
    # pickle.load raises.
    with open('../result/' + predict_file, 'rb') as file:
        predict = pickle.load(file)
    test = load_pkl('test_doc_entity')

    y, y_dict = ansisy_predict(predict, test_entity)
    doc_dict = ansis_doc(test)
    new_y = match(y, y_dict, test_entity, doc_dict)
    p1, r1, f_1 = f1(true_y, new_y)
    print(f_1)
    print("///////")
    return new_y, true_y
Пример #2
0
    def delete_dev_neg_ins(self, dev_x, dev_y):
        """Drop the dev instances whose indices are in 'dev_delete_list'.

        The index list is walked back-to-front so that each deletion does
        not shift the positions of indices still pending removal.

        Args:
            dev_x: List of dev inputs; modified in place.
            dev_y: List of dev labels; modified in place.

        Returns:
            The ``(dev_x, dev_y)`` pair after removal.
        """
        drop_indices = load_pkl('dev_delete_list')
        # NOTE(review): these counters are never incremented anywhere in the
        # method, so the summary lines below always print zero.
        positive = 0
        negetive = 0
        nopair = 0

        # Delete from the highest index downward to keep lower indices valid.
        for idx in reversed(drop_indices):
            print('del')
            del dev_x[idx]
            del dev_y[idx]

        print("finish")
        print("pos" + str(positive))
        print("neg" + str(negetive))
        print("nopair" + str(nopair))
        return dev_x, dev_y
Пример #3
0
    out = Dense(1, activation='sigmoid', name='output')(res)
    model = Model([inputs_indices, inputs_segment, entity1_mask, entity2_mask],
                  out)

    model.summary()
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model


if __name__ == "__main__":

    # Per-split entity position features (presumably token/char positions of
    # the first and second entity in each pair — TODO confirm against the
    # code that wrote these pickles).
    train_entity1_pos = load_pkl("train_pos1")
    dev_entity1_pos = load_pkl("dev_pos1")
    test_entity1_pos = load_pkl("test_pos1")
    train_entity2_pos = load_pkl("train_pos2")
    dev_entity2_pos = load_pkl("dev_pos2")
    test_entity2_pos = load_pkl("test_pos2")

    # BERT-style inputs per split: token indices, segment ids, and labels.
    # NOTE(review): the test labels are stored under 'tests_y' (plural),
    # unlike 'train_y'/'dev_y' — verify this is the intended pickle name.
    train_x_indices = load_pkl('train_indices')
    train_x_segments = load_pkl('train_segments')
    train_y = load_pkl('train_y')
    dev_x_indices = load_pkl('dev_indices')
    dev_x_segments = load_pkl('dev_segments')
    dev_y = load_pkl('dev_y')
    test_x_indices = load_pkl('test_indices')
    test_x_segments = load_pkl('test_segments')
    test_y = load_pkl('tests_y')
Пример #4
0
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model


if __name__ == "__main__":

    # train_entity1_pos = load_pkl("train_pos1")
    # dev_entity1_pos = load_pkl("dev_pos1")
    # test_entity1_pos = load_pkl("test_pos1")
    # train_entity2_pos = load_pkl("train_pos2")
    # dev_entity2_pos = load_pkl("dev_pos2")
    # test_entity2_pos = load_pkl("test_pos2")

    train_x_indices = load_pkl('train_indices')
    train_x_segments = load_pkl('train_segments')
    train_y = load_pkl('train_y')
    dev_x_indices = load_pkl('dev_indices')
    dev_x_segments = load_pkl('dev_segments')
    dev_y = load_pkl('dev_y')
    test_x_indices = load_pkl('test_indices')
    test_x_segments = load_pkl('test_segments')
    test_y = load_pkl('tests_y')

    train = []
    for i in range(len(train_x_indices)):
        temp = []
        temp.append(train_x_indices[i])
        temp.append(train_x_segments[i])
        # temp.append(train_entity1_pos[i])
Пример #5
0
        for sen in doc:
            se += 1
            leng = sen.split(" ")
            max_len = max(len(leng), max_len)
            if len(leng) < l:
                ll += 1

    print("max_sen", max_sen)
    print("max_len", max_len)
    print("ss", ss)
    print("ll", ll)
    print("sen", se)


if __name__ == "__main__":
    train_text = load_pkl("train_sentence")
    train_pos1 = load_pkl("entity1_pos")
    train_pos2 = load_pkl("entity2_pos")
    train_indices = load_pkl("position1")
    train_segments = load_pkl("position2")
    print()
    # myreader = Reader()

    # train_instances, train_y, train_entity ,train_doc= myreader.read_file(name='train')
    # dev_instances, dev_y, dev_entity, dev_doc = myreader.read_file(name='dev')
    # test_instances, tests_y, test_entity,test_doc = myreader.read_file(name='test')
    #
    # train_instances,train_y, dev_instances, dev_y,train_entity,dev_entity = myreader.resign_dev(train_instances, dev_instances, train_y, dev_y, train_entity,dev_entity)  # 20%
    #
    # train_all_doc, train_max_sentence = myreader.divide_sentence(train_instances)
    # dev_all_doc, dev_max_sentence = myreader.divide_sentence(dev_instances)
Пример #6
0

def topk():
    """Plot F-score (%) against the hyper-parameter K and save the figure.

    The figure is written to 'topk_k.jpg' BEFORE plt.show(): show() ends the
    interactive display and releases the current figure, so a savefig()
    issued afterwards typically writes a blank image.
    """
    x = [100, 200, 300, 400, 500, 600]
    y = [57.55, 59.01, 56.46, 60.09, 58.38, 56.92]
    plt.figure(figsize=(8, 4))  # create the plotting figure
    plt.plot(x, y, "b--", linewidth=1)  # blue dashed line, width 1
    plt.xlabel("K")  # x-axis label
    plt.ylabel("F(%)")  # y-axis label
    plt.title("the performance comparison")  # figure title
    plt.savefig("topk_k.jpg")  # save first, while the figure is still live
    plt.show()  # then display it


if __name__ == "__main__":
    attention = load_pkl('entity_align')
    # temp=[2.702659,3.7815566,3.170545 ]
    # a.append(temp)
    # temp = [1.0777982,1.203137,1.1610005]
    # a.append(temp)
    # temp = [1.0904192,0.62009156,1.1182997]
    # a.append(temp)
    # temp = [3.3131166,4.273107,3.8353167]
    # a.append(temp)
    # temp = [2.820662,2.4218264,3.0692577]
    # a.append(temp)
    # temp = [1.5665073,1.5812267,1.740691]
    a = []
    temp = [3.170545, 1.1610005, 1.1182997, 3.8353167, 3.0692577, 1.740691]
    a.append(temp)
    temp = [3.7815566, 1.203137, 0.62009156, 4.273107, 2.4218264, 1.5812267]
Пример #7
0
    new_pre=[]
    new_y=[]
    index=0
    for i in range(len(predict)):
        if i not in delete_list:
            new_pre.append(predict[i])
            new_y.append(true_y[i])


    new_pre=np.array(new_pre)
    new_y = np.array(new_y)

    # p, r, f = f1(true_y, (predict > 0.5).astype(int))
    p1, r1, f_1 = f1(new_y, (new_pre > 0.5).astype(int))
    # print(f)
    print(f_1)
if __name__ == "__main__":
    predict='0.6009433962264151_predict'
    new_y,predict_y=post_process(predict)
    test_entity = load_pkl('test_doc_entity_list')
    train_entity = load_pkl('train_doc_entity_list')
    dev_entity = load_pkl('dev_doc_entity_list')
    mesh_dict = mesh_id_dict()
    instance_entity_mesh = get_entity_mesh(mesh_dict,test_entity)
    delete_list = delete_special_instance(instance_entity_mesh)
    train_entity_mesh = get_entity_mesh(mesh_dict, train_entity)
    train_delete_list = delete_special_instance(train_entity_mesh)
    dev_entity_mesh = get_entity_mesh(mesh_dict, dev_entity)
    dev_delete_list = delete_special_instance(dev_entity_mesh)
    #
    post2(predict,delete_list,new_y,predict_y)
            if (predict[i] > 0.5):
                intre_predict.append(1)
            else:
                intre_predict.append(0)

    true_y = true_y.tolist()
    for i in range(len(true_y)):
        if i in list:
            temp = true_y[i][0]
            intre_true_y.append(true_y[i][0])

    return intre_predict, intre_true_y


if __name__ == "__main__":
    # Pickled per-split lists of instance indices; element 0 is used below
    # as the set of indices treated as "inter-sentence" — TODO confirm the
    # pickle's structure against the code that produced 'test_delete_list'.
    test_inter_sen_list = load_pkl('test_delete_list')
    # NOTE(review): this shadows the builtin `list`; kept as-is because the
    # helpers below are called with this exact name.
    list = test_inter_sen_list[0]
    file = open('../result/' + '0.5632286995515694_predict', 'rb')
    predict = pickle.load(file)
    file.close()
    true_y = load_pkl('tests_y')

    # Score intra-sentence and inter-sentence subsets separately.
    intra_predict, intra_true_y = intra_sen_classify(list, predict, true_y)
    intre_predict, intre_true_y = inter_sen_classify(list, predict, true_y)

    # f1 is assumed to return (precision, recall, f-score); only F is printed.
    p1, r1, f_1 = f1(intra_true_y, intra_predict)
    print(f_1)

    p2, r2, f_2 = f1(intre_true_y, intre_predict)
    print(f_2)