Exemplo n.º 1
0
    # cnn2vec = loadData_DKRL("./data/1000nepoches/res_3_4_1000/cnn_vec_out.txt", "./data/train_data/entity2id.txt")
    # rel2vec = loadData_DKRL("./data/1000nepoches/res_3_4_1000/relation2vec.bern", "./data/train_data/relation2id.txt")
    # stru2vec = loadData_DKRL("./data/1000nepoches/res_3_4_1000/entity2vec.bern", "./data/train_data/entity2id.txt")
    # test_num, test = openTxt.openTrain("./data/train_data/test_3_4.txt")
    # train_num, train = openTxt.openTrain("./data/train_data/train_3_4.txt")

    # Load DKRL-produced embeddings for the 4_7 data split.
    # loadData_DKRL presumably maps each id listed in the *2id.txt file to
    # its vector from the first file — TODO confirm against its definition.
    cnn2vec = loadData_DKRL(
        "./data/EAnci/1000nepoches/res_4_7_1000/cnn_vec_out.txt",
        "./data/train_data/entity2id.txt")
    rel2vec = loadData_DKRL(
        "./data/EAnci/1000nepoches/res_4_7_1000/relation2vec.bern",
        "./data/train_data/relation2id.txt")
    stru2vec = loadData_DKRL(
        "./data/EAnci/1000nepoches/res_4_7_1000/entity2vec.bern",
        "./data/train_data/entity2id.txt")
    # Train/test triples for the 4_7 split: openTrain returns (count, triples).
    test_num, test = openTxt.openTrain("./data/train_data/test_4_7.txt")
    train_num, train = openTxt.openTrain("./data/train_data/train_4_7.txt")

    ###new_y_predict = SameInheritClass_Recovery.SameInheritClassRecovery(x_test_triple,y_predict,train)
    ###precissio, recall, acc, f1 = Result.get_result(y_test, new_y_predict)
    ### Triple classification
    # print("rate = {}".format(0.85))
    # y_predict_cnn,y_predict_multi = TripleClassfication.TripleClassfication(train,x_test_triple,y_predict,cnn2vec,stru2vec,rel2vec,0.85)
    # precissio, recall, acc, f1 = Result.get_result(y_test, y_predict_cnn)
    # precissio, recall, acc, f1 = Result.get_result(y_test, y_predict_multi)
    # Accumulators, presumably for averaging metrics over the 10 runs below;
    # their updates happen past the end of this chunk — TODO confirm.
    sum1, sum2, sum3, sum4 = 0, 0, 0, 0
    sum5, sum6, sum7, sum8 = 0, 0, 0, 0
    # Run the classifier 10 times on freshly generated data; the loop body
    # continues beyond this visible chunk.
    for i in range(0, 10):
        x_train, y_train, x_test, y_test, x_test_triple = generateClassiferData(
            train, test, cnn2vec, stru2vec, 1, (4 / 7))
        y_predict = Phase3Classifier.train(x_train, y_train, x_test, y_test, 1)
Exemplo n.º 2
0
                     discriminant_analysis, random_projection)

if __name__ == '__main__':
    # Load DKRL embeddings trained on the eTour dataset: description-CNN
    # vectors, relation vectors, and structural entity vectors, each keyed
    # by the ids in the corresponding *2id.txt file.
    cnn2vec = eTour_Experiment.loadData_DKRL(
        "./data/eTour/dkrl_result/res_eTour_1200_100_H_M_Improve/cnn_vec_out.txt",
        "./data/eTour/train_data/entity2id.txt")
    rel2vec = eTour_Experiment.loadData_DKRL(
        "./data/eTour/dkrl_result/res_eTour_1200_100_H_M_Improve/relation2vec.bern",
        "./data/eTour/train_data/relation2id.txt")
    stru2vec = eTour_Experiment.loadData_DKRL(
        "./data/eTour/dkrl_result/res_eTour_1200_100_H_M_Improve/entity2vec.bern",
        "./data/eTour/train_data/entity2id.txt")
    # test_num, test = openTxt.openTrain("./data/eTour/train_data/testZ.txt")
    # train_num, train = openTxt.openTrain("./data/eTour/train_data/trainZ.txt")
    # oracle_num,oracleList = openTxt.openOracle("./data/eTour/oracle/oracle line.txt")
    # Triple sets for the DKRL train/test split plus negative samples and the
    # classifier test set. openTrain returns a (count, triples) pair.
    dkrl_train_num, dkrl_train = openTxt.openTrain(
        "./data/eTour/train_data/DKRL_trainZ.txt")
    dkrl_test_num, dkrl_test = openTxt.openTrain(
        "./data/eTour/train_data/DKRL_testZ.txt")
    trainZ_N_num, trainZ_N = openTxt.openTrain(
        "./data/eTour/train_data/trainZ_N.txt")
    ClassiferTestTriples_Num, ClassiferTestTriples = openTxt.openTrain(
        "./data/eTour/train_data/Classifer_test_Z.txt")
    # Collect the structural vector of every entity that participates in an
    # "oracle_link" triple, labelling head entities with 1. The handling of
    # tail entities (triple[1]) is cut off at the end of this chunk.
    entity_vec = []
    entity_type = []
    for triple in dkrl_train + trainZ_N:
        if triple[2] == "oracle_link":
            if stru2vec[triple[0]] not in entity_vec:
                entity_vec.append(stru2vec[triple[0]])
                entity_type.append(1)
            if stru2vec[triple[1]] not in entity_vec:
                entity_vec.append(stru2vec[triple[1]])
Exemplo n.º 3
0
        # Fragment (function header not visible): words found in `bigWord`
        # are prefixed with "_" so they become split boundaries below.
        if w in bigWord:
            newText += ("_" + w)
        else:
            newText += w
    # Split on the inserted underscores and drop the empty strings produced
    # by leading/trailing/consecutive "_" characters.
    temp_list = newText.split("_")
    word_list = []
    for word in temp_list:
        if len(word) == 0:
            continue
        else:
            word_list.append(word)
    # Returns the non-empty word fragments in their original order.
    return word_list


if __name__ == '__main__':
    # Build the vocabulary of code-entity name fragments for eTour:
    # read the XML-parse triples, collect every distinct entity, split each
    # entity name into words, and gather the unique words. The chunk is
    # truncated after the last load below.
    xmlParseTripleNum, xmlParseTriple = openTxt.openTrain(
        "./data/eTour/eTour_xmlParseResult.txt")
    code_entity_list = []
    word_dictionary = []
    # Collect every distinct head (triple[0]) and tail (triple[1]) entity.
    for triple in xmlParseTriple:
        if triple[0] not in code_entity_list:
            code_entity_list.append(triple[0])
        if triple[1] not in code_entity_list:
            code_entity_list.append(triple[1])
    # Map each entity name to its word split, and accumulate the set of
    # unique words across all entities (order of first occurrence kept).
    code_entity_word_dict = {}
    for entity in code_entity_list:
        code_entity_word_dict[entity] = NameSplit(entity)
        for word in code_entity_word_dict[entity]:
            if word not in word_dictionary:
                word_dictionary.append(word)
    # Word id-to-entry mapping; usage continues past this chunk.
    word_num, word_i2e_dict = openTxt.openEntity2Des(
        "./data/convert_word_list.txt")
import nltk
import Tool
import openTxt
import random

if __name__ == '__main__':
    # Load the eTour preprocessing artifacts needed to rewrite the oracle
    # trace links so their second element is a class name instead of a file.
    # The chunk is truncated after the rewrite loop below.
    file_num, eTourCC2class = openTxt.eTourCC2class_open(
        "./data/eTour/eTour_ea2class.txt")
    word_num, codeEntityWords = openTxt.openTrain(
        "./data/eTour/prosessData/codeEntityWords.txt")
    word_num, codeEntity2Words = openTxt.openEntity2Des(
        "./data/eTour/prosessData/codeEntityWords.txt")
    xmlParser_num, xmlParser = openTxt.openTrain(
        "./data/eTour/eTour_xmlParseResult.txt")
    oracle_num, temp_oracleList = openTxt.openOracle(
        "./data/eTour/oracle/oracle line.txt")
    ucLineNum, uc2ucLine = openTxt.openTrain(
        "./data/eTour/prosessData/uc2ucLink.txt")
    uc_num, ucEntityWords = openTxt.openTrain(
        "./data/eTour/prosessData/ucEntityWords.txt")
    oracleList = []
    # ucName2file = {}
    # for file in eTourCC2class:
    #     ucName2file[eTourCC2class[file]["class"][0]] = file
    # print(ucName2file["Beniculturali"])
    # Fold names / count for 3-fold cross validation; used past this chunk.
    dirList = ["One", "Two", "Three"]
    crossValidationNum = 3

    # Replace the file reference (triple[1]) with the first class declared
    # in that file, per the ea2class mapping loaded above.
    for triple in temp_oracleList:
        oracleList.append(
            (triple[0], eTourCC2class[triple[1]]["class"][0], triple[2]))
Exemplo n.º 5
0
import openTxt

if __name__ == '__main__':
    # Build a word -> integer-id vocabulary from the entity descriptions and
    # write it as "word<TAB>id" lines, ids assigned by first occurrence.
    triple_num, triple_List = openTxt.openTrain("./data/entityWords.txt")
    seen = set()     # O(1) membership test (was an O(n) scan of word_list)
    word_list = []   # preserves first-seen order for stable id assignment
    for triple in triple_List:
        # triple[2] is assumed to be the space-separated description text
        # (matches openTrain's triple layout used elsewhere) — TODO confirm.
        for word in triple[2].strip().split(" "):
            if word not in seen:
                seen.add(word)
                word_list.append(word)

    with open("./data/word2id.txt", "w", encoding="utf-8") as file:
        for index, word in enumerate(word_list):
            file.write(word + "\t" + str(index) + "\n")
Exemplo n.º 6
0
import nltk
import NameSplit
import openTxt
import Tool
import re

if __name__ == '__main__':
    # Merge eTour class comments with code-entity word splits and build a
    # case-insensitive word-conversion lookup. The chunk is truncated below.
    class_num,class2comment = openTxt.openDescription("./data/eTour/class2comment.txt")
    file_num,eTourCC2class = openTxt.eTourCC2class_open("./data/eTour/eTour_ea2class.txt")
    entity_num,entity2word = openTxt.openEntity2Des("./data/eTour/code_entity_word_dict.txt")
    word_num,temp_word_i2e_dict = openTxt.openEntity2Des("./data/convert_word_list.txt")
    uc_num,ucEntityWords = openTxt.openTrain("./data/eTour/prosessData/uc2des.txt")

    word_i2e_dict = {}
    # Regex matching every non-ASCII-letter character; usage not visible here.
    saveEnglish = re.compile("[^a-zA-Z]")
    # For each known entity, append its name to its class comment; entities
    # without a comment get a (name, name) placeholder entry.
    classList = [tu[0] for tu in class2comment]
    for key in entity2word:
        if key in classList:
            index = classList.index(key)
            # NOTE(review): this item assignment requires class2comment
            # entries to be mutable (lists), yet the append below adds a
            # tuple — confirm openDescription's return shape; a tuple entry
            # matched here on a later run would raise TypeError.
            class2comment[index][1] += (" "+key)
        else:
            class2comment.append((key,key))

    # Index the conversion dict under the original key plus its lowercase
    # and capitalized variants, never overwriting an existing entry.
    for key in temp_word_i2e_dict.keys():
        if key not in word_i2e_dict.keys():
            word_i2e_dict[key] = temp_word_i2e_dict[key]
        if key.lower() not in word_i2e_dict.keys():
            word_i2e_dict[key.lower()] = temp_word_i2e_dict[key]
        if key.capitalize() not in word_i2e_dict.keys():
            word_i2e_dict[key.capitalize()] = temp_word_i2e_dict[key]