# NOTE(review): collapsed mid-file fragment of a classifier-evaluation script (newlines lost in paste).
# It loads DKRL CNN/relation/structure embeddings plus the 4/7 train-test split, then (presumably inside
# an averaging loop over 10 runs) builds classifier data and trains Phase3Classifier — TODO confirm, the
# loop body is truncated here. The accumulators sum1..sum8 look like per-metric totals — verify against
# the missing continuation. The comment `三元组分类` is Chinese for "triple classification".
# cnn2vec = loadData_DKRL("./data/1000nepoches/res_3_4_1000/cnn_vec_out.txt", "./data/train_data/entity2id.txt") # rel2vec = loadData_DKRL("./data/1000nepoches/res_3_4_1000/relation2vec.bern", "./data/train_data/relation2id.txt") # stru2vec = loadData_DKRL("./data/1000nepoches/res_3_4_1000/entity2vec.bern", "./data/train_data/entity2id.txt") # test_num, test = openTxt.openTrain("./data/train_data/test_3_4.txt") # train_num, train = openTxt.openTrain("./data/train_data/train_3_4.txt") cnn2vec = loadData_DKRL( "./data/EAnci/1000nepoches/res_4_7_1000/cnn_vec_out.txt", "./data/train_data/entity2id.txt") rel2vec = loadData_DKRL( "./data/EAnci/1000nepoches/res_4_7_1000/relation2vec.bern", "./data/train_data/relation2id.txt") stru2vec = loadData_DKRL( "./data/EAnci/1000nepoches/res_4_7_1000/entity2vec.bern", "./data/train_data/entity2id.txt") test_num, test = openTxt.openTrain("./data/train_data/test_4_7.txt") train_num, train = openTxt.openTrain("./data/train_data/train_4_7.txt") ###new_y_predict = SameInheritClass_Recovery.SameInheritClassRecovery(x_test_triple,y_predict,train) ###precissio, recall, acc, f1 = Result.get_result(y_test, new_y_predict) ###三元组分类 # print("rate = {}".format(0.85)) # y_predict_cnn,y_predict_multi = TripleClassfication.TripleClassfication(train,x_test_triple,y_predict,cnn2vec,stru2vec,rel2vec,0.85) # precissio, recall, acc, f1 = Result.get_result(y_test, y_predict_cnn) # precissio, recall, acc, f1 = Result.get_result(y_test, y_predict_multi) sum1, sum2, sum3, sum4 = 0, 0, 0, 0 sum5, sum6, sum7, sum8 = 0, 0, 0, 0 for i in range(0, 10): x_train, y_train, x_test, y_test, x_test_triple = generateClassiferData( train, test, cnn2vec, stru2vec, 1, (4 / 7)) y_predict = Phase3Classifier.train(x_train, y_train, x_test, y_test, 1)
# NOTE(review): collapsed fragment of an eTour experiment script, truncated at BOTH ends: it opens with
# the tail of a sklearn-style import ("discriminant_analysis, random_projection)") whose first half is
# missing, and it ends inside the `for triple in dkrl_train + trainZ_N` loop — the `entity_type.append`
# that should follow the second `entity_vec.append` is cut off (TODO confirm against the full file).
# Visible behavior: load DKRL embeddings and five eTour data files, then collect the structure vectors
# of entities appearing in "oracle_link" triples, tagging each with a type flag (1 for heads).
discriminant_analysis, random_projection) if __name__ == '__main__': cnn2vec = eTour_Experiment.loadData_DKRL( "./data/eTour/dkrl_result/res_eTour_1200_100_H_M_Improve/cnn_vec_out.txt", "./data/eTour/train_data/entity2id.txt") rel2vec = eTour_Experiment.loadData_DKRL( "./data/eTour/dkrl_result/res_eTour_1200_100_H_M_Improve/relation2vec.bern", "./data/eTour/train_data/relation2id.txt") stru2vec = eTour_Experiment.loadData_DKRL( "./data/eTour/dkrl_result/res_eTour_1200_100_H_M_Improve/entity2vec.bern", "./data/eTour/train_data/entity2id.txt") # test_num, test = openTxt.openTrain("./data/eTour/train_data/testZ.txt") # train_num, train = openTxt.openTrain("./data/eTour/train_data/trainZ.txt") # oracle_num,oracleList = openTxt.openOracle("./data/eTour/oracle/oracle line.txt") dkrl_train_num, dkrl_train = openTxt.openTrain( "./data/eTour/train_data/DKRL_trainZ.txt") dkrl_test_num, dkrl_test = openTxt.openTrain( "./data/eTour/train_data/DKRL_testZ.txt") trainZ_N_num, trainZ_N = openTxt.openTrain( "./data/eTour/train_data/trainZ_N.txt") ClassiferTestTriples_Num, ClassiferTestTriples = openTxt.openTrain( "./data/eTour/train_data/Classifer_test_Z.txt") entity_vec = [] entity_type = [] for triple in dkrl_train + trainZ_N: if triple[2] == "oracle_link": if stru2vec[triple[0]] not in entity_vec: entity_vec.append(stru2vec[triple[0]]) entity_type.append(1) if stru2vec[triple[1]] not in entity_vec: entity_vec.append(stru2vec[triple[1]])
# NOTE(review): collapsed fragment. The opening `if w in bigWord ... return word_list` is the TAIL of a
# word-splitting function (presumably `NameSplit`, called below) whose `def` line is outside this view —
# do not treat the bare `return` as top-level code. The `__main__` part collects the distinct code
# entities from the XML parse triples, splits each entity name into words, builds a word dictionary,
# and loads a word->entity conversion table; `word_i2e_dict` is unused here, so the script most likely
# continues beyond this fragment — TODO confirm.
if w in bigWord: newText += ("_" + w) else: newText += w temp_list = newText.split("_") word_list = [] for word in temp_list: if len(word) == 0: continue else: word_list.append(word) return word_list if __name__ == '__main__': xmlParseTripleNum, xmlParseTriple = openTxt.openTrain( "./data/eTour/eTour_xmlParseResult.txt") code_entity_list = [] word_dictionary = [] for triple in xmlParseTriple: if triple[0] not in code_entity_list: code_entity_list.append(triple[0]) if triple[1] not in code_entity_list: code_entity_list.append(triple[1]) code_entity_word_dict = {} for entity in code_entity_list: code_entity_word_dict[entity] = NameSplit(entity) for word in code_entity_word_dict[entity]: if word not in word_dictionary: word_dictionary.append(word) word_num, word_i2e_dict = openTxt.openEntity2Des( "./data/convert_word_list.txt")
import nltk
import Tool
import openTxt
import random

if __name__ == '__main__':
    # Data preparation for the eTour cross-validation experiment: load the
    # EA-file -> class mapping, the word lists for code and use-case
    # entities, the XML parse triples, the oracle links, and the use-case
    # line links. Script-level names are kept as-is; later stages
    # (presumably in the continuation of this file) read them.
    file_num, eTourCC2class = openTxt.eTourCC2class_open(
        "./data/eTour/eTour_ea2class.txt")
    word_num, codeEntityWords = openTxt.openTrain(
        "./data/eTour/prosessData/codeEntityWords.txt")
    word_num, codeEntity2Words = openTxt.openEntity2Des(
        "./data/eTour/prosessData/codeEntityWords.txt")
    xmlParser_num, xmlParser = openTxt.openTrain(
        "./data/eTour/eTour_xmlParseResult.txt")
    oracle_num, temp_oracleList = openTxt.openOracle(
        "./data/eTour/oracle/oracle line.txt")
    ucLineNum, uc2ucLine = openTxt.openTrain(
        "./data/eTour/prosessData/uc2ucLink.txt")
    uc_num, ucEntityWords = openTxt.openTrain(
        "./data/eTour/prosessData/ucEntityWords.txt")

    oracleList = []
    # ucName2file = {}
    # for file in eTourCC2class:
    #     ucName2file[eTourCC2class[file]["class"][0]] = file
    # print(ucName2file["Beniculturali"])

    # Three-fold cross validation, one output directory per fold.
    dirList = ["One", "Two", "Three"]
    crossValidationNum = 3

    # Rewrite every oracle triple so its target is the class name registered
    # for that file in the EA-to-class mapping (first entry of "class").
    for triple in temp_oracleList:
        mappedClass = eTourCC2class[triple[1]]["class"][0]
        oracleList.append((triple[0], mappedClass, triple[2]))
import openTxt

if __name__ == '__main__':
    # Build ./data/word2id.txt: one line per distinct word found in the
    # description field (index 2) of the entity-words triples, in
    # first-seen order, each mapped to its sequential id.
    triple_num, triple_List = openTxt.openTrain("./data/entityWords.txt")

    vocabulary = []
    for entry in triple_List:
        for token in entry[2].strip().split(" "):
            if token not in vocabulary:
                vocabulary.append(token)

    with open("./data/word2id.txt", "w", encoding="utf-8") as file:
        file.writelines(
            "{}\t{}\n".format(token, idx)
            for idx, token in enumerate(vocabulary))
import nltk
import NameSplit
import openTxt
import Tool
import re

if __name__ == '__main__':
    # Merge code-entity names into the class-comment table and build a
    # case-tolerant word->entity lookup. Script-level names are preserved;
    # saveEnglish and ucEntityWords are unused here and presumably consumed
    # by the continuation of this file.
    class_num, class2comment = openTxt.openDescription(
        "./data/eTour/class2comment.txt")
    file_num, eTourCC2class = openTxt.eTourCC2class_open(
        "./data/eTour/eTour_ea2class.txt")
    entity_num, entity2word = openTxt.openEntity2Des(
        "./data/eTour/code_entity_word_dict.txt")
    word_num, temp_word_i2e_dict = openTxt.openEntity2Des(
        "./data/convert_word_list.txt")
    uc_num, ucEntityWords = openTxt.openTrain(
        "./data/eTour/prosessData/uc2des.txt")

    word_i2e_dict = {}
    # Strips every non-ASCII-letter character when applied.
    saveEnglish = re.compile("[^a-zA-Z]")

    # For each known entity: append its name to the comment of the matching
    # class, or register it as a new (name, name) entry when no class
    # comment exists for it yet.
    classList = [tu[0] for tu in class2comment]
    for key in entity2word:
        if key in classList:
            index = classList.index(key)
            class2comment[index][1] += (" " + key)
        else:
            class2comment.append((key, key))

    # Index each conversion-table entry under its original spelling plus its
    # lower-case and capitalized variants, never overwriting an earlier entry.
    for key in temp_word_i2e_dict.keys():
        if key not in word_i2e_dict:
            word_i2e_dict[key] = temp_word_i2e_dict[key]
        if key.lower() not in word_i2e_dict:
            word_i2e_dict[key.lower()] = temp_word_i2e_dict[key]
        if key.capitalize() not in word_i2e_dict:
            word_i2e_dict[key.capitalize()] = temp_word_i2e_dict[key]