def easyquestion_friendlyname_pos_entity():
    """Annotate each easy test question's first friendly-name record with POS tags.

    Loads the POS-tagged easy questions and the per-question friendly-name
    records.  Every tagged item is split on tabs; the first field is used as
    the POS tag and the second as the word.  For each question:

    * ``pos_ques`` is the tab-terminated sequence of all POS tags;
    * for every word that occurs inside the first record (``in`` test on the
      record string), a tab, the word, a tab, the tag and ``###`` are
      appended to that record.

    The mapping ``ques + "###" + pos_ques -> annotated record`` is written
    with ``write_dict_str``.  The set of matched tags and the set of
    per-question matched-tag sequences are printed.  Returns nothing.
    """
    tagged_questions = read_posques_posword(
        "..\\data\\test\\test.easy.quespos.posword")
    records_by_question = read_ques_fn_entity(
        "..\\data\\test\\test.question.friendlyname.entity")
    annotated = {}
    matched_tags = set()
    matched_tag_sequences = set()

    for ques in tagged_questions:
        # Only the first record of the question is considered.
        first_record = records_by_question[ques][0]
        tag_word_pairs = [item.split("\t") for item in tagged_questions[ques]]
        # (word, tag) for every tagged word that occurs inside the record.
        hits = [(fields[1], fields[0]) for fields in tag_word_pairs
                if fields[1] in first_record]

        pos_ques = "".join(fields[0] + "\t" for fields in tag_word_pairs)
        matched_tags.update(tag for _, tag in hits)
        matched_tag_sequences.add("".join(tag + "\t" for _, tag in hits))
        annotated[ques + "###" + pos_ques] = first_record + "".join(
            "\t" + word + "\t" + tag + "###" for word, tag in hits)

    write_dict_str(annotated,
                   "..\\data\\test\\test.easyquespos.friename.wordpos")
    print(matched_tags)
    print(matched_tag_sequences)
def question_friendlyname_pos_entity():
    """Annotate every friendly-name record of each training question with POS tags.

    Loads the POS-tagged questions and the per-question friendly-name records.
    Every tagged item is split on tabs; the first field is used as the POS tag
    and the second as the word.  For each question and each of its records,
    every word that occurs inside the record (``in`` test on the record
    string) contributes a tab, the word, a tab, the tag and ``###`` appended
    to that record.

    The mapping ``ques + "###" + pos_ques -> list of annotated records`` is
    written with ``write_dict``, where ``pos_ques`` is the tab-terminated
    sequence of all POS tags of the question.  The set of matched tags and
    the set of per-record matched-tag sequences are printed.  Returns nothing.
    """
    tagged_questions = read_posques_posword("..\\data\\cluster\\quespos_posword")
    records_by_question = read_ques_fn_entity(
        "..\\data\\cluster\\train.question.friendlyname.entity")
    annotated = {}
    pos_contained_set = set()
    pos_combination_set = set()

    for ques in tagged_questions:
        fnentity = records_by_question[ques]
        tag_word_pairs = [item.split("\t") for item in tagged_questions[ques]]

        # The tag sequence depends only on the question, so build it once
        # here.  This keeps the output key well-defined for a question that
        # has no records, instead of relying on a value assigned inside the
        # record loop.
        pos_ques = "".join(fields[0] + "\t" for fields in tag_word_pairs)

        fnentity_word_pos_list = []
        for fnentity_one in fnentity:
            annotated_record = fnentity_one
            matched_in_record = []
            for fields in tag_word_pairs:
                pos, word = fields[0], fields[1]
                if word in fnentity_one:
                    annotated_record += "\t" + word + "\t" + pos + "###"
                    pos_contained_set.add(pos)
                    matched_in_record.append(pos + "\t")
            pos_combination_set.add("".join(matched_in_record))
            fnentity_word_pos_list.append(annotated_record)

        annotated[ques + "###" + pos_ques] = fnentity_word_pos_list

    write_dict(annotated,
               "..\\data\\cluster\\train.easyquespos.friename.wordpos")
    print(pos_contained_set)
    print(pos_combination_set)
def question_friendlynamejinsuo_pos_entity():
    """Collect the POS patterns of question spans that spell a friendly name.

    For every training question and every one of its friendly-name records,
    the friendly name (first tab field of the record) is compacted by
    removing spaces.  The tagged words of the question are then scanned for a
    single word, or a run of consecutive words concatenated without
    separators, that equals the compact name, the compact name plus ``s``, or
    the compact name plus ``.``.

    A single-word match adds that word's bare POS tag to the result set; a
    multi-word match adds the tab-terminated sequence of the tags of the run.
    Records with no match at all are printed together with the question and
    its tagged words.

    The set is written with ``write_set`` and returned.
    """
    tagged_questions = read_posques_posword(
        "../data/cluster/train.quespos.posword")
    records_by_question = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    mention_pos_patterns = set()

    for ques in tagged_questions:
        records = records_by_question[ques]
        tagged_words = tagged_questions[ques]

        for record in records:
            record_fields = record.split("\t")
            compact_name = record_fields[0].replace(" ", "")
            # Unused, but reading it keeps the IndexError raised for a record
            # that has no second tab field.
            _entity = record_fields[1]
            accepted = (compact_name, compact_name + "s", compact_name + ".")
            matched = False
            total = len(tagged_words)

            for start in range(total):
                head = tagged_words[start].split("\t")
                tag, token = head[0], head[1]

                if token in accepted:
                    matched = True
                    mention_pos_patterns.add(tag)
                    continue
                if token not in compact_name:
                    continue

                # The word is a fragment of the name: keep gluing on the
                # following words until the run matches or stops being a
                # fragment of the compact name.
                joined_tags = tag + "\t"
                joined_tokens = token
                for end in range(start + 1, total):
                    following = tagged_words[end].split("\t")
                    joined_tokens += following[1]
                    joined_tags += following[0] + "\t"
                    if joined_tokens in accepted:
                        matched = True
                        mention_pos_patterns.add(joined_tags)
                        break
                    if joined_tokens not in compact_name:
                        break

            if not matched:
                print(record + "\t" + ques)
                print(tagged_words)

    write_set(mention_pos_patterns,
              "../data/cluster/train.mention.pos.scomposition")
    return mention_pos_patterns
def train_mention_pos_equal_position():
    """Run ``mention_position_pos_equal`` on the easy test set against training data.

    Loads the POS-tagged easy test questions, the POS-tagged training
    questions and the training friendly-name records, and returns whatever
    ``mention_position_pos_equal`` produces for them (in that argument order).
    """
    test_tagged = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    train_tagged = read_posques_posword(
        "../data/cluster/train.quespos.posword")
    train_records = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    return mention_position_pos_equal(test_tagged, train_tagged, train_records)
def entity_not_hit_question():
    """Return the questions whose target entity is absent from ``hit_by_np()``.

    The target entity of a question is the second tab field of its first
    friendly-name record.  A question is reported when that entity is not
    contained (``not in``) in the value ``hit_by_np()`` holds for it.

    The resulting set is written with ``write_set`` and returned.
    """
    candidates_by_question = hit_by_np()
    records_by_question = read_ques_fn_entity(
        "..\\data\\test\\test.question.friendlyname.entity")

    missed = {
        ques
        for ques in candidates_by_question
        if records_by_question[ques][0].split("\t")[1]
        not in candidates_by_question[ques]
    }

    write_set(missed, "..\\data\\test\\test.easy.ques.np_not_hit")
    return missed
def entity_pro_hit_question():
    """Return, per question, the target entity found by ``hit_by_np()`` and its value.

    The target entity of a question is the second tab field of its first
    friendly-name record.  When that entity is a key of the question's entry
    in ``hit_by_np()``, the question is mapped to the string
    ``str(entity) + tab + str(value stored for that entity)``.
    NOTE(review): the stored value is presumably a score or probability;
    confirm against ``hit_by_np``.

    The mapping is written with ``write_dict_str`` and returned.
    """
    hits = {}
    candidates_by_question = hit_by_np()
    records_by_question = read_ques_fn_entity(
        "..\\data\\test\\test.question.friendlyname.entity")

    for ques in candidates_by_question:
        candidates = candidates_by_question[ques]
        target = records_by_question[ques][0].split("\t")[1]
        if target not in candidates:
            continue
        hits[ques] = "\t".join((str(target), str(candidates[target])))

    write_dict_str(hits, "..\\data\\test\\test.easy.ques.np_hit_entity_pro")
    return hits
def train_frname_in_test_position():
    """Map each easy test question to ``train_frname_position`` over all training records.

    All friendly-name records of every training question are pooled into one
    set.  Each test question's tagged words are then passed, together with
    that set, to ``train_frname_position``; the per-question results are
    returned as a dict keyed by question.
    """
    test_tagged = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    train_records = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")

    # Union of the records of all training questions.
    pooled_records = set()
    for train_ques in train_records:
        pooled_records.update(train_records[train_ques])

    return {
        ques: train_frname_position(pooled_records, test_tagged[ques])
        for ques in test_tagged
    }
def match_by_friendlyname():
    """Match phrases of each easy test question through ``friendlyname_entity_match``.

    For every question the tagged words are converted with
    ``posword_wordlist``, expanded with ``combine_wordlist`` and looked up
    with ``friendlyname_entity_match``.  Questions with a non-empty lookup
    result are returned in a dict keyed by
    ``question + "###" + tab-joined friendly-name records``.
    """
    test_tagged = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    test_records = read_ques_fn_entity(
        "../data/test/test.question.friendlyname.entity")
    matches = {}

    for question in test_tagged:
        records = test_records[question]
        phrases = combine_wordlist(posword_wordlist(test_tagged[question]))
        found = friendlyname_entity_match(phrases)
        if len(found) == 0:
            continue
        matches[question + "###" + "\t".join(records)] = found

    return matches
def test_easy_mention_position():
    """Group easy test questions by where their friendly name occurs.

    For every question and each of its friendly-name records, the friendly
    name (first tab field of the record) is compacted by removing spaces.
    The tagged words of the question (second tab field of each item, with
    backticks replaced by apostrophes) are scanned for a single word, or a
    run of consecutive words concatenated without separators, that equals
    the compact name, the compact name plus ``s``, or the compact name plus
    ``.``.

    A single-word match at index ``i`` is recorded as ``str(i)``; a run from
    ``i`` to ``j`` is recorded as ``str(i)``, a tab, ``str(j)``.  The question
    is then filed under the first recorded position.

    Diagnostics are printed for questions that do not have exactly one
    record, for records that are not found, and for questions that do not
    yield exactly one position.  Questions with no position at all are
    reported and skipped, since there is no position to file them under.

    Returns:
        dict mapping a position string to the set of
        ``ques + "###" + tab-joined tagged words`` strings located there.
    """
    question_posword = read_posques_posword(
        "..\\data\\test\\test.easy.quespos.posword")
    question_fnentity = read_ques_fn_entity(
        "..\\data\\test\\test.question.friendlyname.entity")
    position_question_posword = dict()

    for ques in question_posword:
        fnentity = question_fnentity[ques]
        posword = question_posword[ques]
        if len(fnentity) != 1:
            print(ques)

        position = []
        # Stays None when the question has no records, so the diagnostics
        # below never touch an unbound or stale name.
        friendlyname = None

        for fnentity_one in fnentity:
            hit = False
            friendlyname = fnentity_one.split("\t")[0]
            compact_name = friendlyname.replace(" ", "")
            accepted = (compact_name, compact_name + "s", compact_name + ".")
            size_posword = len(posword)

            for i in range(size_posword):
                word = posword[i].split("\t")[1].replace("`", "'")
                if word in accepted:
                    hit = True
                    position.append(str(i))
                    continue
                if not any(word in variant for variant in accepted):
                    continue

                # The word is a fragment of the name: extend the run word by
                # word until it matches or is no longer a fragment of any
                # accepted spelling.
                word_comb = word
                for j in range(i + 1, size_posword):
                    word_comb += posword[j].split("\t")[1].replace("`", "'")
                    if word_comb in accepted:
                        hit = True
                        position.append("\t".join([str(i), str(j)]))
                        break
                    if not any(word_comb in variant for variant in accepted):
                        break

            if not hit:
                print(friendlyname)
                print(ques)

        if len(position) != 1:
            print(position)
            print(friendlyname)
            print(ques)
        if not position:
            # Nothing was located, so there is no first position to index.
            continue

        position_question_posword.setdefault(position[0], set()).add(
            ques + "###" + "\t".join(posword))

    return position_question_posword
def conquer():
    """Link each easy test question to candidate entities with staged fallbacks.

    Stages, tried in order for every question:

    1. phrases built from the question's own words
       (``posword_wordlist`` -> ``combine_wordlist``) looked up with
       ``friendlyname_entity_match``; a non-empty result is used as is;
    2. otherwise, if the question has an entry in the ``mention_pos_equal``
       result, that entry is looked up with the friendly-name, alias, name
       and clueweb matchers and the four results are merged with
       ``add_dict_dict``;
    3. otherwise, the same four-matcher lookup on the question's entry in the
       ``mention_pos_similar`` result, if it has one.

    Questions served by no stage are left out.  Keys of the result are
    ``question + "###" + tab-joined friendly-name records``.  Finally every
    per-phrase candidate dict is rebuilt in descending order of its values,
    the number of linked questions is printed, and the result is returned.
    """
    test_tagged = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    train_tagged = read_posques_posword(
        "../data/cluster/train.quespos.posword")
    train_records = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    test_records = read_ques_fn_entity(
        "../data/test/test.question.friendlyname.entity")

    entity_match = dict()
    equal_mentions = mention_pos_equal(test_tagged, train_tagged,
                                       train_records)
    similar_mentions = mention_pos_similar(test_tagged, train_tagged,
                                           train_records)

    for question in test_tagged:
        records = test_records[question]
        phrases = combine_wordlist(posword_wordlist(test_tagged[question]))
        direct = friendlyname_entity_match(phrases)

        if len(direct) > 0:
            entity_match[question + "###" + "\t".join(records)] = direct
            continue

        # Fall back to mentions proposed from training data: exact POS
        # matches first, similar ones second.
        if question in equal_mentions:
            mentions = equal_mentions[question]
        elif question in similar_mentions:
            mentions = similar_mentions[question]
        else:
            continue

        # Query every source before merging, then merge in the same order.
        matchers = (friendlyname_entity_match, aliases_entity_match,
                    name_entity_match, clueweb_entity_match)
        per_source = [matcher(mentions) for matcher in matchers]
        merged = dict()
        for source_result in per_source:
            merged = add_dict_dict(merged, source_result)
        entity_match[question + "###" + "\t".join(records)] = merged

    # Rebuild each candidate dict ordered by value, largest first.
    for phrase_map in entity_match.values():
        for phrase in phrase_map:
            phrase_map[phrase] = dict(
                sorted(phrase_map[phrase].items(),
                       key=lambda pair: pair[1], reverse=True))

    print(len(entity_match))
    return entity_match