Пример #1
0
def train_mention_pos_equal_position():
    """Locate mention positions in the easy test questions via POS equality.

    Loads the easy test POS/word data, the training POS/word data and the
    training friendly-name/entity data, then hands all three to
    ``mention_position_pos_equal``.

    Returns:
        The value produced by ``mention_position_pos_equal``.
    """
    test_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    train_posword = read_posques_posword(
        "../data/cluster/train.quespos.posword")
    train_fnentity = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    return mention_position_pos_equal(test_posword, train_posword,
                                      train_fnentity)
Пример #2
0
def generate_question_entity_pro_relation_words():
    """Attach the question's word set to every linked (question, entity, pro).

    Runs ``conquer()`` to obtain the entity matches, then for each matched
    phrase (except the bare phrases "on", "the" and "of") and each entity
    under that phrase, stores the set of words of the question under the key
    ``"<question>###<entity>###<pro>"``. The mapping is written with
    ``write_dict`` and also returned.

    Returns:
        dict: key string as described above -> set of the question's words.
        Entries that stem from the same phrase share one set object.
    """
    # Single-word phrases that are never treated as mentions.
    skipped_phrases = ("on", "the", "of")

    entity_match = conquer()
    question_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    question_entity_pro_relation_words_concerned = dict()
    for ques_fnentity, phrases in entity_match.items():
        # The key may carry extra "###"-separated fields; the question is
        # always the first one.
        ques = ques_fnentity.split("###")[0]
        word_list = posword_wordlist(question_posword[ques])
        for phrase in phrases:
            if phrase in skipped_phrases:
                continue
            # A fresh set per phrase, so entries of different phrases never
            # alias each other.
            word_relation_concerned = set(word_list)
            entity_pros = phrases[phrase]
            for entity in entity_pros:
                # NOTE(review): "pro" is presumably a score/probability --
                # confirm against the entity matchers.
                pro = entity_pros[entity]
                question_entity_pro = "###".join([ques, entity, str(pro)])
                question_entity_pro_relation_words_concerned[
                    question_entity_pro] = word_relation_concerned
    write_dict(
        question_entity_pro_relation_words_concerned,
        "../data/relation/test.easy.partial.question_entity_pro_relation_words"
    )
    return question_entity_pro_relation_words_concerned
Пример #3
0
def easyquestion_friendlyname_pos_entity():
    """Annotate each easy test question's first friendly-name entry with POS.

    For every question in the easy test POS/word file, each word that occurs
    as a substring of the question's first friendly-name/entity entry is
    appended to that entry as: tab, word, tab, POS tag, "###". The result is
    keyed by ``"<question>###<all POS tags, each followed by a tab>"`` and
    written with ``write_dict_str``. The set of POS tags that matched and the
    set of per-question matched-POS sequences are printed.

    Paths use forward slashes so the function works on POSIX as well as on
    Windows, in line with the other loaders of the same data files.

    Returns:
        None. The output is the written file and the two printed sets.
    """
    annotated_by_question = dict()
    easyquestion_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    question_fnentity = read_ques_fn_entity(
        "../data/test/test.question.friendlyname.entity")
    pos_contained_set = set()
    pos_combination_set = set()
    for ques in easyquestion_posword:
        # Only the first friendly-name/entity entry is considered.
        first_fnentity = question_fnentity[ques][0]

        # Each posword item is split on a tab: field 0 is the POS tag,
        # field 1 the word.
        pos_word_pairs = []
        for pos_word in easyquestion_posword[ques]:
            fields = pos_word.split("\t")
            pos_word_pairs.append((fields[0], fields[1]))

        # Substring test: the word only has to occur somewhere inside the
        # entry string.
        matched = [(pos, word) for pos, word in pos_word_pairs
                   if word in first_fnentity]

        pos_contained_set.update(pos for pos, _ in matched)
        pos_combination_set.add("".join(pos + "\t" for pos, _ in matched))

        pos_ques = "".join(pos + "\t" for pos, _ in pos_word_pairs)
        annotated_by_question[ques + "###" + pos_ques] = first_fnentity + "".join(
            "\t" + word + "\t" + pos + "###" for pos, word in matched)
    write_dict_str(annotated_by_question,
                   "../data/test/test.easyquespos.friename.wordpos")
    print(pos_contained_set)
    print(pos_combination_set)
Пример #4
0
def question_friendlyname_pos_entity():
    """Annotate every friendly-name entry of each training question with POS.

    For each question and each of its friendly-name/entity entries, every
    question word that occurs as a substring of the entry is appended to the
    entry as: tab, word, tab, POS tag, "###". The annotated entries are
    collected in a list per question, keyed by
    ``"<question>###<all POS tags, each followed by a tab>"``, and written
    with ``write_dict``. The set of matched POS tags and the set of matched
    POS sequences are printed.

    The POS key is built once per question, so a question without any
    friendly-name entry gets its own correct key and an empty list. Paths use
    forward slashes so the function also works on POSIX.

    Returns:
        None. The output is the written file and the two printed sets.
    """
    annotated_by_question = dict()
    question_posword = read_posques_posword("../data/cluster/quespos_posword")
    question_fnentity = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    pos_contained_set = set()
    pos_combination_set = set()
    for ques in question_posword:
        fnentity = question_fnentity[ques]

        # Each posword item is split on a tab: field 0 is the POS tag,
        # field 1 the word.
        pos_word_pairs = []
        for pos_word in question_posword[ques]:
            fields = pos_word.split("\t")
            pos_word_pairs.append((fields[0], fields[1]))

        # Depends only on the question, never on the entity being processed.
        pos_ques = "".join(pos + "\t" for pos, _ in pos_word_pairs)

        fnentity_word_pos_list = []
        for fnentity_one in fnentity:
            # Substring test against the whole entry string.
            matched = [(pos, word) for pos, word in pos_word_pairs
                       if word in fnentity_one]
            pos_contained_set.update(pos for pos, _ in matched)
            pos_combination_set.add("".join(pos + "\t" for pos, _ in matched))
            fnentity_word_pos_list.append(fnentity_one + "".join(
                "\t" + word + "\t" + pos + "###" for pos, word in matched))
        annotated_by_question[ques + "###" + pos_ques] = fnentity_word_pos_list
    write_dict(annotated_by_question,
               "../data/cluster/train.easyquespos.friename.wordpos")
    print(pos_contained_set)
    print(pos_combination_set)
Пример #5
0
def question_friendlynamejinsuo_pos_entity():
    """Collect the POS patterns of mentions found in the training questions.

    For each training question and each of its friendly-name entries, the
    friendly name (first tab-separated field) is stripped of spaces and
    searched for in the question:

    * a single word equal to the stripped name, or to the name plus "s" or
      ".", contributes that word's POS tag;
    * otherwise, a word that is a substring of the stripped name starts a run
      of concatenated following words; when the run equals the name (or name
      plus "s" / "."), the run's POS tags, each followed by a tab, are
      contributed. The run is abandoned as soon as it is no longer a
      substring of the stripped name.

    Entries for which nothing matched are printed together with the question
    and its POS/word list. The collected patterns are written with
    ``write_set`` and returned.

    Returns:
        set: POS patterns of the located mentions.
    """
    question_posword = read_posques_posword(
        "../data/cluster/train.quespos.posword")
    question_fnentity = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    mention_map_pos_com = set()
    for ques in question_posword:
        fnentity = question_fnentity[ques]
        posword = question_posword[ques]

        # Split every "POS<TAB>word" item once instead of once per entity.
        pos_word_pairs = []
        for pos_word in posword:
            fields = pos_word.split("\t")
            pos_word_pairs.append((fields[0], fields[1]))

        for fnentity_one in fnentity:
            hit = False
            friendlyname_jinsuo = fnentity_one.split("\t")[0].replace(" ", "")
            # Accepted surface forms: exact, plural "s", trailing ".".
            accepted = (friendlyname_jinsuo,
                        friendlyname_jinsuo + "s",
                        friendlyname_jinsuo + ".")
            for i, (pos, word) in enumerate(pos_word_pairs):
                if word in accepted:  # tuple membership: exact equality
                    hit = True
                    mention_map_pos_com.add(pos)
                elif word in friendlyname_jinsuo:  # str membership: substring
                    pos_comb = pos + "\t"
                    word_comb = word
                    for pos_j, word_j in pos_word_pairs[i + 1:]:
                        word_comb += word_j
                        pos_comb += pos_j + "\t"
                        if word_comb in accepted:
                            hit = True
                            mention_map_pos_com.add(pos_comb)
                            break
                        if word_comb not in friendlyname_jinsuo:
                            # The run can no longer grow into the name.
                            break
            if not hit:
                print(fnentity_one + "\t" + ques)
                print(posword)
    write_set(mention_map_pos_com,
              "../data/cluster/train.mention.pos.scomposition")
    return mention_map_pos_com
Пример #6
0
def hit_by_np():
    """Match entities for candidate phrases of each easy test question.

    For every question in the NP index-range file, the given mention index
    ranges are united with the ranges from ``genenrate_indexrange``, turned
    into phrases with ``position_to_phrases``, and each phrase is looked up
    through the friendly-name, alias, name and clueweb matchers. The four
    results are combined by ``entity_pros_sum_all``; the per-range results
    are then reduced per question by ``entity_pros_per_question``.

    Two files are written: the per-question result (``write_dict_dict``) and
    the per-question, per-range result (``write_dict_dict_dict``). All paths
    use forward slashes so the function works on POSIX as well as Windows.

    Returns:
        dict: question -> result of ``entity_pros_per_question``.
    """
    np_mention_indexrange = read_dict_mention_indexrange(
        "../data/test/test.easy.ques.np.index.range")
    test_easy_question_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    ques_indexrange_entity_pros = dict()
    ques_entity_pros = dict()
    for ques in np_mention_indexrange:
        print(ques)  # progress output
        wordlist = posword_wordlist(test_easy_question_posword[ques])
        mention_indexranges = np_mention_indexrange[ques]
        # NOTE(review): the original comment here read "n>=3" -- presumably
        # the generated ranges cover n-grams of length >= 3; confirm against
        # genenrate_indexrange.
        n_indexrange = genenrate_indexrange(wordlist)
        np_n_indexrange = set(mention_indexranges) | n_indexrange
        indexrange_phrase = position_to_phrases(np_n_indexrange, wordlist)

        indexrange_entity_pros = dict()
        for indexrange in indexrange_phrase:
            phrase = indexrange_phrase[indexrange]
            # Argument order fixes the lookup order: friendly name, alias,
            # name, clueweb.
            indexrange_entity_pros[indexrange] = entity_pros_sum_all(
                friendlyname_entity_match_one(phrase),
                aliases_entity_match_one(phrase),
                name_entity_match_one(phrase),
                clueweb_entity_match_one(phrase))

        ques_indexrange_entity_pros[ques] = indexrange_entity_pros
        ques_entity_pros[ques] = entity_pros_per_question(
            indexrange_entity_pros)
    write_dict_dict(ques_entity_pros,
                    "../data/test/test.easy.ques.np_entitymatch")
    write_dict_dict_dict(
        ques_indexrange_entity_pros,
        "../data/test/test.easy.ques.np_ques_indexrange_entity_pros")
    return ques_entity_pros
Пример #7
0
def train_frname_in_test_position():
    """Find where training friendly-name entries occur in easy test questions.

    Pools the friendly-name/entity entries of all training questions into one
    set and calls ``train_frname_position`` with that set and each test
    question's POS/word list.

    Returns:
        dict: test question -> result of ``train_frname_position``.
    """
    test_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    train_fnentity = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")

    # Union of the entries of every training question.
    pooled_entries = set()
    for train_ques in train_fnentity:
        pooled_entries.update(train_fnentity[train_ques])

    return {
        ques: train_frname_position(pooled_entries, test_posword[ques])
        for ques in test_posword
    }
Пример #8
0
def conquer():
    """Run ``entity_link_rein`` over every easy test question.

    Each question and its POS/word list are printed before linking. Only
    questions whose linking result is non-empty are kept.

    Returns:
        dict: question -> non-empty result of ``entity_link_rein``.
    """
    posword_by_question = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    entity_match = {}

    for question in posword_by_question:
        print(question)
        tagged_words = posword_by_question[question]
        print(tagged_words)
        linked = entity_link_rein(tagged_words)
        if len(linked) == 0:
            # Nothing was linked for this question; leave it out.
            continue
        entity_match[question] = linked
    return entity_match
Пример #9
0
def match_by_friendlyname():
    """Match easy test questions against friendly names only.

    For each question, its words are combined into candidate phrases with
    ``combine_wordlist`` and looked up with ``friendlyname_entity_match``.
    Questions with a non-empty match are stored under the key
    ``"<question>###<tab-joined friendly-name/entity entries>"``.

    Returns:
        dict: key as described above -> result of
        ``friendlyname_entity_match``.
    """
    test_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    test_fnentity = read_ques_fn_entity(
        "../data/test/test.question.friendlyname.entity")
    entity_match = {}
    for question in test_posword:
        reference_entries = test_fnentity[question]
        words = posword_wordlist(test_posword[question])
        candidates = combine_wordlist(words)
        matched = friendlyname_entity_match(candidates)
        if len(matched) <= 0:
            continue
        match_key = question + "###" + "\t".join(reference_entries)
        entity_match[match_key] = matched
    return entity_match
Пример #10
0
def question_word_frequency():
    """Count in how many easy test questions each word appears.

    A word is counted once per question regardless of how often it repeats
    inside that question. The counts are ordered from most to least frequent,
    printed one per line, and returned.

    Returns:
        dict: word -> number of questions containing it, in descending order
        of count.
    """
    posword_by_question = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    counts = {}
    for ques in posword_by_question:
        words = posword_wordlist(posword_by_question[ques])
        # De-duplicate so each question contributes at most 1 per word.
        for token in set(words):
            counts[token] = counts.get(token, 0) + 1

    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    word_num = dict(ranked)
    for token, total in word_num.items():
        print(token, "\t", total)

    return word_num
Пример #11
0
def test_easy_mention_position():
    """Group easy test questions by the position of their mention.

    For each question and each of its friendly-name entries, the friendly
    name (first tab-separated field) is stripped of spaces and searched for
    in the question's words (backticks are read as apostrophes):

    * a single word equal to the stripped name, or to the name plus "s" or
      ".", records the position ``"<i>"``;
    * otherwise a word that is a substring of one of those three forms starts
      a run of concatenated following words; when the run equals one of the
      forms, the position ``"<i><TAB><j>"`` is recorded.

    Diagnostics are printed when a question does not have exactly one entry,
    when an entry is not found, and when a question does not yield exactly
    one position. Questions for which no position was found are skipped after
    the diagnostics instead of raising ``IndexError``.

    Returns:
        dict: first recorded position -> set of
        ``"<question>###<tab-joined POS/word items>"`` strings, in first-seen
        order (not sorted).
    """
    question_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    question_fnentity = read_ques_fn_entity(
        "../data/test/test.question.friendlyname.entity")
    position_question_posword = dict()
    for ques in question_posword:
        position = list()
        fnentity = question_fnentity[ques]
        posword = question_posword[ques]
        # Defined up front so the diagnostics below never see an unbound or
        # stale name when a question has no entries.
        friendlyname = ""
        if len(fnentity) != 1:
            print(ques)

        # Field 1 of each "POS<TAB>word" item, with backticks normalised.
        words = [item.split("\t")[1].replace("`", "'") for item in posword]

        for fnentity_one in fnentity:
            hit = False
            friendlyname = fnentity_one.split("\t")[0]
            friendlyname_jinsuo = friendlyname.replace(" ", "")
            # Accepted surface forms: exact, plural "s", trailing ".".
            accepted = (friendlyname_jinsuo,
                        friendlyname_jinsuo + "s",
                        friendlyname_jinsuo + ".")

            for i, word in enumerate(words):
                if word in accepted:  # tuple membership: exact equality
                    hit = True
                    position.append(str(i))
                elif any(word in form for form in accepted):  # substring
                    word_comb = word
                    for j in range(i + 1, len(words)):
                        word_comb += words[j]
                        if word_comb in accepted:
                            hit = True
                            position.append("\t".join([str(i), str(j)]))
                            break
                        if not any(word_comb in form for form in accepted):
                            # The run can no longer grow into the name.
                            break
            if not hit:
                print(friendlyname)
                print(ques)

        if len(position) != 1:
            print(position)
            print(friendlyname)
            print(ques)
        if not position:
            # Nothing located: already reported above, nothing to group.
            continue
        position_question_posword.setdefault(position[0], set()).add(
            ques + "###" + "\t".join(posword))
    return position_question_posword
Пример #12
0
def conquer():
    """Link entities for the easy test questions with a three-stage fallback.

    For each question the stages are tried in order and the first that
    applies wins:

    1. friendly-name match on the phrases from ``combine_wordlist``;
    2. if the question is in the ``mention_pos_equal`` result, its mentions
       are looked up in all four sources (friendly name, alias, name,
       clueweb) and merged with ``add_dict_dict``;
    3. the same four-source lookup for the ``mention_pos_similar`` result.

    Results are stored under
    ``"<question>###<tab-joined friendly-name/entity entries>"``. Afterwards
    each phrase's entity mapping is re-ordered by value, highest first, and
    the number of matched questions is printed.

    Returns:
        dict: key as described above -> {phrase -> {entity -> value}}.
    """
    question_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    train_question_posword = read_posques_posword(
        "../data/cluster/train.quespos.posword")
    train_question_fnentity = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    test_question_fnentity = read_ques_fn_entity(
        "../data/test/test.question.friendlyname.entity")

    def lookup_in_all_sources(mentions):
        """Query the four matchers in order, then merge their results."""
        per_source = (
            friendlyname_entity_match(mentions),
            aliases_entity_match(mentions),
            name_entity_match(mentions),
            clueweb_entity_match(mentions),
        )
        merged = dict()
        for source_result in per_source:
            merged = add_dict_dict(merged, source_result)
        return merged

    entity_match = dict()
    equal_mentions = mention_pos_equal(question_posword,
                                       train_question_posword,
                                       train_question_fnentity)
    similar_mentions = mention_pos_similar(question_posword,
                                           train_question_posword,
                                           train_question_fnentity)

    for question in question_posword:
        reference_entries = test_question_fnentity[question]
        words = posword_wordlist(question_posword[question])
        candidates = combine_wordlist(words)
        direct_match = friendlyname_entity_match(candidates)

        if len(direct_match) > 0:
            chosen = direct_match
        elif question in equal_mentions:
            chosen = lookup_in_all_sources(equal_mentions[question])
        elif question in similar_mentions:
            chosen = lookup_in_all_sources(similar_mentions[question])
        else:
            # No stage applies; the question gets no entry.
            continue
        entity_match[question + "###" + "\t".join(reference_entries)] = chosen

    # Re-order every entity mapping by its value, highest first.
    for match_key in entity_match:
        by_phrase = entity_match[match_key]
        for phrase in by_phrase:
            ranked = sorted(by_phrase[phrase].items(),
                            key=lambda pair: pair[1],
                            reverse=True)
            by_phrase[phrase] = dict(ranked)
    print(len(entity_match))
    return entity_match