Пример #1
0
def easyquestion_friendlyname_pos_entity():
    """Annotate the first friendly-name record of each easy test question with POS tags.

    Reads the POS-tagged easy test questions and the question -> friendly-name
    records, then for every question walks its ``pos<TAB>word`` tokens.  Each
    token whose word occurs in the question's first record is appended to that
    record as ``<TAB>word<TAB>pos###``.  Results are keyed by
    ``question###<tab-terminated POS sequence>`` and handed to
    ``write_dict_str``.  The set of matched POS tags and the set of matched
    tag sequences are printed at the end.
    """
    tagged_questions = read_posques_posword(
        "..\\data\\test\\test.easy.quespos.posword")
    gold_records = read_ques_fn_entity(
        "..\\data\\test\\test.question.friendlyname.entity")
    annotated_by_key = {}
    matched_tags = set()
    matched_tag_sequences = set()

    for ques in tagged_questions:
        first_record = gold_records[ques][0]
        annotated_record = first_record
        question_tags = ""  # every POS tag of the question, each followed by a tab
        hit_tags = ""       # only the tags whose word occurs in the record

        for token in tagged_questions[ques]:
            fields = token.split("\t")
            tag = fields[0]
            question_tags += tag + "\t"
            surface = fields[1]
            if surface not in first_record:
                continue
            annotated_record += "\t" + surface + "\t" + tag + "###"
            matched_tags.add(tag)
            hit_tags += tag + "\t"

        matched_tag_sequences.add(hit_tags)
        annotated_by_key[ques + "###" + question_tags] = annotated_record

    write_dict_str(annotated_by_key,
                   "..\\data\\test\\test.easyquespos.friename.wordpos")
    print(matched_tags)
    print(matched_tag_sequences)
Пример #2
0
def question_friendlyname_pos_entity():
    """Annotate every friendly-name record of each training question with POS tags.

    For each question in the clustered POS file, every ``pos<TAB>word`` token
    whose word occurs in a friendly-name record is appended to that record as
    ``<TAB>word<TAB>pos###``.  The annotated records of a question are stored
    as a list under the key ``question###<tab-terminated POS sequence>`` and
    the mapping is handed to ``write_dict``.  The set of matched POS tags and
    the set of matched tag sequences are printed at the end.

    The POS sequence of a question is computed once per question, so the key
    is well defined even when a question has no friendly-name records.
    """
    question_posword = read_posques_posword("..\\data\\cluster\\quespos_posword")
    question_fnentity = read_ques_fn_entity(
        "..\\data\\cluster\\train.question.friendlyname.entity")
    annotated_by_key = {}
    pos_contained_set = set()
    pos_combination_set = set()

    for ques in question_posword:
        fnentity = question_fnentity[ques]

        # Split each token exactly once; the pairs are reused for every record.
        pos_word_pairs = []
        for pos_word in question_posword[ques]:
            fields = pos_word.split("\t")
            pos_word_pairs.append((fields[0], fields[1]))

        # The key depends only on the question, never on the entity records.
        pos_ques = "".join(pos + "\t" for pos, _ in pos_word_pairs)

        fnentity_word_pos_list = []
        for fnentity_one in fnentity:
            fnentity_word_pos = fnentity_one
            pos_combination = ""
            for pos, word in pos_word_pairs:
                if word in fnentity_one:
                    fnentity_word_pos += "\t" + word + "\t" + pos + "###"
                    pos_contained_set.add(pos)
                    pos_combination += pos + "\t"
            pos_combination_set.add(pos_combination)
            fnentity_word_pos_list.append(fnentity_word_pos)

        annotated_by_key[ques + "###" + pos_ques] = fnentity_word_pos_list

    write_dict(annotated_by_key,
               "..\\data\\cluster\\train.easyquespos.friename.wordpos")
    print(pos_contained_set)
    print(pos_combination_set)
Пример #3
0
def question_friendlynamejinsuo_pos_entity():
    """Collect the POS patterns under which friendly names appear in training questions.

    Each friendly name has its spaces removed.  A question token (or a run of
    consecutive tokens concatenated without separators) counts as a mention
    when it equals that compact name, the name plus ``"s"``, or the name plus
    ``"."``.  A single-token mention contributes its bare POS tag; a multi-token
    mention contributes the tab-terminated sequence of its tags.  Records for
    which no mention is found are printed together with the question and its
    tokens.  The collected patterns are written with ``write_set`` and returned.
    """
    tagged_questions = read_posques_posword("../data/cluster/train.quespos.posword")
    gold_records = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    mention_tag_patterns = set()

    for ques in tagged_questions:
        records = gold_records[ques]
        tokens = tagged_questions[ques]
        for record in records:
            record_fields = record.split("\t")
            friendlyname = record_fields[0]
            entity = record_fields[1]  # not used below; a record must still carry this field
            compact_name = friendlyname.replace(" ", "")
            accepted_forms = (compact_name, compact_name + "s", compact_name + ".")
            found = False
            token_count = len(tokens)

            for start in range(token_count):
                fields = tokens[start].split("\t")
                tag = fields[0]
                word = fields[1]

                if word in accepted_forms:
                    # A single token already spells the whole name.
                    found = True
                    mention_tag_patterns.add(tag)
                    continue
                if word not in compact_name:
                    continue

                # The token is a fragment of the name: extend to the right until
                # the run matches or can no longer be part of the name.
                tag_run = tag + "\t"
                word_run = word
                for end in range(start + 1, token_count):
                    next_fields = tokens[end].split("\t")
                    next_tag = next_fields[0]
                    next_word = next_fields[1]
                    word_run += next_word
                    tag_run += next_tag + "\t"
                    if word_run in accepted_forms:
                        found = True
                        mention_tag_patterns.add(tag_run)
                        break
                    if word_run not in compact_name:
                        break

            if not found:
                print(record + "\t" + ques)
                print(tokens)

    write_set(mention_tag_patterns, "../data/cluster/train.mention.pos.scomposition")
    return mention_tag_patterns
Пример #4
0
def train_mention_pos_equal_position():
    """Load the test/train POS data and delegate to ``mention_position_pos_equal``.

    Reads the easy test questions' POS tokens, the training questions' POS
    tokens and the training friendly-name records (in that order) and returns
    whatever ``mention_position_pos_equal`` produces for them.
    """
    test_tagged = read_posques_posword("../data/test/test.easy.quespos.posword")
    train_tagged = read_posques_posword("../data/cluster/train.quespos.posword")
    train_gold = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    return mention_position_pos_equal(test_tagged, train_tagged, train_gold)
Пример #5
0
def entity_not_hit_question():
    """Return the questions whose target entity is missing from the ``hit_by_np`` candidates.

    The target entity is the second tab-separated field of the question's
    first friendly-name record.  The resulting set is also written with
    ``write_set``.
    """
    candidates_by_question = hit_by_np()
    gold_records = read_ques_fn_entity(
        "..\\data\\test\\test.question.friendlyname.entity")
    missed = set()
    for ques in candidates_by_question:
        candidates = candidates_by_question[ques]
        target = gold_records[ques][0].split("\t")[1]
        if target in candidates:
            continue
        missed.add(ques)
    write_set(missed, "..\\data\\test\\test.easy.ques.np_not_hit")
    return missed
Пример #6
0
def entity_pro_hit_question():
    """Return, per question, the target entity and its value among the ``hit_by_np`` candidates.

    The target entity is the second tab-separated field of the question's
    first friendly-name record.  Only questions whose target is present in
    the candidates are kept; each maps to ``"<entity>\\t<candidate value>"``.
    The mapping is also written with ``write_dict_str``.
    """
    candidates_by_question = hit_by_np()
    gold_records = read_ques_fn_entity(
        "..\\data\\test\\test.question.friendlyname.entity")
    hits = {}
    for ques in candidates_by_question:
        candidates = candidates_by_question[ques]
        target = gold_records[ques][0].split("\t")[1]
        if target not in candidates:
            continue
        hits[ques] = "\t".join((str(target), str(candidates[target])))
    write_dict_str(hits, "..\\data\\test\\test.easy.ques.np_hit_entity_pro")
    return hits
Пример #7
0
def train_frname_in_test_position():
    """Map each easy test question to the result of ``train_frname_position``.

    All friendly-name records of the training questions are pooled into one
    set; that set and the question's POS tokens are passed to
    ``train_frname_position`` for every easy test question.

    Returns:
        dict keyed by test question, holding the value returned by
        ``train_frname_position`` for that question.
    """
    question_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    train_question_fnentity = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")

    # Grow the pool in place; rebuilding the union on every iteration would
    # copy the whole set each time.
    fnentitys = set()
    for train_ques in train_question_fnentity:
        fnentitys.update(train_question_fnentity[train_ques])

    ques_frname_in_position = {}
    for ques in question_posword:
        ques_frname_in_position[ques] = train_frname_position(
            fnentitys, question_posword[ques])
    return ques_frname_in_position
Пример #8
0
def match_by_friendlyname():
    """Match phrases of each easy test question via ``friendlyname_entity_match``.

    For every question the POS tokens are turned into a word list
    (``posword_wordlist``), expanded into phrases (``combine_wordlist``) and
    passed to ``friendlyname_entity_match``.  Non-empty results are stored
    under ``question###<tab-joined friendly-name records>``.
    """
    tagged_questions = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    gold_records = read_ques_fn_entity(
        "../data/test/test.question.friendlyname.entity")
    matches = {}
    for question in tagged_questions:
        records = gold_records[question]
        words = posword_wordlist(tagged_questions[question])
        candidates = friendlyname_entity_match(combine_wordlist(words))
        if len(candidates) == 0:
            continue
        matches[question + "###" + "\t".join(records)] = candidates
    return matches
Пример #9
0
def test_easy_mention_position():
    """Group easy test questions by the token position of their entity mention.

    Each friendly name has its spaces removed.  A token (with backticks
    normalised to apostrophes), or a run of consecutive tokens concatenated
    without separators, counts as a mention when it equals the compact name,
    the name plus ``"s"``, or the name plus ``"."``.  A single-token mention
    is recorded as ``"i"``, a multi-token mention as ``"i<TAB>j"`` (first and
    last token index).

    Questions are grouped by the first recorded position.  Diagnostics are
    printed for questions with more or fewer than one friendly-name record,
    for friendly names with no mention, and for questions whose number of
    recorded positions is not exactly one.  Questions with no recorded
    position are reported and then skipped instead of raising ``IndexError``.

    Returns:
        dict mapping a position string to the set of
        ``question###<tab-joined POS tokens>`` strings found at that position.
    """
    question_posword = read_posques_posword(
        "..\\data\\test\\test.easy.quespos.posword")
    question_fnentity = read_ques_fn_entity(
        "..\\data\\test\\test.question.friendlyname.entity")
    position_question_posword = {}

    for ques in question_posword:
        position = []
        fnentity = question_fnentity[ques]
        posword = question_posword[ques]
        if len(fnentity) != 1:
            print(ques)

        # Reset per question so the diagnostics below never show a name left
        # over from a previous question (or fail when there are no records).
        friendlyname = None
        for fnentity_one in fnentity:
            hit = False
            friendlyname = fnentity_one.split("\t")[0]
            friendlyname_jinsuo = friendlyname.replace(" ", "")
            accepted_forms = (friendlyname_jinsuo,
                              friendlyname_jinsuo + "s",
                              friendlyname_jinsuo + ".")

            size_posword = len(posword)
            for i in range(size_posword):
                word = posword[i].split("\t")[1].replace("`", "'")
                if word in accepted_forms:
                    hit = True
                    position.append(str(i))
                elif any(word in form for form in accepted_forms):
                    # The token is a fragment of the name: extend to the right
                    # until the run matches or can no longer be part of it.
                    word_comb = word
                    for j in range(i + 1, size_posword):
                        word_comb += posword[j].split("\t")[1].replace("`", "'")
                        if word_comb in accepted_forms:
                            hit = True
                            position.append("\t".join([str(i), str(j)]))
                            break
                        if not any(word_comb in form for form in accepted_forms):
                            break
            if not hit:
                print(friendlyname)
                print(ques)

        if len(position) != 1:
            print(position)
            print(friendlyname)
            print(ques)
        if not position:
            # Nothing to group by; already reported above.
            continue

        position_question_posword.setdefault(position[0], set()).add(
            ques + "###" + "\t".join(posword))

    return position_question_posword
Пример #10
0
def conquer():
    """Find entity candidates for every easy test question using a fallback chain.

    For each question, in order:

    1. phrases built from its words are matched with
       ``friendlyname_entity_match``; a non-empty result is used directly;
    2. otherwise, if ``mention_pos_equal`` produced mentions for the question,
       those mentions are looked up in all four sources (friendly name,
       aliases, name, clueweb) and the results merged with ``add_dict_dict``;
    3. otherwise the same four-source lookup is applied to the mentions from
       ``mention_pos_similar``, if any.

    Results are keyed by ``question###<tab-joined friendly-name records>``.
    Finally each phrase's candidate mapping is re-ordered by value, largest
    first.  The number of matched questions is printed and the mapping returned.
    """
    question_posword = read_posques_posword(
        "../data/test/test.easy.quespos.posword")
    train_question_posword = read_posques_posword(
        "../data/cluster/train.quespos.posword")
    train_question_fnentity = read_ques_fn_entity(
        "../data/cluster/train.question.friendlyname.entity")
    test_question_fnentity = read_ques_fn_entity(
        "../data/test/test.question.friendlyname.entity")

    def lookup_all_sources(mentions):
        # Query all four sources first, then merge them in a fixed order.
        from_friendlyname = friendlyname_entity_match(mentions)
        from_aliases = aliases_entity_match(mentions)
        from_name = name_entity_match(mentions)
        from_clueweb = clueweb_entity_match(mentions)
        merged = dict()
        for partial in (from_friendlyname, from_aliases, from_name, from_clueweb):
            merged = add_dict_dict(merged, partial)
        return merged

    equal_mentions = mention_pos_equal(question_posword,
                                       train_question_posword,
                                       train_question_fnentity)
    similar_mentions = mention_pos_similar(question_posword,
                                           train_question_posword,
                                           train_question_fnentity)

    entity_match = {}
    for question in question_posword:
        fnentity_test = test_question_fnentity[question]
        words = posword_wordlist(question_posword[question])
        direct = friendlyname_entity_match(combine_wordlist(words))
        if len(direct) > 0:
            candidates = direct
        elif question in equal_mentions:
            candidates = lookup_all_sources(equal_mentions[question])
        elif question in similar_mentions:
            candidates = lookup_all_sources(similar_mentions[question])
        else:
            continue
        entity_match[question + "###" + "\t".join(fnentity_test)] = candidates

    # Re-order every phrase's candidates by value, highest first.
    for per_phrase in entity_match.values():
        for phrase in per_phrase:
            ranked = sorted(per_phrase[phrase].items(),
                            key=lambda item: item[1],
                            reverse=True)
            per_phrase[phrase] = dict(ranked)

    print(len(entity_match))
    return entity_match