Пример #1
0
def run_grounding_graph_path_match(input_file_folder):
    '''Score every grounded graph by matching its key path to question words.

    Each file listed in ``input_file_folder`` is read with
    ``read_structure_file``.  For every ungrounded graph the important words
    of the question are extracted, and each of its grounded graphs gets
    ``score`` set to ``PathMatchByLexicalNN.get_path_pro(key_path, words)``.
    The structures are then written back to the file they were read from.

    :param input_file_folder: directory holding the structure files; a
        trailing path separator is optional.
    '''
    import os
    from grounding.ranking.path_match_nn.path_match_interface import PathMatchByLexicalNN
    from parsing.parsing_utils import extract_importantwords_from_question
    all_data_path = os.listdir(input_file_folder)
    path_matcher = PathMatchByLexicalNN()
    for file_name in all_data_path:
        print(file_name)
        # os.path.join (not string concatenation) so the folder argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file_folder, file_name)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            question = structure.question
            for ungrounded_graph in structure.ungrounded_graph_forest:
                importantwords_list = extract_importantwords_from_question(
                    question=question, ungrounded_graph=ungrounded_graph)
                grounded_graphs = ungrounded_graph.get_grounded_graph_forest()
                print(importantwords_list, len(grounded_graphs))
                for grounded_graph in grounded_graphs:
                    grounded_graph.score = path_matcher.get_path_pro(
                        grounded_graph.key_path, importantwords_list)
                    print(grounded_graph.key_path, importantwords_list,
                          grounded_graph.score)
        # Results overwrite the input file in place.
        write_structure_file(structure_list, structure_file)
Пример #2
0
def run_grounding_graph_question_match_minus(input_file_folder):
    '''Subtract the normalized path score from every graph's total score.

    For each file in ``input_file_folder`` the ``score`` of all grounded
    graphs is collected and passed through ``utils.Normalize``.  Each
    grounded graph then gets
    ``total_score = total_score - normalized(score)`` and the structures are
    written back to the same file.

    :param input_file_folder: directory holding the structure files; a
        trailing path separator is optional.
    '''
    import os
    from common import utils
    for file_name in os.listdir(input_file_folder):
        print(file_name)
        # os.path.join (not string concatenation) so the folder argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file_folder, file_name)
        structure_list = read_structure_file(structure_file)

        # First pass: gather the raw scores of every grounded graph in the file.
        all_score = [
            grounded_graph.score
            for structure in structure_list
            for ungrounded_graph in structure.ungrounded_graph_forest
            for grounded_graph in ungrounded_graph.get_grounded_graph_forest()
        ]

        # Map each raw score to the value at the same position in the
        # normalized list (a later duplicate overwrites an earlier one).
        all_score_guiyi = utils.Normalize(all_score)
        score_guiyi = {
            score_ori: all_score_guiyi[i]
            for i, score_ori in enumerate(all_score)
        }

        # Second pass: remove the normalized path score from the total.
        for structure in structure_list:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    grounded_graph.total_score = (
                        grounded_graph.total_score
                        - score_guiyi[grounded_graph.score])
        write_structure_file(structure_list, structure_file)
Пример #3
0
def run_grounded_node_grounding_freebase(structure_with_ungrounded_graphq_file,
                                         output_file):
    '''
    Step 2.1: 1.0 ungrounded query -> 2.1 grounded query.

    Reads the structures from ``structure_with_ungrounded_graphq_file``.  For
    the last ungrounded graph of every structure, each node is linked with
    ``node_linking_interface_freebase.node_linking``, grounded graphs are
    generated from the linking results, and both are stored on the ungrounded
    graph.  All structures are finally written to ``output_file``; nothing is
    returned.
    '''
    from grounding._2_1_grounded_graph import node_linking_interface_freebase
    from grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structures = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structures:
        print(structure.qid)
        for index, graph in enumerate(structure.get_ungrounded_graph_forest()):
            # Only the final graph of the forest is grounded.
            if index != len(structure.get_ungrounded_graph_forest()) - 1:
                continue
            # One (node, linking result) pair per node of the graph.
            linking_results = [
                (node,
                 node_linking_interface_freebase.node_linking(
                     qid=structure.qid, node=node))
                for node in graph.nodes
            ]
            candidate_graphs = generate_grounded_graph_interface(
                ungrounded_graph=graph,
                grounding_result_list=linking_results)
            graph.set_grounded_linking(linking_results)
            graph.set_grounded_graph_forest(candidate_graphs)
    write_structure_file(structures, output_file)
Пример #4
0
def computed_every_grounded_graph_f1_cwq(input_file):
    '''Store recall, precision and F1 on every grounded graph of every file.

    Gold answers come from
    ``complexwebquestion_interface.get_answers_by_question(question)`` and are
    compared to ``set(grounded_graph.denotation)`` with
    ``sempre_evaluation.computeF1``.  Each file is rewritten in place.  A
    file that raises is reported and skipped; the names of the failed files
    are printed at the end.

    :param input_file: directory holding the structure files; a trailing
        path separator is optional.
    '''
    from datasets_interface.question_interface import complexwebquestion_interface
    error_list = []
    for structure_path in os.listdir(input_file):
        # os.path.join (not string concatenation) so the directory argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file, structure_path)
        print(structure_path)
        try:
            structure_list = read_structure_file(structure_file)
            for structure in structure_list:
                gold_answer_mid_set = complexwebquestion_interface.get_answers_by_question(
                    structure.question)
                for ungrounded_graph in structure.ungrounded_graph_forest:
                    for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                        system_denotation_set = set(grounded_graph.denotation)
                        recall, precision, f1 = sempre_evaluation.computeF1(
                            gold_answer_mid_set, system_denotation_set)
                        grounded_graph.f1_score = f1
                        grounded_graph.recall_score = recall
                        grounded_graph.precision_score = precision
            write_structure_file(structure_list, structure_file)
        except Exception as e:
            # Deliberate best effort: one bad file must not stop the batch,
            # but report what went wrong instead of discarding the exception.
            print('error', structure_path, e)
            error_list.append(structure_path)
    print('error_list:\t', error_list)
Пример #5
0
def computed_every_grounded_graph_f1_graphq(input_file):
    '''Store recall, precision and F1 on every grounded graph of every file.

    Gold answers come from
    ``graphquestion_interface.get_answers_mid_by_question(question)``.  The
    distinct system answers are taken from ``grounded_graph.denotation`` with
    integer answers converted to strings, then compared to the gold answers
    with ``sempre_evaluation.computeF1``.  ``structure.gold_answer`` is
    replaced by the gold answer list and each file is rewritten in place.

    :param input_file: directory holding the structure files; a trailing
        path separator is optional.
    '''
    from datasets_interface.question_interface import graphquestion_interface
    for structure_path in os.listdir(input_file):
        # os.path.join (not string concatenation) so the directory argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file, structure_path)
        print(structure_path)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            gold_answers_mid_set = graphquestion_interface.get_answers_mid_by_question(
                structure.question)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # e.g. 80 -> '80'; presumably the gold answers are
                    # strings -- verify against the dataset interface.
                    new_system_answers_list = [
                        str(system_answer)
                        if isinstance(system_answer, int) else system_answer
                        for system_answer in set(grounded_graph.denotation)
                    ]
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answers_mid_set, new_system_answers_list)
                    grounded_graph.f1_score = f1
                    grounded_graph.recall_score = recall
                    grounded_graph.precision_score = precision
                    if f1 > 0:
                        print(structure_path, f1)
            # Update the stored answers with the answer mid list,
            # e.g. ["Kimberly-Clark"] -> ['en.kimberly-clark'].
            structure.gold_answer = gold_answers_mid_set
        write_structure_file(structure_list, structure_file)
Пример #6
0
def run_grounded_node_grounding_dbpedia_gold(
        structure_with_ungrounded_graphq_file, output_file):
    '''
    Step 2.1: 1.0 ungrounded query -> 2.1 grounded query.

    Reads the structures from ``structure_with_ungrounded_graphq_file``.  For
    the last ungrounded graph of every structure, each node is paired with
    the result of
    ``lcquad_1_0_interface.get_topic_entities_list_by_question_and_nodemention``
    for the question and the node's ``friendly_name``; grounded graphs are
    generated from those pairs and both are stored on the ungrounded graph.
    All structures are finally written to ``output_file``; nothing is
    returned.
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    from grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structures = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structures:
        print(structure.qid)
        for index, graph in enumerate(structure.get_ungrounded_graph_forest()):
            # Only the final graph of the forest is grounded.
            if index != len(structure.get_ungrounded_graph_forest()) - 1:
                continue
            # One pair per node, e.g. (node(barbaro), {'en.barbaro': 1.6}).
            linking_results = [
                (node,
                 lcquad_1_0_interface.get_topic_entities_list_by_question_and_nodemention(
                     question=structure.question,
                     mention=node.friendly_name))
                for node in graph.nodes
            ]
            candidate_graphs = generate_grounded_graph_interface(
                ungrounded_graph=graph,
                grounding_result_list=linking_results)
            graph.set_grounded_linking(linking_results)
            graph.set_grounded_graph_forest(candidate_graphs)
    write_structure_file(structures, output_file)
Пример #7
0
def run_grounded_graph_generation_by_structure_transformation(
        structure_with_grounded_graphq_node_grounding_file, output_file):
    '''Expand 2.1 grounded graphs into candidates, one JSON file per question.

    For every structure whose ``<qid>.json`` does not yet exist in
    ``output_file``, each 2.1 grounded graph is expanded with
    ``graph_2_1_to_2_2_by_transfer.generate_candidates_by_2_1_grounded_graph_interface``.
    Every candidate receives a ``grounded_query_id`` and a ``sparql_query``;
    when ``structure.function == 'count'`` its denotation is replaced by a
    one-element list holding the denotation size.  A structure that produced
    at least one candidate is written to ``<output_file>/<qid>.json``.

    :param structure_with_grounded_graphq_node_grounding_file: structure file
        to read.
    :param output_file: output directory; a trailing path separator is
        optional.
    '''
    from grounding._2_2_grounded_graph_offline import graph_2_1_to_2_2_by_transfer
    from grounding.grounded_graph_to_sparql import grounded_graph_to_sparql_CWQ

    def count_denotation_to_num(grounded_graph):
        '''Return ``[len(denotation)]``, or ``[0]`` if the denotation is None.

        Used for counting questions such as
        "how many softwares are developed by google?".
        '''
        denotation_set = grounded_graph.denotation
        return [0 if denotation_set is None else len(denotation_set)]

    structure_list = read_structure_file(
        structure_with_grounded_graphq_node_grounding_file)
    error_qid_list = []
    for i, structure in enumerate(structure_list):
        structure_output_file = os.path.join(
            output_file, str(structure.qid) + '.json')
        # Skip questions already processed.  A direct existence check
        # replaces listing the whole output directory for every structure.
        if os.path.exists(structure_output_file):
            continue
        print(i, structure.qid, structure.question)
        is_print = False
        for ungrounded_graph in structure.ungrounded_graph_forest:
            grounded_graph_forest = []
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                try:
                    grounded_graph_forest.extend(
                        graph_2_1_to_2_2_by_transfer.generate_candidates_by_2_1_grounded_graph_interface(
                            _2_1_grounded_graph=_2_1_grounded_graph))
                except Exception as e:
                    # Best effort: keep candidates from the other graphs and
                    # remember which question failed.
                    print('#Error:', structure.qid, e)
                    error_qid_list.append(structure.qid)
            if len(grounded_graph_forest) > 0:
                is_print = True
                print('#Size:', len(grounded_graph_forest))
            for z, grounded_graph in enumerate(grounded_graph_forest):
                grounded_graph.grounded_query_id = (
                    ungrounded_graph.ungrounded_query_id * 100000 + z)
                grounded_graph.sparql_query = grounded_graph_to_sparql_CWQ(
                    grounded_graph)
                if structure.function == 'count':
                    grounded_graph.denotation = count_denotation_to_num(
                        grounded_graph)
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_forest)
        if is_print:
            write_structure_file([structure], structure_output_file)
    print('Error qid list:', error_qid_list)
Пример #8
0
def run_ungrounded_graph_from_graphq(graph_questions_filepath, output_file):
    '''Generate query-graph structures for the questions in a GraphQuestions file.

    The questions are read with
    ``graphquestion_interface.read_graph_question_json``, turned into
    ``(qid, question, graph_query, answer)`` tuples, passed to
    ``running_interface.run_query_graph_generation`` and the result is
    written to ``output_file``.
    '''
    from datasets_interface.question_interface import graphquestion_interface
    questions = graphquestion_interface.read_graph_question_json(
        graph_questions_filepath)
    question_tuples = [
        (item.qid, item.question, item.graph_query, item.answer)
        for item in questions
    ]
    structures = running_interface.run_query_graph_generation(
        tuples_list=question_tuples)
    write_structure_file(structures, output_file)
Пример #9
0
def computed_every_grounded_graph_f1_webq_name(input_file, answer_file,
                                               mid_to_names_file):
    '''Store a name-based F1 score on every grounded graph of every file.

    Gold answers are looked up by qid with
    ``evaluation_utils.search_for_answers_by_id``.  Each mid in a grounded
    graph's denotation is translated with
    ``evaluation_utils.get_name_by_mid``; mids without a name are reported
    and left out.  The resulting name set is compared to the gold answers
    with ``sempre_evaluation.computeF1`` and only ``f1_score`` is stored.
    Each file is rewritten in place.

    :param input_file: directory holding the structure files; a trailing
        path separator is optional.
    :param answer_file: tab-separated lines; column 0 is the key and
        column 2 is evaluated as a Python expression giving the answers.
    :param mid_to_names_file: tab-separated lines; column 1 is the mid and
        column 2 is evaluated as a Python expression giving its names.
    '''
    # NOTE(review): eval() executes arbitrary expressions taken from the two
    # input files, so they must be trusted.  ast.literal_eval would be a
    # safer parser if the columns only hold plain literals -- confirm the
    # file format before switching.
    # ------------------------------------------------
    # qid -> answers
    qid_to_answers_dict = dict()
    for line in read_list(answer_file):
        cols = line.split('\t')
        qid_to_answers_dict[cols[0]] = eval(cols[2])
    # ------------------------------------------------
    # mid -> names
    mid_to_names_dict = dict()
    for line in read_list(mid_to_names_file):
        cols = line.split('\t')
        mid_to_names_dict[cols[1]] = list(eval(cols[2]))
    # ------------------------------------------------
    for structure_path in os.listdir(input_file):
        # os.path.join (not string concatenation) so the directory argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file, structure_path)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            qid = structure.qid
            gold_answer_names_set = evaluation_utils.search_for_answers_by_id(
                qid, qid_to_answers_dict)

            print(structure_path, '#gold:\t', gold_answer_names_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_denotation_names_set = set()
                    for denotation_mid in grounded_graph.denotation:
                        denotation_name = evaluation_utils.get_name_by_mid(
                            denotation_mid, mid_to_names_dict)
                        print('###denotation:\t', denotation_mid,
                              denotation_name)
                        if denotation_name is not None:
                            system_denotation_names_set.add(denotation_name)
                        else:
                            print(denotation_mid, '#####error!!!',
                                  denotation_name)
                    print('#gold:\t', gold_answer_names_set, '#system:\t',
                          system_denotation_names_set)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answer_names_set, system_denotation_names_set)
                    if f1 > 0.0:
                        print('#result:\t', f1)
                    grounded_graph.f1_score = f1
        write_structure_file(structure_list, structure_file)
Пример #10
0
def run_ungrounded_graph_from_complexwebquestion(
        complexquestin_filepath, structure_with_1_ungrounded_cwq_file):
    '''Generate query-graph structures for a ComplexWebQuestions file.

    The questions are read with
    ``complexwebquestion_interface.read_complexwebq_question_json``, turned
    into ``(ID, question, sparql, answers)`` tuples, passed to
    ``running_interface.run_query_graph_generation`` and the result is
    written to ``structure_with_1_ungrounded_cwq_file``.
    '''
    from datasets_interface.question_interface import complexwebquestion_interface
    from common.hand_files import write_structure_file
    questions = complexwebquestion_interface.read_complexwebq_question_json(
        complexquestin_filepath)
    question_tuples = [
        (item.ID, item.question, item.sparql, item.answers)
        for item in questions
    ]
    # Number of questions about to be processed.
    print(len(question_tuples))
    structures = running_interface.run_query_graph_generation(
        tuples_list=question_tuples)
    write_structure_file(structures, structure_with_1_ungrounded_cwq_file)
Пример #11
0
def computed_every_grounded_graph_f1_graphq(input_file):
    '''Store an F1 score on every grounded graph of every file.

    Gold answers are looked up by qid, first in
    ``test_qid_to_answers_mid_dict`` and then in
    ``train_qid_to_answers_mid_dict``; an unknown qid gets no gold answers.
    Gold and system answers are de-duplicated, with integer answers
    converted to strings, and compared with ``sempre_evaluation.computeF1``.
    Only ``f1_score`` is stored.  ``structure.gold_answer`` is replaced by
    the converted gold list and each file is rewritten in place.

    :param input_file: directory holding the structure files; a trailing
        path separator is optional.
    '''
    from grounding.grounding_args import test_qid_to_answers_mid_dict, train_qid_to_answers_mid_dict
    for structure_path in os.listdir(input_file):
        # os.path.join (not string concatenation) so the directory argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file, structure_path)
        print(structure_path)
        structure_list = read_structure_file(structure_file)

        for structure in structure_list:
            gold_answers_mid_set = []
            qid = structure.qid
            if qid in test_qid_to_answers_mid_dict:
                gold_answers_mid_set = test_qid_to_answers_mid_dict[qid]
            elif qid in train_qid_to_answers_mid_dict:
                gold_answers_mid_set = train_qid_to_answers_mid_dict[qid]

            # [80] -> ['80']
            gold_answers_mid_set = list({
                str(gold_answer)
                if isinstance(gold_answer, int) else gold_answer
                for gold_answer in gold_answers_mid_set
            })

            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # Same int -> str conversion for the system answers.
                    new_system_answers_set = list({
                        str(system_answer)
                        if isinstance(system_answer, int) else system_answer
                        for system_answer in set(grounded_graph.denotation)
                    })

                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answers_mid_set, new_system_answers_set)
                    print(structure_path, gold_answers_mid_set,
                          new_system_answers_set, f1)
                    grounded_graph.f1_score = f1
                    if f1 > 0:
                        print(f1)
            # Update the stored answers with the answer mid list,
            # e.g. ["Kimberly-Clark"] -> ['en.kimberly-clark'].
            structure.gold_answer = gold_answers_mid_set
        write_structure_file(structure_list, structure_file)
Пример #12
0
def run_topic_entities_from_lcquad(filepath,
                                   structure_with_1_ungrounded_lcquad_file,
                                   node_is_gold=False,
                                   linking_is_gold=False):
    '''Run DBpedia topic-entity generation for an LC-QuAD file.

    The questions are read with ``lcquad_1_0_interface.read_train_test_data``
    and turned into ``(qid, question_normal, sparql, None)`` tuples.  These
    are passed to ``ir_module.run_topics_entity_generation_dbpedia`` with
    ``q_mode='lcquad'`` and the two flags forwarded unchanged; the result is
    written to ``structure_with_1_ungrounded_lcquad_file``.
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    questions = lcquad_1_0_interface.read_train_test_data(filepath)
    # The answer slot of each tuple is always None for this dataset.
    question_tuples = [
        (item.qid, item.question_normal, item.sparql, None)
        for item in questions
    ]
    structures = ir_module.run_topics_entity_generation_dbpedia(
        tuples_list=question_tuples,
        node_is_gold=node_is_gold,
        linking_is_gold=linking_is_gold,
        q_mode='lcquad')
    write_structure_file(structures, structure_with_1_ungrounded_lcquad_file)
Пример #13
0
def run_topic_entities_from_cwq(filepath,
                                structure_with_1_ungrounded_cwq_file,
                                node_is_gold=False,
                                linking_is_gold=False):
    '''Run Freebase topic-entity generation for a ComplexWebQuestions file.

    The questions are read with
    ``complexwebquestion_interface.read_complexwebq_question_json`` and
    turned into ``(ID, question, sparql, answers)`` tuples.  These are passed
    to ``ir_module.run_topics_entity_generation_freebase`` with
    ``q_mode='cwq'`` and the two flags forwarded unchanged; the result is
    written to ``structure_with_1_ungrounded_cwq_file``.
    '''
    from datasets_interface.question_interface import complexwebquestion_interface
    questions = complexwebquestion_interface.read_complexwebq_question_json(
        filepath)
    question_tuples = [
        (item.ID, item.question, item.sparql, item.answers)
        for item in questions
    ]
    structures = ir_module.run_topics_entity_generation_freebase(
        tuples_list=question_tuples,
        node_is_gold=node_is_gold,
        linking_is_gold=linking_is_gold,
        q_mode='cwq')
    write_structure_file(structures, structure_with_1_ungrounded_cwq_file)
Пример #14
0
def run_ungrounded_graph_from_lcquad(filepath,
                                     structure_with_1_ungrounded_lcquad_file,
                                     node_is_gold=False,
                                     parser_mode='skeleton'):
    '''Generate query-graph structures for an LC-QuAD file.

    The questions are read with ``lcquad_1_0_interface.read_train_test_data``
    and turned into ``(qid, question_normal, sparql, None)`` tuples.  These
    are passed to ``sp_modules.run_query_graph_generation`` with
    ``q_mode='lcquad'`` and the remaining arguments forwarded unchanged; the
    result is written to ``structure_with_1_ungrounded_lcquad_file``.
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    questions = lcquad_1_0_interface.read_train_test_data(filepath)
    # The answer slot of each tuple is always None for this dataset.
    question_tuples = [
        (item.qid, item.question_normal, item.sparql, None)
        for item in questions
    ]
    # Number of questions about to be processed.
    print(len(question_tuples))
    structures = sp_modules.run_query_graph_generation(
        tuples_list=question_tuples,
        node_is_gold=node_is_gold,
        parser_mode=parser_mode,
        q_mode='lcquad')
    write_structure_file(structures, structure_with_1_ungrounded_lcquad_file)
Пример #15
0
def run_ungrounded_graph_from_graphq(graph_questions_filepath,
                                     output_file,
                                     node_is_gold=False,
                                     parser_mode='skeleton'):
    '''Generate query-graph structures for a GraphQuestions file.

    The questions are read with
    ``graphquestion_interface.read_graph_question_json`` and turned into
    ``(qid, question, graph_query, answer)`` tuples.  These are passed to
    ``sp_modules.run_query_graph_generation`` with ``q_mode='graphq'`` and
    the remaining arguments forwarded unchanged; the result is written to
    ``output_file``.
    '''
    from datasets_interface.question_interface import graphquestion_interface
    questions = graphquestion_interface.read_graph_question_json(
        graph_questions_filepath)
    question_tuples = [
        (item.qid, item.question, item.graph_query, item.answer)
        for item in questions
    ]
    # Number of questions about to be processed.
    print(len(question_tuples))
    structures = sp_modules.run_query_graph_generation(
        tuples_list=question_tuples,
        node_is_gold=node_is_gold,
        parser_mode=parser_mode,
        q_mode='graphq')
    write_structure_file(structures, output_file)
Пример #16
0
def run_grounding_graph_add_question_match(input_file_folder):
    '''Set each grounded graph's total score to path score + question match.

    For every file in ``input_file_folder`` each grounded graph gets
    ``total_score = score + QuestionMatchInterface.get_score(qid, denotation)``
    and the structures are written back to the same file.

    :param input_file_folder: directory holding the structure files; a
        trailing path separator is optional.
    '''
    all_data_path = os.listdir(input_file_folder)
    from grounding.ranking.path_match_sentence_level.question_match_interface import QuestionMatchInterface
    qmi = QuestionMatchInterface()
    for file_name in all_data_path:
        print(file_name)
        # os.path.join (not string concatenation) so the folder argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file_folder, file_name)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    grounded_graph.total_score = (
                        grounded_graph.score
                        + qmi.get_score(qid, grounded_graph.denotation))
        write_structure_file(structure_list, structure_file)
Пример #17
0
def run_grounding_graph_guiyi_add_question_match(input_file_folder):
    '''Overwrite each grounded graph's score with its question-match score.

    For every file in ``input_file_folder`` each grounded graph gets
    ``score = QuestionMatchInterface.get_score(qid, denotation)``; positive
    scores are printed.  The structures are written back to the same file.

    Despite the name, no normalized path score is added: the variant that
    set ``total_score`` to normalized score + question match was already
    disabled in the original code, so the normalization pass whose result
    was never read has been removed.

    :param input_file_folder: directory holding the structure files; a
        trailing path separator is optional.
    '''
    import os
    from grounding.ranking.path_match_nn.question_match_interface import QuestionMatchInterface
    qmi = QuestionMatchInterface()
    for file_name in os.listdir(input_file_folder):
        print(file_name)
        # os.path.join (not string concatenation) so the folder argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file_folder, file_name)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # Question match only: replaces any previous score.
                    grounded_graph.score = qmi.get_score(
                        qid, grounded_graph.denotation)
                    if grounded_graph.score > 0:
                        print('\t\t', grounded_graph.score)
        write_structure_file(structure_list, structure_file)
Пример #18
0
def run_candidate_graph_generation(structure_with_1_ungrounded_lcquad_file, output_file, q_mode='lcquad'):
    '''Generate candidate grounded graphs, one JSON file per question.

    For every structure whose ``<qid>.json`` does not yet exist in
    ``output_file``, only the last ungrounded graph is processed.  Its first
    2.1 grounded graph is expanded with the ``graph_2_1_to_2_2_ir`` oracle
    function selected by ``q_mode``.  Every candidate receives a
    ``grounded_query_id``, a ``sparql_query`` and a ``denotation``.  The
    structure is written to ``<output_file>/<qid>.json`` when at least one
    candidate was produced.

    :param structure_with_1_ungrounded_lcquad_file: structure file to read.
    :param output_file: output directory; a trailing path separator is
        optional.
    :param q_mode: ``'graphq'``, ``'cwq'`` or ``'lcquad'``; any other value
        produces no candidates.
    '''
    from method_ir.grounding import graph_2_1_to_2_2_ir
    from method_sp.grounding import grounded_graph_to_sparql
    from method_sp.grounding import sparql_to_denotation
    import os
    structure_list = read_structure_file(structure_with_1_ungrounded_lcquad_file)
    error_qid_list = []
    for structure in structure_list:
        structure_output_file = os.path.join(
            output_file, str(structure.qid) + '.json')
        # Skip questions already processed.  A direct existence check
        # replaces listing the whole output directory for every structure.
        if os.path.exists(structure_output_file):
            continue
        print(structure.qid)
        compositionality_type = structure.compositionality_type
        last_index = len(structure.ungrounded_graph_forest) - 1
        for j, ungrounded_graph in enumerate(structure.ungrounded_graph_forest):
            # Only the last ungrounded graph is used.
            if j != last_index:
                continue
            grounded_graph_forest = []
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                try:
                    if q_mode == 'graphq':
                        grounded_graph_forest.extend(graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_graphq(
                            _2_1_grounded_graph=_2_1_grounded_graph, qtype=compositionality_type))
                    elif q_mode == 'cwq':
                        grounded_graph_forest.extend(graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_cwq(
                            _2_1_grounded_graph=_2_1_grounded_graph, qtype=compositionality_type))
                    elif q_mode == 'lcquad':
                        grounded_graph_forest.extend(graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_lcquad(
                            _2_1_grounded_graph=_2_1_grounded_graph, qtype=compositionality_type))
                except Exception as e:
                    # Best effort: drop the candidates of a failed question
                    # and remember its qid.
                    print('#Error:', structure.qid, e)
                    grounded_graph_forest.clear()
                    error_qid_list.append(structure.qid)
                # Only the first 2.1 grounded graph is expanded.
                break
            for z, grounded_graph in enumerate(grounded_graph_forest):
                grounded_graph.grounded_query_id = (
                    ungrounded_graph.ungrounded_query_id * 100000 + z)
                grounded_graph.sparql_query = grounded_graph_to_sparql.grounded_graph_to_sparql(
                    grounded_graph=grounded_graph,
                    q_function=structure.function,
                    q_compositionality_type=structure.compositionality_type,
                    q_mode=q_mode)
                grounded_graph.denotation = sparql_to_denotation.set_denotation(
                    grounded_graph=grounded_graph,
                    q_compositionality_type=structure.compositionality_type)
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_forest)
            print('#size:\t', len(grounded_graph_forest))
            if len(grounded_graph_forest) > 0:
                write_structure_file([structure], structure_output_file)
    print('Error qid list:', error_qid_list)
Пример #19
0
def computed_every_grounded_graph_f1_webq_mid(input_file, answer_file):
    '''Store a mid-based F1 score on every grounded graph of every file.

    Gold answers are looked up by qid with
    ``evaluation_utils.search_for_answers_by_id`` and compared to
    ``set(grounded_graph.denotation)`` with ``sempre_evaluation.computeF1``.
    Only ``f1_score`` is stored and each file is rewritten in place.

    :param input_file: directory holding the structure files; a trailing
        path separator is optional.
    :param answer_file: not read by this function (see the note below).
    '''
    # NOTE(review): ``qid_to_answers_dict`` is not built here although the
    # original comment said "read qid-to-answers", and ``answer_file`` is
    # never read.  The name is presumably a module-level dict defined
    # elsewhere; if it is not, the lookup below raises NameError -- confirm
    # and load it from ``answer_file`` if needed.
    for structure_path in os.listdir(input_file):
        # os.path.join (not string concatenation) so the directory argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file, structure_path)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            qid = structure.qid
            gold_answer_mid_set = evaluation_utils.search_for_answers_by_id(
                qid, qid_to_answers_dict)
            print(structure_path, gold_answer_mid_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_denotation_set = set(grounded_graph.denotation)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answer_mid_set, system_denotation_set)
                    grounded_graph.f1_score = f1
        write_structure_file(structure_list, structure_file)
Пример #20
0
def run_topic_entities_from_graphq(graph_questions_filepath,
                                   structure_with_1_ungrounded_graphq_file,
                                   node_is_gold=False,
                                   linking_is_gold=False):
    '''Run Freebase topic-entity generation for a GraphQuestions file.

    The questions are read with
    ``graphquestion_interface.read_graph_question_json`` and turned into
    ``(qid, question, graph_query, answer_mid)`` tuples.  These are passed to
    ``ir_module.run_topics_entity_generation_freebase`` with
    ``q_mode='graphq'`` and the two flags forwarded unchanged; the result is
    written to ``structure_with_1_ungrounded_graphq_file``.
    '''
    from datasets_interface.question_interface import graphquestion_interface
    questions = graphquestion_interface.read_graph_question_json(
        graph_questions_filepath)
    question_tuples = [
        (item.qid, item.question, item.graph_query, item.answer_mid)
        for item in questions
    ]
    # Number of questions about to be processed.
    print(len(question_tuples))
    structures = ir_module.run_topics_entity_generation_freebase(
        tuples_list=question_tuples,
        node_is_gold=node_is_gold,
        linking_is_gold=linking_is_gold,
        q_mode='graphq')
    write_structure_file(structures, structure_with_1_ungrounded_graphq_file)
Пример #21
0
def computed_every_grounded_graph_f1_cwq(input_file):
    '''Store an F1 score on every grounded graph of every file.

    Gold answers come from
    ``evaluation_utils.get_gold_answers(structure.gold_answer)`` and are
    compared to ``set(grounded_graph.denotation)`` with
    ``sempre_evaluation.computeF1``.  Only ``f1_score`` is stored; positive
    values are printed.  Each file is rewritten in place.

    :param input_file: directory holding the structure files; a trailing
        path separator is optional.
    '''
    for structure_path in os.listdir(input_file):
        # os.path.join (not string concatenation) so the directory argument
        # works with or without a trailing separator.
        structure_file = os.path.join(input_file, structure_path)
        print(structure_path)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            gold_answer_mid_set = evaluation_utils.get_gold_answers(
                structure.gold_answer)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_denotation_set = set(grounded_graph.denotation)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answer_mid_set, system_denotation_set)
                    grounded_graph.f1_score = f1
                    if f1 > 0:
                        print(f1)
        write_structure_file(structure_list, structure_file)
Пример #22
0
def computed_every_grounded_graph_f1_lcquad(input_file):
    """Attach recall, precision and F1 to every grounded graph in a folder.

    Every entry listed in ``input_file`` is read with
    ``read_structure_file``.  Gold answers are looked up per question through
    ``lcquad_1_0_interface.get_answers_by_question``; each grounded graph's
    denotation is compared against them with ``sempre_evaluation.computeF1``
    and the three returned values are stored in ``recall_score``,
    ``precision_score`` and ``f1_score``.  The structures are then handed
    back to ``write_structure_file`` under the same path.

    :param input_file: directory holding the structure files; a trailing
        path separator is optional.
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    for structure_path in os.listdir(input_file):
        # os.path.join rather than string concatenation, so a folder passed
        # without a trailing separator still resolves to the listed file.
        structure_file = os.path.join(input_file, structure_path)
        print(structure_path)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            # Example value: ['http://dbpedia.org/resource/Colorado']
            gold_answer_mid_set = lcquad_1_0_interface.get_answers_by_question(
                structure.question)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in (
                        ungrounded_graph.get_grounded_graph_forest()):
                    system_denotation_set = set(grounded_graph.denotation)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answer_mid_set, system_denotation_set)
                    grounded_graph.f1_score = f1
                    grounded_graph.recall_score = recall
                    grounded_graph.precision_score = precision
        write_structure_file(structure_list, structure_file)
Пример #23
0
def run_ungrounded_graph_from_complexwebquestion(
        complexquestin_filepath,
        structure_with_1_ungrounded_cwq_file,
        node_is_gold=False,
        parser_mode='skeleton'):
    """Generate query-graph structures for a ComplexWebQuestions file.

    The questions are read with
    ``complexwebquestion_interface.read_complexwebq_question_json``, turned
    into ``(ID, question, sparql, answers)`` tuples, passed to
    ``sp_modules.run_query_graph_generation`` with ``q_mode='cwq'`` and the
    returned structures are handed to ``write_structure_file``.

    :param complexquestin_filepath: path of the question JSON file.
    :param structure_with_1_ungrounded_cwq_file: destination passed to
        ``write_structure_file``.
    :param node_is_gold: forwarded unchanged to the generation step.
    :param parser_mode: forwarded unchanged to the generation step.
    """
    from datasets_interface.question_interface import complexwebquestion_interface
    questions = complexwebquestion_interface.read_complexwebq_question_json(
        complexquestin_filepath)
    tuples_list = [
        (item.ID, item.question, item.sparql, item.answers)
        for item in questions
    ]
    print(len(tuples_list))
    generated_structures = sp_modules.run_query_graph_generation(
        tuples_list=tuples_list,
        node_is_gold=node_is_gold,
        parser_mode=parser_mode,
        q_mode='cwq')
    write_structure_file(generated_structures,
                         structure_with_1_ungrounded_cwq_file)
Пример #24
0
def computed_every_grounded_graph_f1_lcquad(input_file):
    """Attach an F1 score to every grounded graph stored in a folder.

    Every entry listed in ``input_file`` is read with
    ``read_structure_file``.  Gold answers are looked up per question through
    ``lcquad_1_0_interface.get_answers_by_question`` and printed; each
    grounded graph's denotation is compared against them with
    ``sempre_evaluation.computeF1`` and the F1 value is stored in
    ``grounded_graph.f1_score``.  The structures are then handed back to
    ``write_structure_file`` under the same path.

    :param input_file: directory holding the structure files; a trailing
        path separator is optional.
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    for structure_path in os.listdir(input_file):
        # os.path.join rather than string concatenation, so a folder passed
        # without a trailing separator still resolves to the listed file.
        structure_file = os.path.join(input_file, structure_path)
        print(structure_path)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            gold_answer_mid_set = lcquad_1_0_interface.get_answers_by_question(
                structure.question)
            print('#gold answer:\t', gold_answer_mid_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in (
                        ungrounded_graph.get_grounded_graph_forest()):
                    system_denotation_set = set(grounded_graph.denotation)
                    # Only the third element of the result (F1) is kept.
                    _, _, f1 = sempre_evaluation.computeF1(
                        gold_answer_mid_set, system_denotation_set)
                    grounded_graph.f1_score = f1
                    if f1 > 0:
                        print(f1)
        write_structure_file(structure_list, structure_file)
Пример #25
0
def run_grounding_graph_score12_match(input_file_folder, q_mode='lcquad'):
    """Score the grounded graphs of every structure file in a folder.

    Only the last ungrounded graph of each structure is processed.  Its
    grounded graphs are passed, together with the question, to
    ``PathMatchScore12.set_bert_score_score12`` and the returned scores are
    assigned pairwise to ``grounded_graph.score``.  If scoring raises, every
    grounded graph of that structure gets a score of ``0.0`` and the error is
    printed.  Each file is handed back to ``write_structure_file`` under the
    path it was read from.

    :param input_file_folder: directory holding the structure files; a
        trailing path separator is optional.
    :param q_mode: passed to the ``PathMatchScore12`` constructor.
    """
    from method_ir.grounding.path_match_score12.path_match_interface import PathMatchScore12
    path_match_score12 = PathMatchScore12(q_mode)
    for path in os.listdir(input_file_folder):
        # os.path.join rather than string concatenation, so a folder passed
        # without a trailing separator still resolves to the listed file.
        structure_file = os.path.join(input_file_folder, path)
        print(path)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            forest = structure.ungrounded_graph_forest
            if not forest:
                continue
            grounded_graph_list = forest[-1].get_grounded_graph_forest()
            try:
                bert_scores = path_match_score12.set_bert_score_score12(
                    question_normal=structure.question,
                    grounded_graph_forest_list=grounded_graph_list)
                # Kept inside the try: a non-iterable result is treated like
                # a scoring failure and falls back to zero scores.
                for grounded_graph, bert_score in zip(grounded_graph_list,
                                                      bert_scores):
                    grounded_graph.score = bert_score
            except Exception as e:
                # Best effort: keep going, but report what went wrong
                # instead of discarding the exception.
                for grounded_graph in grounded_graph_list:
                    grounded_graph.score = 0.0
                print('error', repr(e))
        write_structure_file(structure_list, structure_file)
    print('over')
Пример #26
0
def run_grounded_node_grounding_dbpedia(structure_with_ungrounded_graphq_file,
                                        output_file,
                                        linking_is_gold=False):
    '''
    function: 1.0 ungrounded query  ->  2.1 grounded query

    Reads the structures from structure_with_ungrounded_graphq_file, links
    every node of the last ungrounded graph of each structure, builds the
    grounded graphs from the linking results and passes the structures to
    write_structure_file with output_file.

    :param structure_with_ungrounded_graphq_file: input structure file
    :param output_file: destination passed to write_structure_file
    :param linking_is_gold: when true, linking results come from
        lcquad_1_0_interface; otherwise from node_linking_interface_dbpedia
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    from method_sp.grounding._2_1_grounded_graph.node_linking import node_linking_interface_dbpedia
    from method_sp.grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        last_index = len(forest) - 1
        for index, ungrounded_graph in enumerate(forest):
            # Only the final ungrounded graph is grounded.
            if index != last_index:
                continue
            grounding_result_list = []
            for node in ungrounded_graph.nodes:
                if not linking_is_gold:
                    linked = node_linking_interface_dbpedia.node_linking(
                        node=node)
                else:
                    linked = lcquad_1_0_interface.get_topic_entities_by_question_and_mention(
                        question=structure.question,
                        mention=node.friendly_name)
                grounding_result_list.append((node, linked))
            grounded_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=grounding_result_list)
            ungrounded_graph.set_grounded_linking(grounding_result_list)
            ungrounded_graph.set_grounded_graph_forest(grounded_graphs)
    write_structure_file(structure_list, output_file)
Пример #27
0
def run_grounding_graph_cnn_match(input_file_folder):
    '''Score every grounded graph in a folder with the CNN path matcher.

    For each structure file listed in input_file_folder, and for each
    ungrounded graph of each structure, the structure's question is passed
    through parsing_utils.extract_importantwords_from_cnn.  Every grounded
    graph then receives
    CNNMatchInterface.get_path_pro(key_path, processed_question) as its
    score.  Each file is handed back to write_structure_file under the path
    it was read from.

    :param input_file_folder: directory holding the structure files; a
        trailing path separator is optional.
    '''
    import os
    from grounding.ranking.path_match_cnn.cnn_match_interface import CNNMatchInterface
    from parsing import parsing_utils
    cmi = CNNMatchInterface()
    for path in os.listdir(input_file_folder):
        print(path)
        # os.path.join rather than string concatenation, so a folder passed
        # without a trailing separator still resolves to the listed file.
        structure_file = os.path.join(input_file_folder, path)
        structure_list = read_structure_file(structure_file)
        for structure in structure_list:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                # Always start from the untouched question: reusing one
                # variable here would feed the output produced for one
                # ungrounded graph into the extraction for the next.
                cnn_question = parsing_utils.extract_importantwords_from_cnn(
                    structure.question, ungrounded_graph)
                for grounded_graph in (
                        ungrounded_graph.get_grounded_graph_forest()):
                    grounded_graph.score = cmi.get_path_pro(
                        grounded_graph.key_path, cnn_question)
                    print(grounded_graph.key_path, cnn_question,
                          grounded_graph.score)
        write_structure_file(structure_list, structure_file)
Пример #28
0
def _distinct_answers(denotation):
    """Return the distinct answers found in ``denotation`` as a list.

    A list item contributes only its first element; any other item is kept
    as it is.
    """
    return list({
        answer[0] if isinstance(answer, list) else answer
        for answer in denotation
    })


def _superlative_denotation(denotation, superlative_type):
    """Return the replacement denotation for a superlative question.

    With more than one item, the second field of the first item decides
    which ``operation_utils`` superlative routine is applied (numeric or
    date).  In every other case, and whenever anything raises, the
    denotation is reduced to its distinct answers.
    """
    try:
        # Indexing happens inside the try so that an empty or flat
        # denotation uses the plain fallback instead of raising IndexError.
        sample_value = denotation[0][1]
        has_several = len(denotation) > 1
        if has_several and operation_utils.is_number(sample_value):
            # time.perf_counter replaces time.clock, removed in Python 3.8.
            start = time.perf_counter()
            new_denotation = {
                operation_utils.get_denotation_with_superlativeconstraint_float(
                    denotation_with_value_list=denotation,
                    superlative_type=superlative_type)
            }
            print('get_denotation_with_superlativeconstraint_float:\t',
                  time.perf_counter() - start)
            return list(new_denotation)
        if has_several and operation_utils.isVaildDate(sample_value):
            start = time.perf_counter()
            new_denotation = {
                operation_utils.get_denotation_with_superlativeconstraint_datetime(
                    denotation_with_value_list=denotation,
                    superlative_type=superlative_type)
            }
            print('get_denotation_with_superlativeconstraint_datetime:\t',
                  time.perf_counter() - start)
            return list(new_denotation)
        start = time.perf_counter()
        new_denotation = _distinct_answers(denotation)
        print('grounded_graph.denotation1:\t', time.perf_counter() - start)
        return new_denotation
    except Exception:
        # Best effort: any failure above degrades to the plain answers.
        start = time.perf_counter()
        new_denotation = _distinct_answers(denotation)
        print('grounded_graph.denotation2:\t', time.perf_counter() - start)
        return new_denotation


def _comparative_denotation(grounded_graph, normalization_value, q_function):
    """Return the replacement denotation for a comparative question.

    With more than one item the denotation is filtered by
    ``operation_utils.filter_by_datetime_compare`` when the normalization
    value carries a dateTime type suffix, otherwise by
    ``operation_utils.filter_by_float_compare``; the part of the value before
    ``'^^'`` is used as the comparison element.  In every other case, and
    whenever anything raises, the denotation is reduced to its distinct
    answers.
    """
    denotation = grounded_graph.denotation
    has_several = len(denotation) > 1
    try:
        if has_several and (
                '^^xsd:dateTime' in normalization_value or
                '^^http://www.w3.org/2001/XMLSchema#datetime'
                in normalization_value):
            return operation_utils.filter_by_datetime_compare(
                denotation=denotation,
                compare_element=normalization_value.split('^^')[0],
                q_function=q_function)
        if has_several:
            # Values look like '1768.0^^http://www.w3.org/2001/XMLSchema#double'
            # or '1^^http://www.w3.org/2001/XMLSchema#int'; split() leaves a
            # value without '^^' unchanged.
            return operation_utils.filter_by_float_compare(
                denotation=denotation,
                compare_element=normalization_value.split('^^')[0],
                q_function=q_function)
        return _distinct_answers(denotation)
    except Exception:
        # Also reached when no node supplied a normalization value (None).
        print('error', grounded_graph.grounded_query_id,
              grounded_graph.denotation)
        return _distinct_answers(denotation)


def run_grounding_graph_update_denotation_graphq(input_file_folder):
    """Post-process denotations of superlative and comparative questions.

    For every structure file listed in ``input_file_folder``, structures
    whose ``compositionality_type`` is ``'bgp'`` or ``'count'`` are left
    untouched.  For the others, the grounded graphs of the last ungrounded
    graph get their ``denotation`` replaced by the result of the matching
    superlative or comparative routine.  A file is handed back to
    ``write_structure_file`` only when it contains at least one such
    structure.

    :param input_file_folder: directory holding the structure files; a
        trailing path separator is optional.
    """
    for path in os.listdir(input_file_folder):
        # os.path.join rather than string concatenation, so a folder passed
        # without a trailing separator still resolves to the listed file.
        structure_with_grounded_graphq_file = os.path.join(
            input_file_folder, path)
        print(path)
        structure_list = read_structure_file(
            structure_with_grounded_graphq_file)
        is_aggregation = False
        for structure in structure_list:
            qtype = structure.compositionality_type
            assert qtype in ["bgp", "count", "superlative", "comparative"]
            if qtype in ['bgp', 'count']:
                continue
            is_aggregation = True
            forest = structure.ungrounded_graph_forest
            if not forest:
                continue
            # Only the last ungrounded graph is post-processed.
            ungrounded_graph = forest[-1]
            grounded_graph_list = ungrounded_graph.get_grounded_graph_forest()

            if qtype == 'superlative':
                superlative_q_type = structure.function
                for grounded_graph in grounded_graph_list:
                    print(grounded_graph.grounded_query_id)
                    grounded_graph.denotation = _superlative_denotation(
                        grounded_graph.denotation, superlative_q_type)

            elif qtype == 'comparative':
                q_function = structure.function
                # First normalization value found on any node, else None.
                normalization_value = next(
                    (node.normalization_value
                     for node in ungrounded_graph.nodes
                     if node.normalization_value is not None), None)
                for grounded_graph in grounded_graph_list:
                    grounded_graph.denotation = _comparative_denotation(
                        grounded_graph, normalization_value, q_function)

        if is_aggregation:
            write_structure_file(structure_list,
                                 structure_with_grounded_graphq_file)
    print('over')
Пример #29
0
def run_grounded_node_grounding_freebase(structure_with_ungrounded_graphq_file,
                                         output_file,
                                         linking_is_gold=False,
                                         q_mode='graphq'):
    '''
    function: 1.0 ungrounded query  ->  2.1 grounded query

    Reads the structures from structure_with_ungrounded_graphq_file, links
    every node of the last ungrounded graph of each structure, builds the
    grounded graphs from the linking results and passes the structures to
    write_structure_file with output_file.

    :param structure_with_ungrounded_graphq_file: input structure file
    :param output_file: destination passed to write_structure_file
    :param linking_is_gold: when true, linking results come from the dataset
        interface selected by q_mode; otherwise from
        node_linking_interface_freebase using the entity linker built here
    :param q_mode: 'graphq' or 'cwq'; selects the entity linker and the gold
        lookup interface
    '''
    from common import globals_args
    from datasets_interface.question_interface import graphquestion_interface
    from datasets_interface.question_interface import complexwebquestion_interface
    from method_sp.grounding._2_1_grounded_graph.node_linking import node_linking_interface_freebase
    from method_sp.grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    assert q_mode in ['graphq', 'cwq']
    if q_mode == 'cwq':  # aqqu entity linking
        from method_sp.grounding._2_1_grounded_graph.node_linking.entity_linking_aqqu_vocab.surface_index_memory import EntitySurfaceIndexMemory
        latest_kb = globals_args.kb_freebase_latest_file
        elp = EntitySurfaceIndexMemory(
            entity_list_file=latest_kb.entity_list_file,
            surface_map_file=latest_kb.surface_map_file,
            entity_index_prefix=latest_kb.entity_index_prefix)
    elif q_mode == 'graphq':
        from method_sp.grounding._2_1_grounded_graph.node_linking.entity_linking_en_vocab.entity_link_pipeline import EntityLinkPipeline
        kb_2013 = globals_args.kb_freebase_en_2013
        elp = EntityLinkPipeline(
            freebase_graph_name_entity_file=kb_2013.freebase_graph_name_entity,
            freebase_graph_alias_entity_file=kb_2013.freebase_graph_alias_entity,
            graphquestions_train_friendlyname_entity_file=kb_2013.
            graphquestions_train_friendlyname_entity,
            clueweb_mention_pro_entity_file=kb_2013.clueweb_mention_pro_entity)

    # Node types for which a gold lookup is attempted.
    gold_node_types = ['entity', 'class', 'literal']
    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        last_index = len(forest) - 1
        for index, ungrounded_graph in enumerate(forest):
            # Only the final ungrounded graph is grounded.
            if index != last_index:
                continue
            grounding_result_list = []
            for node in ungrounded_graph.nodes:
                if not linking_is_gold:
                    linked = node_linking_interface_freebase.node_linking(
                        node=node, elp=elp)
                    grounding_result_list.append((node, linked))
                    continue
                assert q_mode in ['graphq', 'cwq']
                # Stays empty for node types outside gold_node_types.
                result_dict = dict()
                if q_mode == 'graphq':
                    if node.node_type in gold_node_types:
                        result_dict = graphquestion_interface.get_topic_entities_by_question_and_mention(
                            question=structure.question,
                            mention=node.friendly_name)
                elif q_mode == 'cwq':
                    if node.node_type in gold_node_types:
                        result_dict = complexwebquestion_interface.get_topic_entities_by_question_and_mention(
                            question_normal=structure.question,
                            mention=node.friendly_name)
                grounding_result_list.append((node, result_dict))
            grounded_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=grounding_result_list)
            ungrounded_graph.set_grounded_linking(grounding_result_list)
            ungrounded_graph.set_grounded_graph_forest(grounded_graphs)
    write_structure_file(structure_list, output_file)