Exemplo n.º 1
0
def test_ground_with_gold():
    """Check the best score reachable when graphs are generated with gold answers.

    For every (sentence, answers) pair in ``test_sentences_perfect_fscore`` the
    top-ranked generated graph must have a score of exactly 1.0 at index
    ``[1][2]``; for every pair in ``test_sentences_positive_fscore`` that score
    must be above 0.5. Going by the fixture names, the value at ``[1][2]`` is
    presumably the F-score.
    """

    def top_score(raw_text, gold_answers):
        # Link the entities, then rebuild the Sentence from the linked fields.
        linked = entitylinker.link_entities_in_raw_input(raw_text)
        rebuilt = sentence.Sentence(input_text=linked.input_text, tagged=linked.tagged, entities=linked.entities)

        candidates = staged_generation.generate_with_gold(rebuilt.graphs[0], gold_answers)
        # Rank by the third score component, best first.
        ranked = sorted(candidates, key=lambda pair: pair[1][2], reverse=True)
        return ranked[0][1][2]

    for raw_text, gold_answers in test_sentences_perfect_fscore:
        assert top_score(raw_text, gold_answers) == 1.0

    for raw_text, gold_answers in test_sentences_positive_fscore:
        assert top_score(raw_text, gold_answers) > 0.5
Exemplo n.º 2
0
def ground_with_model(input_graphs, s, qa_model, min_score, beam_size=10, verify_with_wikidata=True):
    """
    Ground the input graphs and keep the groundings that the model scores best.

    :param input_graphs: a list of equivalent graph extensions to choose from.
    :param s: sentence; its attributes are copied onto the per-batch sentences that are encoded for the model.
    :param qa_model: a model to evaluate graphs
    :param min_score: only groundings with a model score strictly greater than this value are kept.
    :param beam_size: maximum number of graphs that are returned
    :param verify_with_wikidata: passed as ``use_wikidata`` to ``graph_queries.get_graph_groundings``.
    :return: a list of at most beam_size scored graphs, ordered by score from best to worst
    """

    logger.debug("Input graphs: {}".format(len(input_graphs)))
    logger.debug("First input one: {}".format(input_graphs[:1]))

    # Expand every input graph into all of its possible groundings.
    candidates = []
    for ungrounded in input_graphs:
        groundings = graph_queries.get_graph_groundings(ungrounded, use_wikidata=verify_with_wikidata)
        for grounding in groundings:
            candidates.append(apply_grounding(ungrounded, grounding))
    candidates = filter_second_hops(candidates)
    logger.debug("Number of possible groundings: {}".format(len(candidates)))
    if len(candidates) == 0:
        return []

    # Pack the candidates into copies of the sentence, V.MAX_NEGATIVE_GRAPHS graphs per copy.
    batch_size = V.MAX_NEGATIVE_GRAPHS
    batches = []
    for start in range(0, len(candidates), batch_size):
        batch = sentence.Sentence()
        batch.__dict__.update(s.__dict__)
        # The scores are placeholders here; the model output replaces them below.
        batch.graphs = [WithScore(graph, (0.0, 0.0, min_score)) for graph in candidates[start:start + batch_size]]
        batches.append(batch)
    if not batches:
        return []

    model_class_name = qa_model._model.__class__.__name__
    samples = V.encode_for_model(batches, model_class_name)
    model_scores = qa_model.predict_batchwise(*samples).view(-1).data

    logger.debug("model_scores: {}".format(model_scores))
    # Keep only the groundings that beat the threshold, then rank them.
    ranked = []
    for position, graph in enumerate(candidates):
        score = model_scores[position]
        if score > min_score:
            ranked.append(WithScore(graph, (0.0, 0.0, score)))

    ranked.sort(key=lambda scored: scored[1], reverse=True)
    if len(ranked) > beam_size:
        ranked = ranked[:beam_size]
    logger.debug("Number of chosen groundings: {}".format(len(ranked)))
    return ranked
def generate(path_to_model, config_file_path):
    """Run a stored model over the evaluation questions and save its answers.

    The function loads the config, the question file, optionally an entity
    linker, the GloVe vocabulary and the model. For each question it generates
    graphs with the model, takes the answers of the first chosen graph that has
    a non-empty denotation and scores them against the gold answers. The
    running averages of the three metrics are shown on the progress bar
    (labelled prec / rec / f1).

    The collected answers are written as JSON to
    ``config['evaluation']['save.answers.to']`` after every 100 questions and
    once more at the end. Each entry is
    ``(question id, metrics, model answers, [(graph, score), ...])`` where the
    last list holds at most the first 10 chosen graphs.

    :param path_to_model: path to the stored model; the part of the file name
        before the first underscore is used as the model class name in ``models``.
    :param config_file_path: path of the config file handed to
        ``config_utils.load_config``.
    :raises SystemExit: with status 1 if the config has no "evaluation" section.
    """
    config, logger = config_utils.load_config(config_file_path)
    if "evaluation" not in config:
        print("Evaluation parameters not in the config file!")
        # A missing section is an error: exit with a non-zero status so that a
        # calling script can detect the failure (a bare sys.exit() reports 0).
        sys.exit(1)
    eval_config = config['evaluation']

    with open(eval_config['questions']) as f:
        webquestions_questions = json.load(f)

    # Load the entity linker if one is configured; otherwise the entity
    # annotations stored with each question are used.
    entitylinker = None
    if 'entity.linking' in config:
        PATH_EL = "../../entity-linking/"
        sys.path.insert(0, PATH_EL)
        from entitylinking import core
        linking_config = config['entity.linking']
        logger.info("Load entity linker")
        entitylinker = getattr(core, linking_config['linker'])(
            logger=logger, **linking_config['linker.options'], pos_tags=True)

    # Only the word-to-index mapping is needed; it is installed globally on V.
    _, word2idx = V.extend_embeddings_with_special_tokens(
        *_utils.load_word_embeddings(
            _utils.RESOURCES_FOLDER +
            "../../resources/embeddings/glove/glove.6B.100d.txt"))
    V.WORD_2_IDX = word2idx

    model_type = path_to_model.split("/")[-1].split("_")[0]
    logger.info(f"Model type: {model_type}")

    logger.info('Loading the model from: {}'.format(path_to_model))

    dummy_net = getattr(models, model_type)()
    container = fackel.TorchContainer(torch_model=dummy_net, logger=logger)
    container.load_from_file(path_to_model)

    graph_queries.FREQ_THRESHOLD = eval_config.get("min.relation.freq", 500)
    logger.debug('Testing')
    global_answers = []
    avg_metrics = np.zeros(3)

    def save_answers():
        # Overwrite the output file with everything collected so far.
        with open(eval_config["save.answers.to"], 'w') as answers_out:
            json.dump(global_answers,
                      answers_out,
                      sort_keys=True,
                      indent=4,
                      cls=sentence.SentenceEncoder)

    data_iterator = tqdm.tqdm(webquestions_questions, ncols=100, ascii=True)
    for i, q_obj in enumerate(data_iterator):
        q = q_obj.get('utterance', q_obj.get('question'))
        q_index = q_obj['questionid']

        if entitylinker:
            sent = entitylinker.link_entities_in_raw_input(q,
                                                           element_id=q_index)
            if "max.num.entities" in eval_config:
                sent.entities = sent.entities[:eval_config["max.num.entities"]]
            sent = sentence.Sentence(input_text=sent.input_text,
                                     tagged=sent.tagged,
                                     entities=sent.entities)
        else:
            tagged = _utils.get_tagged_from_server(q, caseless=q.islower())
            sent = sentence.Sentence(input_text=q,
                                     tagged=tagged,
                                     entities=q_obj['entities'])

        chosen_graphs = staged_generation.generate_with_model(
            sent,
            container,
            beam_size=eval_config.get("beam.size", 10))

        # Walk down the ranked graphs until one of them yields any answers.
        model_answers = []
        for candidate in chosen_graphs:
            model_answers = graph_queries.get_graph_denotations(candidate.graph)
            if model_answers:
                break

        gold_answers = webquestions_io.get_answers_from_question(q_obj)
        metrics = evaluation.retrieval_prec_rec_f1(gold_answers, model_answers)
        global_answers.append((q_index, list(metrics), model_answers, [
            (c_g.graph, float(c_g.scores[2])) for c_g in chosen_graphs[:10]
        ]))
        avg_metrics += metrics
        precision, recall, f1 = tuple(avg_metrics / (i + 1))
        data_iterator.set_postfix(prec=precision, rec=recall, f1=f1)

        # Save intermediate results so that a crash does not lose everything.
        if i > 0 and i % 100 == 0:
            save_answers()

    print("Average metrics: {}".format(
        (avg_metrics / (len(webquestions_questions)))))

    logger.debug('Testing is finished')
    save_answers()
Exemplo n.º 4
0
def generate(path_to_model, config_file_path, seed, gpuid, experiment_tag):
    """Evaluate a stored model on a question set and record answers and metrics.

    The function loads the config, the question file, optionally an entity
    linker, the GloVe vocabulary and the model. For each question it generates
    graphs with the model and walks down the chosen graphs until one yields a
    non-empty answer set. If the Freebase entity list is enabled, a graph is
    only accepted when at least as many of the lower-cased answer labels are on
    that list as there are answers. The running averages of the three metrics
    and of the number of graphs tried are shown on the progress bar.

    Outputs:

    * the answers as JSON, written after every 100 questions and at the end;
      the file name is ``save.answers.to`` itself if it ends with ".json",
      otherwise a name composed from the data set and model names below that
      directory;
    * optionally (``add.results.to``) one CSV row with the overall metrics and,
      if ``qid2hop`` is configured, one row per hop count. All rows share the
      same columns: model name, model type, Gated/Simple, container
      description, seed, data set, "full" or the hop count, entity-list flag,
      the three metrics, experiment tag.

    :param path_to_model: path to the stored model; the part of the file name
        before the first underscore is used as the model class name in ``models``.
    :param config_file_path: path of the config file handed to
        ``config_utils.load_config``.
    :param seed: passed to ``config_utils.load_config`` and written to the results file.
    :param gpuid: passed to ``config_utils.load_config``.
    :param experiment_tag: label written as the last column of every results row.
    :raises SystemExit: with status 1 if the config has no "evaluation" section.
    """
    config, logger = config_utils.load_config(config_file_path,
                                              gpuid=gpuid,
                                              seed=seed)
    if "evaluation" not in config:
        print("Evaluation parameters not in the config file!")
        # A missing section is an error: exit with a non-zero status so that a
        # calling script can detect the failure (a bare sys.exit() reports 0).
        sys.exit(1)
    eval_config = config['evaluation']

    # Get the data set name and load the data set as specified in the config file
    dataset_name = eval_config['questions'].split("/")[-1].split(".")[0]
    logger.info(f"Dataset: {dataset_name}")
    with open(eval_config['questions']) as f:
        webquestions_questions = json.load(f)

    # Load the entity linker if specified, otherwise the entity annotations in the data set will be used
    entitylinker = None
    if 'entity.linking' in config:
        PATH_EL = "../../entity-linking/"
        sys.path.insert(0, PATH_EL)
        from entitylinking import core
        linking_config = config['entity.linking']
        logger.info("Load entity linker")
        entitylinker = getattr(core, linking_config['linker'])(
            logger=logger, **linking_config['linker.options'], pos_tags=True)

    # Load the GloVe word embeddings and embeddings for special tokens
    _, word2idx = V.extend_embeddings_with_special_tokens(
        *_utils.load_word_embeddings(
            _utils.RESOURCES_FOLDER +
            "../../resources/embeddings/glove/glove.6B.100d.txt"))
    # Set the global mapping for words to indices
    V.WORD_2_IDX = word2idx

    # Derive the model type and the full model name from the model file
    model_file = path_to_model.split("/")[-1]
    model_type = model_file.split("_")[0]
    model_name = model_file.replace(".pkl", "")
    logger.info(f"Model type: {model_type}")
    logger.info('Loading the model from: {}'.format(path_to_model))

    # Load the PyTorch model
    dummy_net = getattr(models, model_type)()
    container = fackel.TorchContainer(torch_model=dummy_net, logger=logger)
    container.load_from_file(path_to_model)
    model_gated = container._model._gnn.hp_gated if model_type == "GNNModel" else False

    # Load the freebase entity set that was used to restrict the answer space by the previous work if specified.
    freebase_entity_set = set()
    if eval_config.get('entities.list', False):
        print("Using the Freebase entity list")
        freebase_entity_set = _utils.load_blacklist(_utils.RESOURCES_FOLDER +
                                                    "freebase-entities.txt")

    # Compose a file name for the output file
    save_answer_to = eval_config["save.answers.to"]
    if not save_answer_to.endswith(".json"):
        dir_name = save_answer_to + f"{dataset_name}/{model_type.lower()}/"
        save_answer_to = dir_name + f"{dataset_name}_predictions_{'g' if model_gated else ''}{model_name.lower()}.json"
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(dir_name, exist_ok=True)
    print(f"Save output to {save_answer_to}")

    # Init the variables to store the results
    logger.debug('Testing')
    graph_queries.FREQ_THRESHOLD = eval_config.get("min.relation.freq", 500)
    global_answers = []
    # precision, recall, f1 and the number of graphs tried per question
    avg_metrics = np.zeros(4)

    def save_answers():
        # Overwrite the output file with everything collected so far.
        with open(save_answer_to, 'w') as answers_out:
            json.dump(global_answers,
                      answers_out,
                      sort_keys=True,
                      indent=4,
                      cls=sentence.SentenceEncoder)

    def result_row(scope, values):
        # One CSV line; the summary row and the per-hop rows share this layout
        # so that the results file has the same columns on every line.
        return ",".join([
            model_name, model_type, "Gated" if model_gated else "Simple",
            str(container.description),
            str(seed), dataset_name, scope,
            "EntityList" if freebase_entity_set else "NoEntityList"
        ] + [str(el) for el in values] + [str(experiment_tag)])

    # Iterate over the questions in the dataset
    data_iterator = tqdm.tqdm(webquestions_questions, ncols=100, ascii=True)
    for i, q_obj in enumerate(data_iterator):
        q = q_obj.get('utterance', q_obj.get('question'))
        q_index = q_obj['questionid']

        if entitylinker:
            sent = entitylinker.link_entities_in_raw_input(q,
                                                           element_id=q_index)
            if "max.num.entities" in eval_config:
                sent.entities = sent.entities[:eval_config["max.num.entities"]]
            sent = sentence.Sentence(input_text=sent.input_text,
                                     tagged=sent.tagged,
                                     entities=sent.entities)
        else:
            tagged = _utils.get_tagged_from_server(q, caseless=q.islower())
            sent = sentence.Sentence(input_text=q,
                                     tagged=tagged,
                                     entities=q_obj['entities'])

        chosen_graphs = staged_generation.generate_with_model(
            sent,
            container,
            beam_size=eval_config.get("beam.size", 10))

        # Walk down the ranked graphs until one yields an acceptable answer set.
        # graphs_tried stays -1 if the model chose no graph at all.
        model_answers = []
        graphs_tried = -1
        for graphs_tried, candidate in enumerate(chosen_graphs, start=1):
            model_answers = graph_queries.get_graph_denotations(candidate.graph)
            if not model_answers:
                continue
            if not freebase_entity_set:
                break
            labeled_answers = {
                label.lower()
                for labels in queries.get_labels_for_entities(
                    model_answers).values() for label in labels
            }
            # Accept only if at least as many labels are on the entity list as there are answers.
            if len(labeled_answers & freebase_entity_set) >= len(model_answers):
                break

        gold_answers = webquestions_io.get_answers_from_question(q_obj)
        metrics = evaluation.retrieval_prec_rec_f1(gold_answers, model_answers)
        global_answers.append((q_index, list(metrics), model_answers, [
            (c_g.graph, float(c_g.scores[2])) for c_g in chosen_graphs[:10]
        ]))
        avg_metrics += metrics + (graphs_tried, )
        precision, recall, f1, g_j = tuple(avg_metrics / (i + 1))
        data_iterator.set_postfix(prec=precision, rec=recall, f1=f1, g_j=g_j)

        # Save intermediate results
        if i > 0 and i % 100 == 0:
            save_answers()

    avg_metrics = avg_metrics / (len(webquestions_questions))
    print("Average metrics: {}".format(avg_metrics))

    # Fine-grained results, if there is a mapping of questions to the number of relation to find the correct answer
    results_by_hops = {}
    if "qid2hop" in eval_config:
        with open(eval_config['qid2hop']) as f:
            q_index2hop = json.load(f)
        print("Results by hop: ")
        hops_dist = Counter(q_index2hop[p[0]] for p in global_answers)
        # With no processed questions there is nothing to break down
        # (and max() of an empty sequence would raise ValueError).
        if hops_dist:
            results_by_hops = {
                hop: np.zeros(3)
                for hop in range(max(hops_dist) + 1)
            }
            for p in global_answers:
                results_by_hops[q_index2hop[p[0]]] += tuple(p[1])
            for hop in results_by_hops:
                if hops_dist[hop] > 0:
                    results_by_hops[hop] = results_by_hops[hop] / hops_dist[hop]
        print(results_by_hops)

    # Add results to the results file
    if "add.results.to" in eval_config:
        print(f"Adding results to {eval_config['add.results.to']}")
        with open(eval_config["add.results.to"], 'a+') as results_out:
            results_out.write(result_row("full", avg_metrics[:3]))
            results_out.write("\n")
            # Include fine grained results if available
            for hop in sorted(results_by_hops):
                results_out.write(result_row(str(hop), results_by_hops[hop]))
                results_out.write("\n")

    # Save final model output
    save_answers()