Example #1
0
    def test_textual_entailment(self):
        """Smoke-test the pretrained decomposable-attention + ELMo model.

        Checks one example per SNLI label (entailment, contradiction,
        neutral): for each premise/hypothesis pair, the probability mass on
        the expected label index must exceed a per-case threshold.
        """
        predictor = pretrained.decomposable_attention_with_elmo_parikh_2017()

        # (premise, hypothesis, expected label index, probability threshold)
        cases = [
            ("An interplanetary spacecraft is in orbit around a gas giant's icy moon.",
             "The spacecraft has the ability to travel between planets.",
             0, 0.7),  # entailment
            ("Two women are wandering along the shore drinking iced tea.",
             "Two women are sitting on a blanket near some rocks talking about politics.",
             1, 0.8),  # contradiction
            ("A large, gray elephant walked beside a herd of zebras.",
             "The elephant was lost.",
             2, 0.6),  # neutral
        ]

        for premise, hypothesis, label_index, threshold in cases:
            output = predictor.predict_json({
                "premise": premise,
                "hypothesis": hypothesis
            })
            assert output["label_probs"][label_index] > threshold
Example #2
0
    def test_textual_entailment(self):
        """Smoke-test the pretrained decomposable-attention + ELMo model.

        Verifies that each SNLI label (entailment, contradiction, neutral)
        receives a high probability on a hand-picked example pair.
        """
        predictor = pretrained.decomposable_attention_with_elmo_parikh_2017()

        def label_probs(premise, hypothesis):
            # Run one premise/hypothesis pair and return the probability list.
            return predictor.predict_json({
                "premise": premise,
                "hypothesis": hypothesis
            })["label_probs"]

        # entailment
        probs = label_probs(
            "An interplanetary spacecraft is in orbit around a gas giant's icy moon.",
            "The spacecraft has the ability to travel between planets.")
        assert probs[0] > 0.7

        # contradiction
        probs = label_probs(
            "Two women are wandering along the shore drinking iced tea.",
            "Two women are sitting on a blanket near some rocks talking about politics.")
        assert probs[1] > 0.8

        # neutral
        probs = label_probs(
            "A large, gray elephant walked beside a herd of zebras.",
            "The elephant was lost.")
        assert probs[2] > 0.6
def IR(sent_select_method):
    """Run document retrieval for every claim in DATA_SET.

    For each record, constituency-parse the claim, extract noun phrases
    (NPs), and keep the NPs that exactly match a known document title.
    Each matched title is recorded as evidence ``[title, 0]`` — sentence 0
    is used as a placeholder since sentence selection is currently disabled.

    Parameters
    ----------
    sent_select_method : str
        One of 'esim', 'entail', or anything else (falls back to the GloVe
        similarity model). Selects which model to pre-load.

    Returns
    -------
    dict
        Mapping of record id to the record with its 'evidence' field
        replaced by the retrieved ``[title, 0]`` pairs.
    """
    # NOTE(review): the sentence-selection step that consumed these models
    # was removed (it used to live after the doc-retrieval loop body), so
    # the models below are loaded but never used. Consider skipping these
    # expensive loads until sentence selection is re-enabled.
    if sent_select_method == 'esim':
        esim = pretrained.esim_nli_with_elmo_chen_2017()  # unused while selection is disabled
    elif sent_select_method == 'entail':
        attention = pretrained.decomposable_attention_with_elmo_parikh_2017()  # unused
    else:
        print("loading embedding...")
        nlp = spacy.load('en_vectors_web_lg')  # 300-dim GloVe vectors; unused
        # TODO: change num_vector?
        print("finished loading embedding...")

    titles_dict = xdb_query.load_xapian_titles(Args.OBJECTS, Args.TITLES)
    predictor = get_constituency_parser()

    with open(DATA_SET, 'r') as data_set_f:
        data_set = json.load(data_set_f)

    # Retained for the log line; sentence selection itself is disabled.
    num_sents = 1
    print("num sents selected", num_sents)
    output_content = {}
    for id_, record in tqdm(data_set.items()):
        parse_result = predictor.predict_json({"sentence": record['claim']})

        # Collect candidate noun phrases from the parse, then augment with
        # the project's customised NP heuristics.
        NPs = get_constituency_parsing_NPs(parse_result, NPs=set())
        NPs = get_customised_NPs(parse_result, NPs=NPs)

        if Args.LOG_MISSING_DOCS:
            # First element of each evidence entry is the gold document id.
            evidence = [item[0] for item in record['evidence']]
            matched_titles, missing = result_stat(evidence, NPs)
            log_missing(missing, record, NPs, parse_result)

        # Keep only NPs that exactly match a document title.
        matched = [NP for NP in NPs if NP in titles_dict]
        # Sentence 0 of each matched doc stands in for selected evidence.
        record['evidence'] = [[title, 0] for title in matched]
        output_content[id_] = record

    return output_content