示例#1
0
    def test_DocumentLevelRelationEvaluator_repeated_relations_irrelevant(
            self):
        """Check the document-level counts when relations are repeated.

        The gold set holds TOOL--maynard once and TOOL--Danny Carey twice
        (entity offsets 0 and 1). The expected counts asserted below treat
        relations that differ only in their entity offsets as a single one.
        """
        evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

        dataset = Dataset()
        document = Document()
        part = Part('_irrelevant_')
        dataset.documents['doc_1'] = document
        document.parts['part_1'] = part

        def tool_with(partner, offset):
            # In this test both entities of a relation share the same offset.
            return Relation(
                STUB_R_ID_1,
                Entity(STUB_E_ID_1, offset, "TOOL"),
                Entity(STUB_E_ID_2, offset, partner),
            )

        def expect(tp, fn, fp, f_measure):
            # Re-evaluate the dataset and compare counts and exact F-measure.
            results = evaluator.evaluate(dataset)
            counts = results(STUB_R_ID_1)
            self.assertEqual(counts.tp, tp)
            self.assertEqual(counts.fn, fn)
            self.assertEqual(counts.fp, fp)
            scores = results(STUB_R_ID_1).compute(strictness="exact")
            self.assertEqual(scores.f_measure, f_measure)

        part.relations = [
            tool_with("maynard", 0),
            tool_with("Danny Carey", 0),
            tool_with("Danny Carey", 1),
        ]

        # Round 1: the same TOOL--maynard prediction at two offsets.
        part.predicted_relations = [
            tool_with("maynard", offset) for offset in (0, 1)
        ]

        self._apply_pipeline(dataset)

        expect(tp=1, fn=1, fp=0, f_measure=0.6666666666666666)

        # Round 2: both gold pairs predicted, at offsets unseen in gold.
        part.predicted_relations = [
            tool_with("maynard", 2),
            tool_with("maynard", 3),
            tool_with("Danny Carey", 4),
        ]

        expect(tp=2, fn=0, fp=0, f_measure=1.0)
示例#2
0
    def test_DocumentLevelRelationEvaluator_default_entities_case_irrelevant(
            self):
        """Check that the default evaluator matches entity text regardless of case.

        Three prediction sets are scored against one gold TOOL--maynard
        relation: none, an identical relation, and one whose entity texts
        differ from gold only in letter case. The last two are both expected
        to yield one true positive.
        """
        evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

        dataset = Dataset()
        document = Document()
        part = Part('_irrelevant_')
        dataset.documents['doc_1'] = document
        document.parts['part_1'] = part

        def relation(first_text, second_text):
            # All entities in this test sit at offset 0.
            return Relation(
                STUB_R_ID_1,
                Entity(STUB_E_ID_1, 0, first_text),
                Entity(STUB_E_ID_2, 0, second_text),
            )

        def expect(tp, f_measure):
            # Re-evaluate the dataset and compare tp and exact F-measure.
            results = evaluator.evaluate(dataset)
            counts = results(STUB_R_ID_1)
            self.assertEqual(counts.tp, tp)
            scores = results(STUB_R_ID_1).compute(strictness="exact")
            self.assertEqual(scores.f_measure, f_measure)

        part.relations = [relation("TOOL", "maynard")]

        # No predictions at all.
        part.predicted_relations = []

        self._apply_pipeline(dataset)

        expect(tp=0, f_measure=0.0)

        # Prediction identical to gold.
        part.predicted_relations = [relation("TOOL", "maynard")]
        expect(tp=1, f_measure=1.0)

        # Prediction differing from gold only in letter case.
        part.predicted_relations = [relation("tool", "MAYNARD")]
        expect(tp=1, f_measure=1.0)
示例#3
0
    def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_order_does_not_matter(self):
        """Check an equality-based accept function with a constant entity map.

        Every entity is mapped to the same constant, so the two relations
        (which only swap "yin" and "yan") are accepted as equal in either
        order before the evaluator itself is exercised.
        """

        def entity_map_fun(e):
            return "SAME"

        def relation_accept_fun(gold, pred):
            print('gold:', gold, ' <---> ', 'pred:', pred)
            return gold == pred

        yin_yan = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "yin"), Entity(STUB_E_ID_2, 0, "yan"))
        yan_yin = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "yan"), Entity(STUB_E_ID_2, 0, "yin"))

        # The accept function must hold for every gold/pred pairing.
        for gold_rel, pred_rel in ((yin_yan, yin_yan), (yin_yan, yan_yin), (yan_yin, yin_yan)):
            accepted = relation_accept_fun(gold_rel.map(entity_map_fun), pred_rel.map(entity_map_fun))
            self.assertTrue(accepted)

        evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

        dataset, part = self._create_basic_dataset()

        part.relations = [yin_yan]
        part.predicted_relations = [yin_yan]

        results = evaluator.evaluate(dataset)
        counts = results(STUB_R_ID_1)
        print(counts)
        for actual, expected in ((counts.tp, 1), (counts.fn, 0), (counts.fp, 0)):
            self.assertEqual(actual, expected)
        scores = results(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(scores.f_measure, 1.0)
示例#4
0
    def test_DocumentLevelRelationEvaluator_false_positives(self):
        """Check that a non-matching prediction counts as a false positive.

        One document holds two parts: the gold relation lives in part 1 and
        a different predicted relation lives in part 2. The evaluation is
        expected to report tp=0, fn=1, fp=1 and an F-measure of 0.
        """

        evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

        dataset = Dataset()
        doc_1 = Document()
        # Register the document once; both parts below belong to it.
        dataset.documents['doc_1'] = doc_1

        part_1 = Part('_irrelevant_ PART *1*')
        doc_1.parts['part_1'] = part_1

        part_2 = Part('_irrelevant_ PART *2*')
        doc_1.parts['part_2'] = part_2

        part_1.relations = [
            Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "TOOL"), Entity(STUB_E_ID_2, 0, "Maynard")),
        ]

        # -

        part_2.predicted_relations = [
            Relation(STUB_R_ID_1, Entity(STUB_E_ID_2, 0, "TOOL"), Entity(STUB_E_ID_1, 0, "Snoop Dog")),
        ]

        self._apply_pipeline(dataset)

        # ---

        evals = evaluator.evaluate(dataset)
        evaluation = evals(STUB_R_ID_1)
        self.assertEqual(evaluation.tp, 0)
        self.assertEqual(evaluation.fn, 1)
        self.assertEqual(evaluation.fp, 1)
        computation = evals(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(computation.f_measure, 0.0)
示例#5
0
    def test_StubSamePartRelationExtractor(self):
        """Annotate the shared test dataset with the same-part stub extractor and score it.

        The expected document-level result is tp=3, fn=0, fp=3.
        """

        dataset = TestTaggers.get_test_dataset()

        annotator = StubSamePartRelationExtractor(STUB_E_ID_1,
                                                  STUB_E_ID_2,
                                                  relation_type=STUB_R_ID_1)
        annotator.annotate(dataset)
        # Assert that indeed 4 sentences were considered.
        # A unittest assertion is used instead of a bare ``assert`` so the
        # check is not stripped when Python runs with -O.
        sentences = list(dataset.sentences())
        self.assertEqual(4, len(sentences), str(sentences))

        print("actu_rels", list(dataset.relations()))
        print("edges", list(dataset.edges()))
        print("pred_rels", list(dataset.predicted_relations()))

        evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

        evals = evaluator.evaluate(dataset)
        evaluation = evals(STUB_R_ID_1)
        self.assertEqual(evaluation.tp, 3)
        self.assertEqual(evaluation.fn, 0)
        self.assertEqual(evaluation.fp, 3)
        computation = evals(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(computation.f_measure, 0.6666666666666666)
示例#6
0
    def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_ignore_some_predictions(self):
        """Check that predictions the accept function answers ``None`` for are ignored.

        The accept function returns True (accept), None (ignore) or False
        (reject). With one accepted, one rejected and five ignored
        predictions, the asserted counts are tp=1, fn=0, fp=1: the ignored
        predictions add neither a true nor a false positive.
        """

        entity_map_fun = (lambda e: e.text)

        def relation_accept_fun(gold, pred):
            """Return True to accept ``pred``, None to ignore it, False to reject it."""
            print('gold:', gold, ' <---> ', 'pred:', pred,)

            # The mapped relations are compared directly. In this test they
            # are built from entities that differ only in the trailing digit
            # of the second entity's text (presumably the last character of
            # the mapped value -- confirm against Relation.map).
            if gold == pred:  # e.g. trailing 1 == 1
                return True
            elif gold < pred:  # e.g. trailing 1 < 2
                return None
            else:
                return False   # e.g. trailing 1 > 0

        r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))

        r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))  # Accept
        r3 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # Ignore
        r4 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "0"))  # Reject

        # Sanity-check the accept function itself before using the evaluator.
        self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r2.map(entity_map_fun)))
        self.assertEqual(None, relation_accept_fun(r1.map(entity_map_fun), r3.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r1.map(entity_map_fun), r4.map(entity_map_fun)))

        evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

        (dataset, part) = self._create_basic_dataset()

        # -

        part.relations = [r1]
        part.predicted_relations = [r2, r4] + [r3, r3, r3, r3, r3]  # All the r3's should be ignored

        evals = evaluator.evaluate(dataset)
        evaluation = evals(STUB_R_ID_1)
        print(evaluation)
        self.assertEqual(evaluation.tp, 1)
        self.assertEqual(evaluation.fn, 0)
        self.assertEqual(evaluation.fp, 1)
        computation = evals(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(computation.f_measure, 0.6666666666666666)
示例#7
0
    def test_Stub_D0_plus_D1_RelationExtractor(self):
        """Score a stub extractor fed by combined distance-0 and distance-1 edge generators.

        The expected document-level result is tp=3, fn=0, fp=2.
        """

        dataset = TestTaggers.get_test_dataset()

        edge_generator_1 = SentenceDistanceEdgeGenerator(STUB_E_ID_1,
                                                         STUB_E_ID_2,
                                                         STUB_R_ID_1,
                                                         distance=0,
                                                         rewrite_edges=False)
        edge_generator_2 = SentenceDistanceEdgeGenerator(STUB_E_ID_1,
                                                         STUB_E_ID_2,
                                                         STUB_R_ID_1,
                                                         distance=1,
                                                         rewrite_edges=False)
        edge_generator = CombinatorEdgeGenerator(edge_generator_1,
                                                 edge_generator_2)
        annotator = StubRelationExtractor(edge_generator)

        annotator.annotate(dataset)
        # Assert that indeed 4 sentences were considered.
        # A unittest assertion is used instead of a bare ``assert`` so the
        # check is not stripped when Python runs with -O.
        sentences = list(dataset.sentences())
        self.assertEqual(4, len(sentences), str(sentences))

        evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

        evals = evaluator.evaluate(dataset)
        evaluation = evals(STUB_R_ID_1)
        self.assertEqual(evaluation.tp, 3)
        self.assertEqual(evaluation.fn, 0)
        self.assertEqual(evaluation.fp, 2)
        computation = evals(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(computation.f_measure, 0.7499999999999999)
示例#8
0
    def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_dont_count_multiple_same_hits(self):
        """Check that repeated hits on the same gold relation are not over-counted.

        The accept function matches a prediction whose trailing digit is
        0, 1 or 2 above the gold one, so several predictions can hit the same
        gold relation. With four gold and five predicted relations the
        asserted counts are tp=3, fn=1, fp=0.
        """

        entity_map_fun = (lambda e: e.text)

        def relation_accept_fun(gold, pred):
            """Accept when the trailing digit of ``pred`` is 0..2 above that of ``gold``."""
            print('gold:', gold, ' <---> ', 'pred:', pred,)
            gold_num = int(gold[-1])
            pred_num = int(pred[-1])

            # e.g., gold 1 accepts pred 1, 2, 3
            return gold_num <= pred_num and (pred_num - gold_num) < 3

        # Gold relations
        r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))
        r5 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "9"))  # Missing == fn
        r6 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "5"))
        r8 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # (maps to 1) Own repetition in gold, so 1 should be counted twice

        # Predicted relations
        r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))  # Accept 1 --> do count == tp
        r3 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # repeated Accept 1,2 --> do count because of own repetition in gold == tp
        r4 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "3"))  # repeated Accept 1,2 --> do not count because it's over repetition
        r7 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "6"))  # Accept 5 --> do count == tp
        r9 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "5"))  # Accept 5 --> do not count because it's over repetition

        # Sanity-check the accept function itself before using the evaluator.
        self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r2.map(entity_map_fun)))
        self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r3.map(entity_map_fun)))
        self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r4.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r1.map(entity_map_fun), r7.map(entity_map_fun)))

        self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r2.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r3.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r4.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r7.map(entity_map_fun)))

        self.assertEqual(True, relation_accept_fun(r6.map(entity_map_fun), r7.map(entity_map_fun)))

        self.assertEqual(False, relation_accept_fun(r8.map(entity_map_fun), r2.map(entity_map_fun)))
        self.assertEqual(True, relation_accept_fun(r8.map(entity_map_fun), r3.map(entity_map_fun)))
        self.assertEqual(True, relation_accept_fun(r8.map(entity_map_fun), r4.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r8.map(entity_map_fun), r7.map(entity_map_fun)))

        evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

        (dataset, part) = self._create_basic_dataset()

        # -

        part.relations = [r1, r5, r6, r8]
        # Five predictions, but the assertions below expect only three true
        # positives and no false positives: the surplus hits are not counted.
        part.predicted_relations = [r2, r3, r4, r7, r9]

        evals = evaluator.evaluate(dataset)
        evaluation = evals(STUB_R_ID_1)
        print(evaluation)
        self.assertEqual(evaluation.tp, 3, evaluation)
        self.assertEqual(evaluation.fn, 1)
        self.assertEqual(evaluation.fp, 0)
        computation = evals(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(computation.f_measure, 0.8571428571428571)
示例#9
0
    def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_order_matters(self):
        """Check that the evaluator honours a non-symmetric accept function.

        The accept function is a strict ``gold < pred`` comparison, so a
        relation is never accepted against itself and swapping gold and
        prediction flips the outcome.
        """

        def entity_map_fun(e):
            return e.text

        def relation_accept_fun(gold, pred):
            print('gold:', gold, ' <---> ', 'pred:', pred)
            return gold < pred

        r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "1"), Entity(STUB_E_ID_2, 0, "2"))
        r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "2"), Entity(STUB_E_ID_2, 0, "1"))

        def accepts(gold_rel, pred_rel):
            # Apply the accept function to the mapped form of both relations.
            return relation_accept_fun(gold_rel.map(entity_map_fun), pred_rel.map(entity_map_fun))

        # r1 is not accepted against itself: the comparison is strict
        self.assertFalse(accepts(r1, r1))
        # r1 < r2
        self.assertTrue(accepts(r1, r2))
        # r2 not < r1
        self.assertFalse(accepts(r2, r1))

        evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

        dataset, part = self._create_basic_dataset()

        def expect(gold_rels, pred_rels, tp, fn, fp, f_measure, show=True):
            # Install the gold/predicted relations, evaluate, compare counts.
            part.relations = gold_rels
            part.predicted_relations = pred_rels

            results = evaluator.evaluate(dataset)
            counts = results(STUB_R_ID_1)
            if show:
                print(counts)
            self.assertEqual(counts.tp, tp)
            self.assertEqual(counts.fn, fn)
            self.assertEqual(counts.fp, fp)
            scores = results(STUB_R_ID_1).compute(strictness="exact")
            self.assertEqual(scores.f_measure, f_measure)

        # Same relation on both sides: rejected by the strict comparison.
        expect([r1], [r1], tp=0, fn=1, fp=1, f_measure=0.0)

        # Gold r1 against predicted r2: accepted.
        expect([r1], [r2], tp=1, fn=0, fp=0, f_measure=1.0)

        # Gold r2 against predicted r1: rejected (this round prints nothing).
        expect([r2], [r1], tp=0, fn=1, fp=1, f_measure=0.0, show=False)
示例#10
0
    def test_DocumentLevelRelationEvaluator_normalized_entities(self):
        """Check evaluation when entities are mapped through their normalizations.

        The evaluator uses the 'normalized_fun' entity map keyed on 'n_1' for
        both entity classes. Four predictions with missing, foreign or wrong
        normalizations are expected to give tp=0, fn=1, fp=1; a prediction
        with different entity texts but the gold 'n_1' values is expected to
        give a full match.
        """
        make_normalized_fun = DocumentLevelRelationEvaluator.COMMON_ENTITY_MAP_FUNS['normalized_fun']
        evaluator = DocumentLevelRelationEvaluator(
            rel_type=STUB_R_ID_1,
            entity_map_fun=make_normalized_fun(
                {STUB_E_ID_1: 'n_1', STUB_E_ID_2: 'n_1'},
                penalize_unknown_normalizations="no"),
        )

        dataset = Dataset()
        document = Document()
        part = Part('_irrelevant_')
        dataset.documents['doc_1'] = document
        document.parts['part_1'] = part

        def first(text, **kwargs):
            # Entity of the first class at offset 0; ``norms`` is optional.
            return Entity(STUB_E_ID_1, 0, text, **kwargs)

        def second(text, **kwargs):
            # Entity of the second class at offset 0; ``norms`` is optional.
            return Entity(STUB_E_ID_2, 0, text, **kwargs)

        def expect(tp, fn, fp, f_measure):
            # Re-evaluate the dataset and compare counts and exact F-measure.
            results = evaluator.evaluate(dataset)
            counts = results(STUB_R_ID_1)
            self.assertEqual(counts.tp, tp)
            self.assertEqual(counts.fn, fn)
            self.assertEqual(counts.fp, fp)
            scores = results(STUB_R_ID_1).compute(strictness="exact")
            self.assertEqual(scores.f_measure, f_measure)

        part.relations = [
            Relation(
                STUB_R_ID_1,
                first("Tool", norms={"n_1": "1964"}),
                second("Maynard", norms={"n_1": "1961"})),
        ]

        part.predicted_relations = [
            # One without normalization, one with another different normalization
            Relation(
                STUB_R_ID_1,
                first("Tool"),
                second("Maynard", norms={"n_x": "1961"})),

            # One with different normalization, one with another different normalization
            Relation(
                STUB_R_ID_1,
                first("Tool", norms={"n_1": "666"}),
                second("Maynard", norms={"n_x": "1961"})),

            # Both with the correct normalization ids, but one has wrong normalization
            Relation(
                STUB_R_ID_1,
                first("Tool", norms={"n_1": "666"}),
                second("Maynard", norms={"n_1": "1961"})),

            # Both with another different normalization
            Relation(
                STUB_R_ID_1,
                first("Tool", norms={"n_another_key": "1964"}),
                second("Maynard", norms={"n_another_key": "1961"})),
        ]

        self._apply_pipeline(dataset)

        expect(tp=0, fn=1, fp=1, f_measure=0.0)

        # Different entity texts, same 'n_1' normalizations as gold.
        part.predicted_relations = [
            Relation(
                STUB_R_ID_1,
                first("Tool band", norms={"n_1": "1964"}),
                second("Maynard James Keenan", norms={"n_1": "1961"})),
        ]

        expect(tp=1, fn=0, fp=0, f_measure=1.0)