Пример #1
0
    def test_DocumentLevelRelationEvaluator_parts_irrelevant(self):
        """Check that gold and predicted relations in different parts still match.

        The gold relation is attached to ``part_1`` and the predicted relation
        (same entities, given in swapped order) to ``part_2`` of the same
        document. The assertions expect exactly one true positive and an
        f-measure of 1.0.
        """
        evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

        dataset = Dataset()
        doc_1 = Document()
        # Register the document once; both parts below hang off this document.
        dataset.documents['doc_1'] = doc_1

        part_1 = Part('_irrelevant_ PART *1*')
        doc_1.parts['part_1'] = part_1

        part_2 = Part('_irrelevant_ PART *2*')
        doc_1.parts['part_2'] = part_2

        # Gold relation lives in the first part ...
        part_1.relations = [
            Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "TOOL"), Entity(STUB_E_ID_2, 0, "maynard")),
        ]

        # ... while the prediction (entities swapped) lives in the second part.
        part_2.predicted_relations = [
            Relation(STUB_R_ID_1, Entity(STUB_E_ID_2, 0, "maynard"), Entity(STUB_E_ID_1, 0, "TOOL")),
        ]

        self._apply_pipeline(dataset)

        # ---

        evals = evaluator.evaluate(dataset)
        evaluation = evals(STUB_R_ID_1)
        self.assertEqual(evaluation.tp, 1)
        self.assertEqual(evaluation.fn, 0)
        self.assertEqual(evaluation.fp, 0)
        computation = evals(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(computation.f_measure, 1.0)
Пример #2
0
    def test_DocumentLevelRelationEvaluator_default_entities_case_irrelevant(
            self):
        """Check that the default evaluator ignores letter case in entity texts.

        One gold relation is compared against three prediction sets in turn:
        no predictions (tp 0, f-measure 0.0), an identical prediction
        (tp 1, f-measure 1.0) and a prediction whose entity texts differ only
        in letter case (tp 1, f-measure 1.0).
        """
        evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

        dataset = Dataset()
        doc_1 = Document()
        part_1 = Part('_irrelevant_')
        dataset.documents['doc_1'] = doc_1
        doc_1.parts['part_1'] = part_1

        def check_outcome(expected_tp, expected_f_measure):
            # Re-evaluate the dataset and compare against the expected numbers.
            results = evaluator.evaluate(dataset)
            counts = results(STUB_R_ID_1)
            self.assertEqual(counts.tp, expected_tp)
            scores = results(STUB_R_ID_1).compute(strictness="exact")
            self.assertEqual(scores.f_measure, expected_f_measure)

        gold_relation = Relation(
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "TOOL"),
            Entity(STUB_E_ID_2, 0, "maynard"))
        part_1.relations = [gold_relation]

        # Scenario 1: nothing predicted at all.
        part_1.predicted_relations = []

        self._apply_pipeline(dataset)

        check_outcome(0, 0.0)

        # Scenario 2: prediction identical to the gold relation.
        same_case_prediction = Relation(
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "TOOL"),
            Entity(STUB_E_ID_2, 0, "maynard"))
        part_1.predicted_relations = [same_case_prediction]

        check_outcome(1, 1.0)

        # Scenario 3: same texts, but with the letter case flipped.
        flipped_case_prediction = Relation(
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "tool"),
            Entity(STUB_E_ID_2, 0, "MAYNARD"))
        part_1.predicted_relations = [flipped_case_prediction]

        check_outcome(1, 1.0)
Пример #3
0
    def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_order_does_not_matter(self):
        """Check an equality-based accept function with a constant entity mapping.

        Every entity is mapped to the same constant, so the two relations built
        here (same entity classes, texts swapped) are asserted to be accepted in
        every pairing. The evaluator is then run with the first relation as both
        gold and prediction and must report one true positive.
        """

        def entity_map_fun(e):
            # All entities collapse onto one constant value.
            return "SAME"

        def relation_accept_fun(gold, pred):
            print('gold:', gold, ' <---> ', 'pred:', pred)
            return gold == pred

        yin_yan = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "yin"), Entity(STUB_E_ID_2, 0, "yan"))
        yan_yin = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "yan"), Entity(STUB_E_ID_2, 0, "yin"))

        # (gold, pred) pairings that must all be accepted.
        accepted_pairs = (
            (yin_yan, yin_yan),
            (yin_yan, yan_yin),
            (yan_yin, yin_yan),
        )
        for gold_rel, pred_rel in accepted_pairs:
            self.assertTrue(relation_accept_fun(gold_rel.map(entity_map_fun), pred_rel.map(entity_map_fun)))

        evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

        dataset, part = self._create_basic_dataset()

        # Same relation on both the gold and the predicted side.
        part.relations = [yin_yan]
        part.predicted_relations = [yin_yan]

        results = evaluator.evaluate(dataset)
        counts = results(STUB_R_ID_1)
        print(counts)
        self.assertEqual(counts.tp, 1)
        self.assertEqual(counts.fn, 0)
        self.assertEqual(counts.fp, 0)
        scores = results(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(scores.f_measure, 1.0)
Пример #4
0
    def __read_annjson(self, reader, filename, dataset):
        """Load one ann.json file and attach its annotations to ``dataset``.

        The document id is the basename of ``filename`` with the substrings
        ``.ann.json`` and ``.json`` removed; unless
        ``self.whole_basename_as_docid`` is set, only the text after the last
        ``-`` is kept. Depending on the reader's settings the method then either
        deletes the document (incomplete annotations) or appends entities and
        relations to the document's parts and finally drops every part that is
        not listed as annotatable in the JSON.

        Args:
            reader: file-like object that ``json.load`` can read from.
            filename: path of the file being read; used for the doc id and in
                warning messages.
            dataset: object whose ``documents`` mapping is modified in place.

        Returns:
            The document id. ``None`` is returned when an error occurred and
            ``self.raise_exception_on_incosistencies`` is falsy.

        Raises:
            Exception: any error raised while reading is propagated unchanged
                when ``self.raise_exception_on_incosistencies`` is truthy.
        """
        try:
            doc_id = os.path.basename(filename).replace('.ann.json', '').replace('.json', '')
            if not self.whole_basename_as_docid and '-' in doc_id:
                doc_id = doc_id.split('-')[-1]

            ann_json = json.load(reader)

            try:
                document = dataset.documents[doc_id]
            except Exception:
                print_warning("The annjson with docid={} was not in the whole plain dataset.".format(doc_id))
                return doc_id

            if not (ann_json['anncomplete'] or self.is_predicted) and self.delete_incomplete_docs:
                # Incomplete gold annotations: drop the whole document.
                del dataset.documents[doc_id]
                return doc_id

            for ann_entity in ann_json['entities']:
                if self.read_only_class_id is not None and ann_entity['classId'] not in self.read_only_class_id:
                    continue

                part = document.parts[ann_entity['part']]

                try:
                    normalizations = {
                        key: obj['source']['id']
                        for key, obj in ann_entity['normalizations'].items()
                    }
                except KeyError:
                    # .get() keeps the warning itself from raising a second
                    # KeyError when the 'normalizations' key is missing.
                    print_warning("The normalization is badly formatted: (docid={}) {}".format(doc_id, str(ann_entity.get('normalizations'))))
                    normalizations = None

                # Only the first offset of the annotation is used.
                first_offset = ann_entity['offsets'][0]
                entity = Entity(
                    ann_entity['classId'],
                    first_offset['start'],
                    first_offset['text'],
                    ann_entity['confidence']['prob'],
                    norms=normalizations)

                if self.is_predicted:
                    part.predicted_annotations.append(entity)
                else:
                    part.annotations.append(entity)

            if self.read_relations:
                for relation in ann_json['relations']:
                    # Note: no distinction with predicted_relations yet

                    # Each entity reference is split on '|': the first piece is
                    # the part id, the second starts with the entity's start
                    # offset followed by a comma.
                    first_ref = relation['entities'][0].split('|')
                    second_ref = relation['entities'][1].split('|')

                    part = document.parts[first_ref[0]]

                    e1_start = int(first_ref[1].split(',')[0])
                    e2_start = int(second_ref[1].split(',')[0])

                    rel_id = relation['classId']

                    e1 = part.get_entity(e1_start, use_pred=False, raise_exception_on_incosistencies=self.raise_exception_on_incosistencies)
                    e2 = part.get_entity(e2_start, use_pred=False, raise_exception_on_incosistencies=self.raise_exception_on_incosistencies)

                    if not self.raise_exception_on_incosistencies and (e1 is None or e2 is None):
                        # Best-effort mode: skip relations whose entities were not found.
                        continue

                    part.relations.append(Relation(rel_id, e1, e2))

            # delete parts that are not annotatable
            annotatable_parts = set(ann_json['annotatable']['parts'])
            # Collect the ids first so the mapping is not mutated while iterating it.
            part_ids_to_del = [part_id for part_id in document.parts if part_id not in annotatable_parts]
            for part_id in part_ids_to_del:
                del document.parts[part_id]

            return doc_id

        except Exception as err:
            if self.raise_exception_on_incosistencies:
                # Bare raise re-raises the active exception unchanged.
                raise
            # Best-effort mode: keep going, but leave a trace of what was skipped.
            print_warning("Could not read the annjson file {}: {!r}".format(filename, err))
            return None
Пример #5
0
    def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_dont_count_multiple_same_hits(self):
        """Check how repeated hits are counted with a tolerant accept function.

        The accept function converts the last element of each mapped relation
        to an int and accepts a prediction whose number is equal to, or at most
        2 above, the gold number. With four gold and five predicted relations
        the assertions expect tp == 3, fn == 1 and fp == 0.
        """
        entity_map_fun = (lambda e: e.text)

        def relation_accept_fun(gold, pred):
            print('gold:', gold, ' <---> ', 'pred:', pred)
            gold = int(gold[-1])
            pred = int(pred[-1])

            # e.g., gold 1 accepts pred 1, 2, 3
            return gold <= pred and (pred - gold) < 3

        # Gold relations
        r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))
        r5 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "9"))  # Missing == fn
        r6 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "5"))
        r8 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # (maps to 1) Own repetition in gold, so 1 should be counted twice

        # Predicted relations
        r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))  # Accept 1 --> do count == tp
        r3 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # repeated Accept 1,2 --> do count because of own repetition in gold == tp
        r4 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "3"))  # repeated Accept 1,2 --> do not count because it's over repetition
        r7 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "6"))  # Accept 5 --> do count == tp
        r9 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "5"))  # Accept 5 --> do not count because it's over repetition

        # Sanity-check the accept function on its own before using the evaluator.
        self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r2.map(entity_map_fun)))
        self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r3.map(entity_map_fun)))
        self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r4.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r1.map(entity_map_fun), r7.map(entity_map_fun)))

        self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r2.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r3.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r4.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r7.map(entity_map_fun)))

        self.assertEqual(True, relation_accept_fun(r6.map(entity_map_fun), r7.map(entity_map_fun)))

        self.assertEqual(False, relation_accept_fun(r8.map(entity_map_fun), r2.map(entity_map_fun)))
        self.assertEqual(True, relation_accept_fun(r8.map(entity_map_fun), r3.map(entity_map_fun)))
        self.assertEqual(True, relation_accept_fun(r8.map(entity_map_fun), r4.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r8.map(entity_map_fun), r7.map(entity_map_fun)))

        evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

        (dataset, part) = self._create_basic_dataset()

        # -

        part.relations = [r1, r5, r6, r8]
        # Expected below for these lists: tp == 3, fn == 1, fp == 0.
        part.predicted_relations = [r2, r3, r4, r7, r9]

        evals = evaluator.evaluate(dataset)
        evaluation = evals(STUB_R_ID_1)
        print(evaluation)
        self.assertEqual(evaluation.tp, 3, evaluation)
        self.assertEqual(evaluation.fn, 1)
        self.assertEqual(evaluation.fp, 0)
        computation = evals(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(computation.f_measure, 0.8571428571428571)
Пример #6
0
    def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_ignore_some_predictions(self):
        """Check that predictions for which the accept function returns None are ignored.

        The accept function compares the last element of the mapped gold and
        predicted relations as ints: equal -> True (accept), gold smaller ->
        None (ignore), otherwise -> False (reject). With one gold relation,
        one accepted, one rejected and five ignored predictions, the
        assertions expect tp == 1, fn == 0 and fp == 1.
        """
        entity_map_fun = (lambda e: e.text)

        def relation_accept_fun(gold, pred):
            gold_last_char_num = int(gold[-1])
            pred_last_char_num = int(pred[-1])
            print('gold:', gold, ' <---> ', 'pred:', pred)

            # Compare the parsed numbers, as the examples on each branch describe.
            if gold_last_char_num == pred_last_char_num:  # 1 == 1
                return True
            elif gold_last_char_num < pred_last_char_num:  # 1 < 2
                return None
            else:
                return False   # 1 !<= 0

        r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))

        r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))  # Accept
        r3 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # Ignore
        r4 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "0"))  # Reject

        self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r2.map(entity_map_fun)))
        self.assertEqual(None, relation_accept_fun(r1.map(entity_map_fun), r3.map(entity_map_fun)))
        self.assertEqual(False, relation_accept_fun(r1.map(entity_map_fun), r4.map(entity_map_fun)))

        evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

        (dataset, part) = self._create_basic_dataset()

        # -

        part.relations = [r1]
        part.predicted_relations = [r2, r4] + [r3, r3, r3, r3, r3]  # All the r3's should be ignored

        evals = evaluator.evaluate(dataset)
        evaluation = evals(STUB_R_ID_1)
        print(evaluation)
        self.assertEqual(evaluation.tp, 1)
        self.assertEqual(evaluation.fn, 0)
        self.assertEqual(evaluation.fp, 1)
        computation = evals(STUB_R_ID_1).compute(strictness="exact")
        self.assertEqual(computation.f_measure, 0.6666666666666666)
Пример #7
0
    def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_order_matters(self):
        """Check an asymmetric accept function (strict ``gold < pred``).

        Two relations with the entity texts "1"/"2" and "2"/"1" are built. The
        accept function is first asserted directly (lower is not below itself,
        lower is below higher, higher is not below lower) and then three
        gold/prediction combinations are run through the evaluator.
        """

        def entity_map_fun(e):
            return e.text

        def relation_accept_fun(gold, pred):
            print('gold:', gold, ' <---> ', 'pred:', pred)
            return gold < pred

        lower = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "1"), Entity(STUB_E_ID_2, 0, "2"))
        higher = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "2"), Entity(STUB_E_ID_2, 0, "1"))

        # A relation is never accepted against itself: "<" is strict.
        self.assertFalse(relation_accept_fun(lower.map(entity_map_fun), lower.map(entity_map_fun)))
        # lower < higher
        self.assertTrue(relation_accept_fun(lower.map(entity_map_fun), higher.map(entity_map_fun)))
        # higher is not < lower
        self.assertFalse(relation_accept_fun(higher.map(entity_map_fun), lower.map(entity_map_fun)))

        evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

        dataset, part = self._create_basic_dataset()

        # (gold, predicted, tp, fn, fp, f_measure, print the evaluation?)
        scenarios = (
            ([lower], [lower], 0, 1, 1, 0.0, True),
            ([lower], [higher], 1, 0, 0, 1.0, True),
            ([higher], [lower], 0, 1, 1, 0.0, False),
        )

        for gold_rels, pred_rels, want_tp, want_fn, want_fp, want_f, echo in scenarios:
            part.relations = gold_rels
            part.predicted_relations = pred_rels

            results = evaluator.evaluate(dataset)
            counts = results(STUB_R_ID_1)
            if echo:
                print(counts)
            self.assertEqual(counts.tp, want_tp)
            self.assertEqual(counts.fn, want_fn)
            self.assertEqual(counts.fp, want_fp)
            scores = results(STUB_R_ID_1).compute(strictness="exact")
            self.assertEqual(scores.f_measure, want_f)
Пример #8
0
    def test_DocumentLevelRelationEvaluator_normalized_entities(self):
        """Check evaluation when entities are mapped through the 'normalized_fun' map.

        The evaluator is built with the 'normalized_fun' entity map for the
        normalization key 'n_1' on both entity classes and
        ``penalize_unknown_normalizations="no"``. Four predictions with missing,
        foreign-key or wrong normalizations are asserted to give tp 0 / fn 1 /
        fp 1; a prediction with different texts but the gold 'n_1' values is
        asserted to give tp 1 / fn 0 / fp 0.
        """
        make_entity_map = DocumentLevelRelationEvaluator.COMMON_ENTITY_MAP_FUNS['normalized_fun']
        evaluator = DocumentLevelRelationEvaluator(
            rel_type=STUB_R_ID_1,
            entity_map_fun=make_entity_map(
                {STUB_E_ID_1: 'n_1', STUB_E_ID_2: 'n_1'},
                penalize_unknown_normalizations="no"),
        )

        dataset = Dataset()
        doc_1 = Document()
        part_1 = Part('_irrelevant_')
        dataset.documents['doc_1'] = doc_1
        doc_1.parts['part_1'] = part_1

        def assert_counts(want_tp, want_fn, want_fp, want_f_measure):
            # Evaluate the dataset as it currently stands and compare the numbers.
            results = evaluator.evaluate(dataset)
            counts = results(STUB_R_ID_1)
            self.assertEqual(counts.tp, want_tp)
            self.assertEqual(counts.fn, want_fn)
            self.assertEqual(counts.fp, want_fp)
            scores = results(STUB_R_ID_1).compute(strictness="exact")
            self.assertEqual(scores.f_measure, want_f_measure)

        gold_relation = Relation(
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool", norms={"n_1": "1964"}),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_1": "1961"}))
        part_1.relations = [gold_relation]

        # First round: none of these predictions carries the gold normalizations.
        no_norm_and_foreign_key = Relation(
            # One without normalization, one with another different normalization
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool"),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_x": "1961"}))

        wrong_value_and_foreign_key = Relation(
            # One with different normalization, one with another different normalization
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool", norms={"n_1": "666"}),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_x": "1961"}))

        right_keys_one_wrong_value = Relation(
            # Both with the correct normalization ids, but one has wrong normalization
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool", norms={"n_1": "666"}),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_1": "1961"}))

        both_foreign_keys = Relation(
            # Both with another different normalization
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool", norms={"n_another_key": "1964"}),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_another_key": "1961"}))

        part_1.predicted_relations = [
            no_norm_and_foreign_key,
            wrong_value_and_foreign_key,
            right_keys_one_wrong_value,
            both_foreign_keys,
        ]

        self._apply_pipeline(dataset)

        assert_counts(0, 1, 1, 0.0)

        # Second round: texts differ from gold, the 'n_1' normalizations agree.
        matching_norms = Relation(
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool band", norms={"n_1": "1964"}),
            Entity(STUB_E_ID_2, 0, "Maynard James Keenan", norms={"n_1": "1961"}))
        part_1.predicted_relations = [matching_norms]

        assert_counts(1, 0, 0, 1.0)