def test_DocumentLevelRelationEvaluator_repeated_relations_irrelevant(self):
    """Repeated gold or predicted relations within a document are counted only once."""
    evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

    dataset = Dataset()
    doc_1 = Document()
    part_1 = Part('_irrelevant_')
    dataset.documents['doc_1'] = doc_1
    doc_1.parts['part_1'] = part_1

    part_1.relations = [
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "TOOL"), Entity(STUB_E_ID_2, 0, "maynard")),
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "TOOL"), Entity(STUB_E_ID_2, 0, "Danny Carey")),
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 1, "TOOL"), Entity(STUB_E_ID_2, 1, "Danny Carey")),
    ]

    # -

    part_1.predicted_relations = [
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "TOOL"), Entity(STUB_E_ID_2, 0, "maynard")),
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 1, "TOOL"), Entity(STUB_E_ID_2, 1, "maynard")),
    ]

    self._apply_pipeline(dataset)

    # ---

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 1)
    self.assertEqual(evaluation.fn, 1)
    self.assertEqual(evaluation.fp, 0)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.6666666666666666)

    # -

    part_1.predicted_relations = [
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 2, "TOOL"), Entity(STUB_E_ID_2, 2, "maynard")),
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 3, "TOOL"), Entity(STUB_E_ID_2, 3, "maynard")),
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 4, "TOOL"), Entity(STUB_E_ID_2, 4, "Danny Carey")),
    ]

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 2)
    self.assertEqual(evaluation.fn, 0)
    self.assertEqual(evaluation.fp, 0)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 1.0)
def test_DocumentLevelRelationEvaluator_default_entities_case_irrelevant(self):
    """With the default entity map, entity text is compared case-insensitively."""
    evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

    dataset = Dataset()
    doc_1 = Document()
    part_1 = Part('_irrelevant_')
    dataset.documents['doc_1'] = doc_1
    doc_1.parts['part_1'] = part_1

    part_1.relations = [
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "TOOL"), Entity(STUB_E_ID_2, 0, "maynard")),
    ]

    # -

    part_1.predicted_relations = [
        # empty
    ]

    self._apply_pipeline(dataset)

    # -

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 0)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.0)

    # ---

    part_1.predicted_relations = [
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "TOOL"), Entity(STUB_E_ID_2, 0, "maynard")),
    ]

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 1)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 1.0)

    # -

    part_1.predicted_relations = [
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "tool"), Entity(STUB_E_ID_2, 0, "MAYNARD")),
    ]

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 1)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 1.0)
def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_order_does_not_matter(self):
    entity_map_fun = (lambda e: "SAME")

    def relation_accept_fun(gold, pred):
        print('gold:', gold, ' <---> ', 'pred:', pred)
        return gold == pred

    r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "yin"), Entity(STUB_E_ID_2, 0, "yan"))
    r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "yan"), Entity(STUB_E_ID_2, 0, "yin"))

    self.assertTrue(relation_accept_fun(r1.map(entity_map_fun), r1.map(entity_map_fun)))
    self.assertTrue(relation_accept_fun(r1.map(entity_map_fun), r2.map(entity_map_fun)))
    self.assertTrue(relation_accept_fun(r2.map(entity_map_fun), r1.map(entity_map_fun)))

    evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

    (dataset, part) = self._create_basic_dataset()

    # -

    part.relations = [r1]
    part.predicted_relations = [r1]

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    print(evaluation)
    self.assertEqual(evaluation.tp, 1)
    self.assertEqual(evaluation.fn, 0)
    self.assertEqual(evaluation.fp, 0)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 1.0)
def test_DocumentLevelRelationEvaluator_false_positives(self):
    evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

    dataset = Dataset()
    doc_1 = Document()
    part_1 = Part('_irrelevant_ PART *1*')
    dataset.documents['doc_1'] = doc_1
    doc_1.parts['part_1'] = part_1

    part_2 = Part('_irrelevant_ PART *2*')
    dataset.documents['doc_1'] = doc_1
    doc_1.parts['part_2'] = part_2

    part_1.relations = [
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "TOOL"), Entity(STUB_E_ID_2, 0, "Maynard")),
    ]

    # -

    part_2.predicted_relations = [
        Relation(STUB_R_ID_1, Entity(STUB_E_ID_2, 0, "TOOL"), Entity(STUB_E_ID_1, 0, "Snoop Dog")),
    ]

    self._apply_pipeline(dataset)

    # ---

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 0)
    self.assertEqual(evaluation.fn, 1)
    self.assertEqual(evaluation.fp, 1)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.0)
def test_StubSamePartRelationExtractor(self):
    dataset = TestTaggers.get_test_dataset()

    annotator = StubSamePartRelationExtractor(STUB_E_ID_1, STUB_E_ID_2, relation_type=STUB_R_ID_1)
    annotator.annotate(dataset)

    # Assert that indeed 4 sentences were considered
    assert 4 == len(list(dataset.sentences())), str(list(dataset.sentences()))

    print("actu_rels", list(dataset.relations()))
    print("edges", list(dataset.edges()))
    print("pred_rels", list(dataset.predicted_relations()))

    evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 3)
    self.assertEqual(evaluation.fn, 0)
    self.assertEqual(evaluation.fp, 3)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.6666666666666666)
def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_ignore_some_predictions(self):
    """The accept function may return True (match), False (mismatch), or None (ignore the prediction entirely)."""
    entity_map_fun = (lambda e: e.text)

    def relation_accept_fun(gold, pred):
        print('gold:', gold, ' <---> ', 'pred:', pred)
        if gold == pred:     # e.g., 1 == 1
            return True
        elif gold < pred:    # e.g., 1 < 2
            return None
        else:                # e.g., 1 vs. 0
            return False

    r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))

    r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))  # Accept
    r3 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # Ignore
    r4 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "0"))  # Reject

    self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r2.map(entity_map_fun)))
    self.assertEqual(None, relation_accept_fun(r1.map(entity_map_fun), r3.map(entity_map_fun)))
    self.assertEqual(False, relation_accept_fun(r1.map(entity_map_fun), r4.map(entity_map_fun)))

    evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

    (dataset, part) = self._create_basic_dataset()

    # -

    part.relations = [r1]
    part.predicted_relations = [r2, r4] + [r3, r3, r3, r3, r3]  # All the r3's should be ignored

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    print(evaluation)
    self.assertEqual(evaluation.tp, 1)
    self.assertEqual(evaluation.fn, 0)
    self.assertEqual(evaluation.fp, 1)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.6666666666666666)
def test_Stub_D0_plus_D1_RelationExtractor(self):
    dataset = TestTaggers.get_test_dataset()

    edge_generator_1 = SentenceDistanceEdgeGenerator(
        STUB_E_ID_1, STUB_E_ID_2, STUB_R_ID_1, distance=0, rewrite_edges=False)
    edge_generator_2 = SentenceDistanceEdgeGenerator(
        STUB_E_ID_1, STUB_E_ID_2, STUB_R_ID_1, distance=1, rewrite_edges=False)
    edge_generator = CombinatorEdgeGenerator(edge_generator_1, edge_generator_2)

    annotator = StubRelationExtractor(edge_generator)
    annotator.annotate(dataset)

    # Assert that indeed 4 sentences were considered
    assert 4 == len(list(dataset.sentences())), str(list(dataset.sentences()))

    # print("actu_rels", list(dataset.relations()))
    # print("edges", list(dataset.edges()))
    # print("pred_rels", list(dataset.predicted_relations()))

    evaluator = DocumentLevelRelationEvaluator(rel_type=STUB_R_ID_1)

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 3)
    self.assertEqual(evaluation.fn, 0)
    self.assertEqual(evaluation.fp, 2)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.7499999999999999)
def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_dont_count_multiple_same_hits(self):
    """Multiple predictions accepted by the same gold relation count only once, unless the gold itself repeats the hit."""
    entity_map_fun = (lambda e: e.text)

    def relation_accept_fun(gold, pred):
        print('gold:', gold, ' <---> ', 'pred:', pred)
        gold = int(gold[-1])
        pred = int(pred[-1])
        # Accept when the prediction is at, or up to 2 above, the gold value, e.g. gold 1 accepts 1, 2, 3
        return gold <= pred and (pred - gold) < 3

    # Gold relations
    r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))
    r5 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "9"))  # Missing == fn
    r6 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "5"))
    r8 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # Overlaps with r1's accept range (repetition within the gold), so a shared hit should be counted twice

    # Predicted relations
    r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "1"))  # Accepted by gold 1 --> do count == tp
    r3 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "2"))  # Accepted by golds 1 and 2 --> do count, because the gold repeats the hit == tp
    r4 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "3"))  # Accepted by golds 1 and 2 --> do not count, it is over-repetition
    r7 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "6"))  # Accepted by gold 5 --> do count == tp
    r9 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "xxx"), Entity(STUB_E_ID_2, 0, "5"))  # Accepted by gold 5 --> do not count, it is over-repetition

    self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r2.map(entity_map_fun)))
    self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r3.map(entity_map_fun)))
    self.assertEqual(True, relation_accept_fun(r1.map(entity_map_fun), r4.map(entity_map_fun)))
    self.assertEqual(False, relation_accept_fun(r1.map(entity_map_fun), r7.map(entity_map_fun)))

    self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r2.map(entity_map_fun)))
    self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r3.map(entity_map_fun)))
    self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r4.map(entity_map_fun)))
    self.assertEqual(False, relation_accept_fun(r5.map(entity_map_fun), r7.map(entity_map_fun)))

    self.assertEqual(True, relation_accept_fun(r6.map(entity_map_fun), r7.map(entity_map_fun)))

    self.assertEqual(False, relation_accept_fun(r8.map(entity_map_fun), r2.map(entity_map_fun)))
    self.assertEqual(True, relation_accept_fun(r8.map(entity_map_fun), r3.map(entity_map_fun)))
    self.assertEqual(True, relation_accept_fun(r8.map(entity_map_fun), r4.map(entity_map_fun)))
    self.assertEqual(False, relation_accept_fun(r8.map(entity_map_fun), r7.map(entity_map_fun)))

    evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

    (dataset, part) = self._create_basic_dataset()

    # -

    part.relations = [r1, r5, r6, r8]
    part.predicted_relations = [r2, r3, r4, r7, r9]  # Of several predictions hitting the same gold relation, only one should be counted

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    print(evaluation)
    self.assertEqual(evaluation.tp, 3, evaluation)
    self.assertEqual(evaluation.fn, 1)
    self.assertEqual(evaluation.fp, 0)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.8571428571428571)
def test_DocumentLevelRelationEvaluator_arbitrary_relation_accept_fun_order_matters(self):
    """With an asymmetric accept function (strict less-than), which relation is gold and which is predicted matters."""
    entity_map_fun = (lambda e: e.text)

    def relation_accept_fun(gold, pred):
        print('gold:', gold, ' <---> ', 'pred:', pred)
        return gold < pred

    r1 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "1"), Entity(STUB_E_ID_2, 0, "2"))
    r2 = Relation(STUB_R_ID_1, Entity(STUB_E_ID_1, 0, "2"), Entity(STUB_E_ID_2, 0, "1"))

    # r1 does not accept r1: the accept function tests strict less-than, not equality (r1 is not < r1)
    self.assertFalse(relation_accept_fun(r1.map(entity_map_fun), r1.map(entity_map_fun)))
    # r1 < r2
    self.assertTrue(relation_accept_fun(r1.map(entity_map_fun), r2.map(entity_map_fun)))
    # r2 is not < r1
    self.assertFalse(relation_accept_fun(r2.map(entity_map_fun), r1.map(entity_map_fun)))

    evaluator = DocumentLevelRelationEvaluator(STUB_R_ID_1, entity_map_fun, relation_accept_fun)

    (dataset, part) = self._create_basic_dataset()

    # -

    part.relations = [r1]
    part.predicted_relations = [r1]

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    print(evaluation)
    self.assertEqual(evaluation.tp, 0)
    self.assertEqual(evaluation.fn, 1)
    self.assertEqual(evaluation.fp, 1)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.0)

    # -

    part.relations = [r1]
    part.predicted_relations = [r2]

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    print(evaluation)
    self.assertEqual(evaluation.tp, 1)
    self.assertEqual(evaluation.fn, 0)
    self.assertEqual(evaluation.fp, 0)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 1.0)

    # -

    part.relations = [r2]
    part.predicted_relations = [r1]

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 0)
    self.assertEqual(evaluation.fn, 1)
    self.assertEqual(evaluation.fp, 1)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.0)
def test_DocumentLevelRelationEvaluator_normalized_entities(self):
    """Relations are compared on their normalization values (here, norm key 'n_1'), not on the entities' surface text."""
    evaluator = DocumentLevelRelationEvaluator(
        rel_type=STUB_R_ID_1,
        entity_map_fun=DocumentLevelRelationEvaluator.COMMON_ENTITY_MAP_FUNS['normalized_fun'](
            {STUB_E_ID_1: 'n_1', STUB_E_ID_2: 'n_1'},
            penalize_unknown_normalizations="no"),
    )

    dataset = Dataset()
    doc_1 = Document()
    part_1 = Part('_irrelevant_')
    dataset.documents['doc_1'] = doc_1
    doc_1.parts['part_1'] = part_1

    part_1.relations = [
        Relation(
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool", norms={"n_1": "1964"}),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_1": "1961"})),
    ]

    # -

    part_1.predicted_relations = [
        Relation(  # One entity without normalization, one with a different normalization key
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool"),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_x": "1961"})),

        Relation(  # One entity with the right key but a wrong value, one with a different normalization key
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool", norms={"n_1": "666"}),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_x": "1961"})),

        Relation(  # Both with the correct normalization key, but one has a wrong value
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool", norms={"n_1": "666"}),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_1": "1961"})),

        Relation(  # Both with a different normalization key
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool", norms={"n_another_key": "1964"}),
            Entity(STUB_E_ID_2, 0, "Maynard", norms={"n_another_key": "1961"})),
    ]

    self._apply_pipeline(dataset)

    # ---

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 0)
    self.assertEqual(evaluation.fn, 1)
    self.assertEqual(evaluation.fp, 1)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 0.0)

    # -

    part_1.predicted_relations = [
        Relation(  # Different surface text, but both normalizations match the gold relation
            STUB_R_ID_1,
            Entity(STUB_E_ID_1, 0, "Tool band", norms={"n_1": "1964"}),
            Entity(STUB_E_ID_2, 0, "Maynard James Keenan", norms={"n_1": "1961"})),
    ]

    evals = evaluator.evaluate(dataset)
    evaluation = evals(STUB_R_ID_1)
    self.assertEqual(evaluation.tp, 1)
    self.assertEqual(evaluation.fn, 0)
    self.assertEqual(evaluation.fp, 0)
    computation = evals(STUB_R_ID_1).compute(strictness="exact")
    self.assertEqual(computation.f_measure, 1.0)