def evaluate_predicate_mention(test_graphs, prop_ex, nom_file):
    """
    Average the predicate mention metric over the test graphs.

    A predicted graph is first produced for every test graph; each
    (test graph, predicted graph) pair is then scored and the first element
    of every agreement result is averaged.

    :param test_graphs: the graphs for the test sets
    :param prop_ex: the proposition extraction object
    :param nom_file: forwarded as the third positional argument of
        predict_predicate_mention
    :return the average predicate mention metric on the test graphs
    """
    # Phase 1: run the predictor on every gold graph before scoring anything.
    predicted_graphs = []
    for gold_graph in test_graphs:
        predicted_graphs.append(
            predict_predicate_mention(gold_graph, prop_ex, nom_file))

    # Phase 2: score each gold/predicted pair; only the first element of the
    # agreement result (the score) is kept.
    scores = []
    for gold_graph, predicted_graph in zip(test_graphs, predicted_graphs):
        agreement = compute_predicate_mention_agreement(gold_graph, predicted_graph)
        scores.append(agreement[0])

    return np.mean(scores)
def evaluate_predicate_mention_verbal(test_graphs, prop_ex):
    """
    Calculate the average predicate mention metric on the verbal propositions in test graphs

    Every test graph is passed through filter_verbal, a prediction is made for
    each filtered graph with the non-verbal component switched off
    (apply_non_verbal=False), and the first element of each agreement result
    is averaged.

    :param test_graphs: the graphs for the test sets
    :param prop_ex: the proposition extraction object
    :return the average predicate mention metric on the verbal propositions in test graphs
    """
    # Materialise the filtered graphs as a list: they are iterated twice below
    # (once to predict, once to score). A bare map() is a one-shot iterator on
    # Python 3, so the second pass would see nothing and no pair would be scored.
    verbal_graphs = [filter_verbal(test_graph) for test_graph in test_graphs]

    pred_graphs = [
        predict_predicate_mention(verbal_graph, prop_ex, apply_non_verbal=False)
        for verbal_graph in verbal_graphs
    ]

    # Keep only the score (element 0) of each agreement result.
    return np.mean([
        compute_predicate_mention_agreement(verbal_graph, pred_graph)[0]
        for verbal_graph, pred_graph in zip(verbal_graphs, pred_graphs)
    ])
def evaluate_predicate_mention_non_verbal(test_graphs, prop_ex, nom_file):
    """
    Calculate the average predicate mention metric on the non-verbal propositions in test graphs

    Every test graph is passed through filter_non_verbal, a prediction is made
    for each filtered graph with the verbal component switched off
    (apply_verbal=False), and the first element of each agreement result is
    averaged.

    :param test_graphs: the graphs for the test sets
    :param prop_ex: the proposition extraction object
    :param nom_file: forwarded to predict_predicate_mention as nom_file
    :return the average predicate mention metric on the non-verbal propositions in test graphs
    """
    # Materialise the filtered graphs as a list: they are iterated twice below
    # (once to predict, once to score). A bare map() is a one-shot iterator on
    # Python 3, so the second pass would see nothing and no pair would be scored.
    non_verbal_graphs = [filter_non_verbal(test_graph) for test_graph in test_graphs]

    pred_graphs = [
        predict_predicate_mention(non_verbal_graph, prop_ex,
                                  apply_verbal=False, nom_file=nom_file)
        for non_verbal_graph in non_verbal_graphs
    ]

    # Keep only the score (element 0) of each agreement result.
    return np.mean([
        compute_predicate_mention_agreement(
            non_verbal_graph, pred_graph, for_inter_annotator_agreement=False)[0]
        for non_verbal_graph, pred_graph in zip(non_verbal_graphs, pred_graphs)
    ])
def compute_agreement(annotator1_file, annotator2_file):
    """
    Receives two annotation files about the same story, each annotated by a different annotator,
    and computes the task-level agreement:

    1) Entity mentions
    2) Entity coreference
    3) Predicate mentions (overall, verbal only, non-verbal only)
    4) Predicate coreference
    5) Argument mention within predicate chains
    6) Argument coreference
    7) Entailment graph

    Every score is printed as it is computed. Each step that returns a pair of
    consensual graphs feeds them into the following step, so the order of the
    calls below matters.

    :param annotator1_file The path for the first graph
    :param annotator2_file The path for the second graph
    :return a list of 19 scores, in this order:
        entity mention score;
        entity coreference MUC, B^3, CEAF_C, CoNLL F1;
        predicate mention score, verbal score, non-verbal score;
        predicate coreference MUC, B^3, CEAF_C, CoNLL F1;
        argument mention score;
        argument coreference MUC, B^3, CEAF_C, CoNLL F1;
        entailment graph entities F1, propositions F1
    """
    # NOTE: print is called with a single pre-formatted string in parentheses,
    # which produces the same output on Python 2 (print statement) and Python 3.

    # Load the annotation files to OKR objects
    graph1 = load_graph_from_file(annotator1_file)
    graph2 = load_graph_from_file(annotator2_file)

    # Compute agreement for entity mentions and update the graphs to contain only annotations
    # in which both annotators agreed on the entity mentions
    ent_mention_score, consensual_graph1, consensual_graph2 = \
        compute_entity_mention_agreement(graph1, graph2)
    print('Entity mentions: %.3f' % ent_mention_score)

    # Compute agreement for entity coreference and update the graphs to contain only annotations
    # in which both annotators agreed on the entity clusters
    ent_muc, ent_b_cube, ent_ceaf_c, ent_conll_f1, consensual_graph1, consensual_graph2 = \
        compute_entity_coref_agreement(consensual_graph1, consensual_graph2)
    print('Entity coreference: MUC=%.3f, B^3=%.3f, CEAF_C=%.3f, MELA=%.3f' %
          (ent_muc, ent_b_cube, ent_ceaf_c, ent_conll_f1))

    # Compute agreement for predicate mentions and update the graphs to contain only annotations
    # in which both annotators agreed on the predicate mentions.
    # For analysis purposes, compute also verbal and non-verbal; these two are evaluated
    # before the consensual graphs are replaced by the overall predicate mention step.
    pred_mention_non_verbal_score = \
        compute_predicate_mention_agreement_non_verbal(consensual_graph1, consensual_graph2)
    pred_mention_verbal_score = \
        compute_predicate_mention_agreement_verbal(consensual_graph1, consensual_graph2)
    pred_mention_score, consensual_graph1, consensual_graph2 = \
        compute_predicate_mention_agreement(consensual_graph1, consensual_graph2)
    print('Predicate mentions: %.3f, verbal: %.3f, non-verbal: %.3f' %
          (pred_mention_score, pred_mention_verbal_score, pred_mention_non_verbal_score))

    # Compute agreement for predicate coreference and update the graphs to contain only annotations
    # in which both annotators agreed on the predicate clusters. The optimal alignment returned
    # here is reused by the argument coreference step below.
    pred_muc, pred_b_cube, pred_ceaf_c, pred_conll_f1, \
        consensual_graph1, consensual_graph2, optimal_alignment = \
        compute_predicate_coref_agreement(consensual_graph1, consensual_graph2)
    print('Predicate coreference: MUC=%.3f, B^3=%.3f, CEAF_C=%.3f, MELA=%.3f' %
          (pred_muc, pred_b_cube, pred_ceaf_c, pred_conll_f1))

    # Compute agreement for argument mention within predicate chains and update the graphs
    # to contain only annotations in which both annotators agreed on the argument mentions
    arg_mention_score, consensual_graph1, consensual_graph2 = \
        compute_argument_mention_agreement(consensual_graph1, consensual_graph2)
    print('Argument mentions: %.3f' % arg_mention_score)

    # Compute coreference scores for alignment between arguments of the same propositions
    arg_muc, arg_b_cube, arg_ceaf_c, arg_conll_f1, consensual_graph1, consensual_graph2 = \
        compute_argument_coref_agreement(consensual_graph1, consensual_graph2, optimal_alignment)
    print('Argument coreference: MUC=%.3f, B^3=%.3f, CEAF_C=%.3f, MELA=%.3f' %
          (arg_muc, arg_b_cube, arg_ceaf_c, arg_conll_f1))

    # Compute agreement for the entailment graph and update the graphs to contain only annotations
    # in which both annotators agreed on the edges (propositions, arguments and entities).
    # The arguments kappa is unpacked but neither printed nor returned.
    entities_f1, _arguments_kappa, propositions_f1, consensual_graph1, consensual_graph2 = \
        compute_entailment_graph_agreement(consensual_graph1, consensual_graph2)
    print('Entailment graph F1: entities=%.3f, propositions=%.3f' % (entities_f1, propositions_f1))

    return [ent_mention_score,
            ent_muc, ent_b_cube, ent_ceaf_c, ent_conll_f1,
            pred_mention_score, pred_mention_verbal_score, pred_mention_non_verbal_score,
            pred_muc, pred_b_cube, pred_ceaf_c, pred_conll_f1,
            arg_mention_score,
            arg_muc, arg_b_cube, arg_ceaf_c, arg_conll_f1,
            entities_f1, propositions_f1]