def partial_evaluate(gold_list, predicted_list, partial_match_cutoff):
    """Evaluate the parse output with partial matching for arguments

    Aligns gold and predicted relations, scores the argument extractors and
    the sense classifier on that alignment, and prints a report to standard
    output.

    Args:
        gold_list: gold-standard relations.
        predicted_list: relations produced by the parser.
        partial_match_cutoff: cutoff forwarded to the aligner and to the
            argument scorers; it is also echoed in the printed report.

    Returns:
        A 4-tuple ``(arg1_match_prf, arg2_match_prf,
        entire_relation_match_prf, overall_prf)`` where ``overall_prf`` is
        the value returned by ``sense_cm.compute_micro_average_f1()``.
        The concatenated Arg1+Arg2 score is printed but not returned.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('PARTIAL EVALUATION - For diagnostics only and not for ranking')
    print('Aligning relations - This will time out after 120 seconds')
    arg1_alignment, arg2_alignment, relation_alignment = \
        aligner.align_relations(gold_list, predicted_list,
                                partial_match_cutoff)
    arg1_match_prf, arg2_match_prf, total_match_prf = \
        evaluate_args(arg1_alignment, arg2_alignment, partial_match_cutoff)
    entire_relation_match_prf = \
        evaluate_rel_arg_whole_rel(relation_alignment, partial_match_cutoff)
    valid_senses = validator.identify_valid_senses(gold_list)
    sense_cm = evaluate_sense(relation_alignment, valid_senses)

    prf_template = ' : Precision %1.4f Recall %1.4f F1 %1.4f'
    argument_reports = (
        ('Arg 1 extractor (partial matching)', arg1_match_prf),
        ('Arg 2 extractor (partial matching)', arg2_match_prf),
        ('Concatenated Arg 1 Arg 2 extractor (partial matching)',
         total_match_prf),
        ('Conjunctive Arg 1 & Arg 2 extractor (partial matching)',
         entire_relation_match_prf),
    )
    for label, prf in argument_reports:
        print((label + prf_template) % prf)

    print('Sense classification--------------')
    sense_cm.print_summary()
    print('Overall parser performance (cutoff = %s)--------------'
          % partial_match_cutoff)
    # Computed once and reused for both the printed line and the return
    # value (the original called compute_micro_average_f1() twice).
    overall_prf = sense_cm.compute_micro_average_f1()
    precision, recall, f1 = overall_prf
    print('Precision %1.4f Recall %1.4f F1 %1.4f' % (precision, recall, f1))
    return arg1_match_prf, arg2_match_prf, entire_relation_match_prf, \
        overall_prf
def partial_evaluate(gold_list, predicted_list, partial_match_cutoff):
    """Evaluate the parse output with partial matching for arguments

    Aligns gold and predicted relations, scores the argument extractors and
    the sense classifier on that alignment, and prints a report to standard
    output.

    Args:
        gold_list: gold-standard relations.
        predicted_list: relations produced by the parser.
        partial_match_cutoff: cutoff forwarded to the aligner and to the
            argument scorers; it is also echoed in the printed report.

    Returns:
        A 4-tuple ``(arg1_match_prf, arg2_match_prf,
        entire_relation_match_prf, overall_prf)`` where ``overall_prf`` is
        the value returned by ``sense_cm.compute_micro_average_f1()``.
        The concatenated Arg1+Arg2 score is printed but not returned.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Aligning relations - This will time out after 120 seconds")
    arg1_alignment, arg2_alignment, relation_alignment = aligner.align_relations(
        gold_list, predicted_list, partial_match_cutoff
    )
    arg1_match_prf, arg2_match_prf, total_match_prf = evaluate_args(
        arg1_alignment, arg2_alignment, partial_match_cutoff
    )
    entire_relation_match_prf = evaluate_rel_arg_whole_rel(
        relation_alignment, partial_match_cutoff
    )
    valid_senses = validator.identify_valid_senses(gold_list)
    sense_cm = evaluate_sense(relation_alignment, valid_senses)

    prf_suffix = " : Precision %1.4f Recall %1.4f F1 %1.4f"
    print(("Arg 1 extractor (partial matching)" + prf_suffix) % arg1_match_prf)
    print(("Arg 2 extractor (partial matching)" + prf_suffix) % arg2_match_prf)
    print(
        ("Concatenated Arg 1 Arg 2 extractor (partial matching)" + prf_suffix)
        % total_match_prf
    )
    print(
        ("Conjunctive Arg 1 & Arg 2 extractor (partial matching)" + prf_suffix)
        % entire_relation_match_prf
    )

    print("Sense classification--------------")
    sense_cm.print_summary()
    print(
        "Overall parser performance (cutoff = %s)--------------"
        % partial_match_cutoff
    )
    # Computed once and reused for both the printed line and the return
    # value (the original called compute_micro_average_f1() twice).
    overall_prf = sense_cm.compute_micro_average_f1()
    precision, recall, f1 = overall_prf
    print("Precision %1.4f Recall %1.4f F1 %1.4f" % (precision, recall, f1))
    return arg1_match_prf, arg2_match_prf, entire_relation_match_prf, overall_prf
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations
    that are missed by the system
    because the arguments don't match any of the gold relations.

    Only the first sense of each gold relation, and only when it is in the
    set returned by ``validator.identify_valid_senses(gold_list)``, becomes
    a label of the confusion matrix. Gold and predicted relations are linked
    with ``_link_gold_predicted`` using ``spans_exact_matching``.

    Args:
        gold_list: gold relations; each is indexed as ``relation['Sense']``.
        predicted_list: predicted relations, indexed the same way.

    Returns:
        The populated ``ConfusionMatrix``; entries are recorded with
        ``sense_cm.add(predicted_label, gold_label)``.
    """
    # Trace output retained from the original implementation. The docstring
    # used to follow this statement, which made it a no-op string expression
    # instead of the function's __doc__.
    print("In function: evaluate_sense")

    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense not in valid_senses:
            continue
        if i not in gold_to_predicted_map:
            # No predicted relation was linked to this gold relation.
            sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
            continue
        predicted_sense = gold_to_predicted_map[i]['Sense'][0]
        if predicted_sense in gold_relation['Sense']:
            # Matching any of the gold senses counts as correct.
            sense_cm.add(predicted_sense, predicted_sense)
            continue
        if not sense_cm.alphabet.has_label(predicted_sense):
            predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
        sense_cm.add(predicted_sense, gold_sense)

    # Predictions that were not linked to any gold relation.
    for i, predicted_relation in enumerate(predicted_list):
        if i in predicted_to_gold_map:
            continue
        predicted_sense = predicted_relation['Sense'][0]
        if not sense_cm.alphabet.has_label(predicted_sense):
            predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
        sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations
    that are missed by the system
    because the arguments don't match any of the gold relations.

    The confusion-matrix labels are the first senses of the gold relations
    that appear in ``validator.identify_valid_senses(gold_list)``, plus
    ``ConfusionMatrix.NEGATIVE_CLASS``. Gold and predicted relations are
    paired by ``_link_gold_predicted`` with ``spans_exact_matching``.

    Args:
        gold_list: gold relations; each is indexed as ``relation['Sense']``.
        predicted_list: predicted relations, indexed the same way.

    Returns:
        The populated ``ConfusionMatrix``; each entry is added as
        ``sense_cm.add(predicted_label, gold_label)``.
    """
    # Trace output retained from the original. Previously the docstring came
    # after this statement (separated by a stray ';'), so it was never
    # attached to the function.
    print("In function: evaluate_sense")

    negative = ConfusionMatrix.NEGATIVE_CLASS

    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)
    sense_alphabet.add(negative)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = _link_gold_predicted(
        gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense not in valid_senses:
            continue
        if i not in gold_to_predicted_map:
            # The system produced nothing linked to this gold relation.
            sense_cm.add(negative, gold_sense)
            continue
        predicted_sense = gold_to_predicted_map[i]['Sense'][0]
        if predicted_sense in gold_relation['Sense']:
            # Any of the gold senses is accepted as a correct prediction.
            sense_cm.add(predicted_sense, predicted_sense)
            continue
        if not sense_cm.alphabet.has_label(predicted_sense):
            predicted_sense = negative
        sense_cm.add(predicted_sense, gold_sense)

    # Predictions left without a gold counterpart.
    for i, predicted_relation in enumerate(predicted_list):
        if i in predicted_to_gold_map:
            continue
        predicted_sense = predicted_relation['Sense'][0]
        if not sense_cm.alphabet.has_label(predicted_sense):
            predicted_sense = negative
        sense_cm.add(predicted_sense, negative)
    return sense_cm
def _report_sense_confusion(gold_sense, predicted_sense, arg1_text, arg2_text):
    """Print one verbose record: gold sense, predicted sense, argument text."""
    # NOTE(review): the format-then-encode pattern is kept exactly as in the
    # original; under Python 3 this prints bytes reprs - confirm the target
    # interpreter before changing it.
    print('Sense:')
    print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
    print('>>>\t{:s}'.format(predicted_sense).encode(ENCODING))
    print('Arg1:\t{:s}'.format(arg1_text).encode(ENCODING))
    print('Arg2:\t{:s}'.format(arg2_text).encode(ENCODING))
    print()


def evaluate_sense(gold_list, predicted_list, verbose=False):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations
    that are missed by the system
    because the arguments don't match any of the gold relations.

    The confusion-matrix labels are the first senses of the gold relations
    that appear in ``validator.identify_valid_senses(gold_list)``, plus
    ``ConfusionMatrix.NEGATIVE_CLASS``. Gold and predicted relations are
    paired by ``_link_gold_predicted`` with ``spans_exact_matching``.

    Args:
        gold_list: gold relations; indexed as ``relation['Sense']`` and,
            when ``verbose`` is set, ``relation['Arg1']['RawText']`` /
            ``relation['Arg2']['RawText']``.
        predicted_list: predicted relations; indexed as
            ``relation['Sense']``.
        verbose: when true, print every non-matching (gold, predicted)
            pair. ``<<<`` marks the gold sense and ``>>>`` the predicted one.

    Returns:
        The populated ``ConfusionMatrix``; each entry is added as
        ``sense_cm.add(predicted_label, gold_label)``.
    """
    negative = ConfusionMatrix.NEGATIVE_CLASS

    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)
    sense_alphabet.add(negative)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense not in valid_senses:
            continue

        if i not in gold_to_predicted_map:
            # The system produced nothing linked to this gold relation.
            if verbose:
                _report_sense_confusion(
                    gold_sense, negative,
                    gold_relation['Arg1']['RawText'],
                    gold_relation['Arg2']['RawText'])
            sense_cm.add(negative, gold_sense)
            continue

        predicted_sense = gold_to_predicted_map[i]['Sense'][0]
        if predicted_sense in gold_relation['Sense']:
            # Any of the gold senses is accepted as a correct prediction.
            sense_cm.add(predicted_sense, predicted_sense)
            continue

        if not sense_cm.alphabet.has_label(predicted_sense):
            predicted_sense = negative
        if verbose:
            _report_sense_confusion(
                gold_sense, predicted_sense,
                gold_relation['Arg1']['RawText'],
                gold_relation['Arg2']['RawText'])
        sense_cm.add(predicted_sense, gold_sense)

    # Predictions left without a gold counterpart.
    for i, predicted_relation in enumerate(predicted_list):
        if i in predicted_to_gold_map:
            continue
        predicted_sense = predicted_relation['Sense'][0]
        if not sense_cm.alphabet.has_label(predicted_sense):
            predicted_sense = negative
        if verbose:
            # The original printed `gold_sense` / `gold_relation` left over
            # from the loop above (wrong relation, or NameError when that
            # loop never ran) and showed NEGATIVE_CLASS as the prediction.
            # Report this prediction itself: gold side is NEGATIVE_CLASS.
            # NOTE(review): 'RawText' is read defensively because this
            # function only shows it on gold relations - confirm whether
            # predicted relations carry it.
            arg1_text = (predicted_relation.get('Arg1') or {}).get(
                'RawText') or ''
            arg2_text = (predicted_relation.get('Arg2') or {}).get(
                'RawText') or ''
            _report_sense_confusion(
                negative, predicted_sense, arg1_text, arg2_text)
        sense_cm.add(predicted_sense, negative)
    return sense_cm