def score_responses(self):
    """Compute frame (slot-filler) scores for the core documents.

    For each gold cluster aligned to a system cluster, where both clusters
    have metatype ``Event`` or ``Relation``, the gold and system
    ``role:filler`` slots are compared with ``get_precision_recall_and_f1``.
    A system filler that is aligned to a gold cluster is represented by that
    gold cluster's id so that matching slots compare equal. Gold clusters
    without an aligned system cluster, and system clusters without an
    aligned gold cluster, are recorded with a zero score.

    A summary entry holding the mean F1 over all recorded scores is added
    last and the resulting ``ScorePrinter`` is stored in ``self.scores``.
    """
    frame_metatypes = ('Event', 'Relation')
    scores = ScorePrinter(self.logger, self.printing_specs, self.separator)
    f1_total = 0
    count = 0
    for document_id in self.get('core_documents'):
        alignment = self.get('cluster_alignment')
        document_gold_to_system = alignment.get('gold_to_system').get(document_id)
        document_system_to_gold = alignment.get('system_to_gold').get(document_id)

        # add scores corresponding to all gold clusters
        for gold_cluster_id in document_gold_to_system or []:
            gold_entry = document_gold_to_system.get(gold_cluster_id)
            system_cluster_id = gold_entry.get('aligned_to')
            aligned_similarity = gold_entry.get('aligned_similarity')
            is_aligned = bool(system_cluster_id) and system_cluster_id != 'None'
            # An alignment with zero similarity should never have been made.
            if is_aligned and aligned_similarity == 0:
                self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
            precision, recall, f1 = 0, 0, 0
            if is_aligned:
                gold_cluster = self.get('gold_responses').get(
                    'document_clusters').get(document_id).get(gold_cluster_id)
                system_cluster = self.get('system_responses').get(
                    'document_clusters').get(document_id).get(system_cluster_id)
                gold_metatype = gold_cluster.get('metatype')
                system_metatype = system_cluster.get('metatype')
                # Only event and relation clusters have frames to score.
                if (gold_metatype not in frame_metatypes
                        or system_metatype not in frame_metatypes):
                    continue

                gold_frame = self.get('gold_responses').get(
                    'document_frames').get(document_id).get(gold_cluster_id)
                if gold_frame is None:
                    self.record_event('MISSING_GOLD_FRAME',
                                      gold_cluster.get('metatype'),
                                      gold_cluster_id,
                                      document_id,
                                      self.get('code_location'))
                    continue
                gold_role_fillers = gold_frame.get('role_fillers')
                gold_slot_fillers = {
                    '{}:{}'.format(role_name, gold_filler_cluster_id)
                    for role_name in gold_role_fillers
                    for gold_filler_cluster_id in gold_role_fillers.get(role_name)
                }

                system_frame = self.get('system_responses').get(
                    'document_frames').get(document_id).get(system_cluster_id)
                system_slot_fillers = set()
                # A system cluster may have no frame; it then keeps the zero
                # score instead of failing with an AttributeError.
                if system_frame is not None:
                    system_role_fillers = system_frame.get('role_fillers')
                    for role_name in system_role_fillers:
                        for system_filler_cluster_id in system_role_fillers.get(role_name):
                            filler_entry = document_system_to_gold.get(
                                system_filler_cluster_id)
                            aligned_gold_filler_cluster_id = filler_entry.get('aligned_to')
                            filler_similarity = filler_entry.get('aligned_similarity')
                            if (aligned_gold_filler_cluster_id
                                    and aligned_gold_filler_cluster_id != 'None'):
                                if filler_similarity == 0:
                                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                                      'aligned_similarity=0')
                                # Express the filler in gold terms so that it
                                # can match a gold slot.
                                system_slot_fillers.add('{}:{}'.format(
                                    role_name, aligned_gold_filler_cluster_id))
                            else:
                                system_slot_fillers.add('{}:{}'.format(
                                    role_name, system_filler_cluster_id))

                if gold_slot_fillers and system_slot_fillers:
                    precision, recall, f1 = get_precision_recall_and_f1(
                        gold_slot_fillers, system_slot_fillers)
            f1_total += f1
            count += 1
            scores.add(FrameMetricScore(self.logger,
                                        self.get('runid'),
                                        document_id,
                                        gold_cluster_id,
                                        system_cluster_id,
                                        precision,
                                        recall,
                                        f1))

        # add scores corresponding to unaligned system clusters
        for system_cluster_id in document_system_to_gold or []:
            system_entry = document_system_to_gold.get(system_cluster_id)
            gold_cluster_id = system_entry.get('aligned_to')
            aligned_similarity = system_entry.get('aligned_similarity')
            if gold_cluster_id and gold_cluster_id != 'None':
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                # Aligned system clusters were already scored above.
                continue
            precision, recall, f1 = 0, 0, 0
            count += 1
            scores.add(FrameMetricScore(self.logger,
                                        self.get('runid'),
                                        document_id,
                                        gold_cluster_id,
                                        system_cluster_id,
                                        precision,
                                        recall,
                                        f1))

    mean_f1 = f1_total / count if count else 0
    scores.add(FrameMetricScore(self.logger,
                                self.get('runid'),
                                'Summary',
                                '',
                                '',
                                '',
                                '',
                                mean_f1,
                                summary=True))
    self.scores = scores
def score_responses(self):
    """Compute frame (slot-filler) scores for the core documents.

    Gold clusters of metatype ``Event`` or ``Relation`` are scored against
    the system cluster they are aligned to by comparing ``role:filler``
    slots with ``get_precision_recall_and_f1``; system fillers aligned to a
    gold cluster are represented by that gold cluster's id. Unaligned gold
    clusters, and unaligned system clusters of those metatypes, get a zero
    score.

    The scores are sorted with ``multisort``, added to a ``ScorePrinter``,
    aggregated through ``self.aggregate_scores`` and stored in
    ``self.scores``.
    """
    frame_metatypes = ('Event', 'Relation')
    collected = []
    for document_id in self.get('core_documents'):
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        alignment = self.get('cluster_alignment')
        gold_to_system = alignment.get('gold_to_system').get(document_id)
        system_to_gold = alignment.get('system_to_gold').get(document_id)

        # add scores corresponding to all gold clusters
        for gold_cluster_id in gold_to_system or []:
            gold_entry = gold_to_system.get(gold_cluster_id)
            system_cluster_id = gold_entry.get('aligned_to')
            aligned_similarity = gold_entry.get('aligned_similarity')
            precision, recall, f1 = 0, 0, 0
            if gold_cluster_id == 'None':
                continue
            gold_cluster = self.get('cluster', 'gold', document_id, gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            if metatype not in frame_metatypes:
                continue
            if system_cluster_id != 'None':
                # An alignment with zero similarity is reported as an error.
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id,
                                          system_cluster_id)
                system_metatype = system_cluster.get('metatype')
                if system_metatype != metatype:
                    self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                                      system_metatype,
                                      system_cluster_id,
                                      metatype,
                                      gold_cluster_id)

                gold_frame = self.get('frame', 'gold', document_id, gold_cluster_id)
                if gold_frame is None or len(gold_frame.get('role_fillers')) == 0:
                    # A missing or empty gold frame is only reported for
                    # relations; either way no score is recorded.
                    if metatype == 'Relation':
                        self.record_event('MISSING_GOLD_FRAME',
                                          metatype,
                                          gold_cluster_id,
                                          document_id,
                                          self.get('code_location'))
                    continue
                gold_roles = gold_frame.get('role_fillers')
                gold_slots = set()
                for role_name in gold_roles:
                    for gold_filler_id in gold_roles.get(role_name):
                        gold_slots.add('{}:{}'.format(role_name, gold_filler_id))

                system_frame = self.get('frame', 'system', document_id,
                                        system_cluster_id)
                if system_frame:
                    system_roles = system_frame.get('role_fillers')
                    system_slots = set()
                    for role_name in system_roles:
                        for system_filler_id in system_roles.get(role_name):
                            filler_entry = system_to_gold.get(system_filler_id)
                            aligned_gold_filler_id = filler_entry.get('aligned_to')
                            filler_similarity = filler_entry.get('aligned_similarity')
                            if aligned_gold_filler_id == 'None':
                                slot_filler_id = system_filler_id
                            else:
                                if filler_similarity == 0:
                                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                                      'aligned_similarity=0')
                                # Use the gold id so the slot can match gold.
                                slot_filler_id = aligned_gold_filler_id
                            system_slots.add('{}:{}'.format(role_name, slot_filler_id))
                    if gold_slots and system_slots:
                        precision, recall, f1 = get_precision_recall_and_f1(
                            gold_slots, system_slots)
            collected.append(FrameMetricScore(logger=self.logger,
                                              run_id=self.get('run_id'),
                                              document_id=document_id,
                                              language=language,
                                              metatype=metatype,
                                              gold_cluster_id=gold_cluster_id,
                                              system_cluster_id=system_cluster_id,
                                              precision=precision,
                                              recall=recall,
                                              f1=f1))

        # add scores corresponding to unaligned system clusters
        precision, recall, f1 = 0, 0, 0
        for system_cluster_id in system_to_gold or []:
            system_entry = system_to_gold.get(system_cluster_id)
            gold_cluster_id = system_entry.get('aligned_to')
            aligned_similarity = system_entry.get('aligned_similarity')
            if system_cluster_id == 'None':
                continue
            if gold_cluster_id != 'None':
                # Aligned clusters were scored above; only flag bad alignments.
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                continue
            metatype = self.get('cluster', 'system', document_id,
                                system_cluster_id).get('metatype')
            if metatype not in frame_metatypes:
                continue
            collected.append(FrameMetricScore(logger=self.logger,
                                              run_id=self.get('run_id'),
                                              document_id=document_id,
                                              language=language,
                                              metatype=metatype,
                                              gold_cluster_id=gold_cluster_id,
                                              system_cluster_id=system_cluster_id,
                                              precision=precision,
                                              recall=recall,
                                              f1=f1))

    scores_printer = ScorePrinter(self.logger, self.printing_specs)
    sort_spec = (('document_id', False),
                 ('metatype', False),
                 ('gold_cluster_id', False),
                 ('system_cluster_id', False))
    for score in multisort(collected, sort_spec):
        scores_printer.add(score)
    self.aggregate_scores(scores_printer, FrameMetricScore)
    self.scores = scores_printer
def score_responses(self):
    """Compute type scores for the clusters of the core documents.

    For each gold cluster aligned to a system cluster, the
    ``all_expanded_types`` of both clusters are compared with
    ``get_precision_recall_and_f1``. Gold clusters without an aligned
    system cluster, and system clusters without an aligned gold cluster,
    are recorded with a zero score.

    A summary entry holding the mean F1 over all recorded scores is added
    last (zero when nothing was scored) and the resulting ``ScorePrinter``
    is stored in ``self.scores``.
    """
    scores = ScorePrinter(self.logger, self.printing_specs, self.separator)
    f1_total = 0
    count = 0
    for document_id in self.get('core_documents'):
        # add scores corresponding to all gold clusters
        document_gold_to_system = self.get('cluster_alignment').get(
            'gold_to_system').get(document_id)
        for gold_cluster_id in document_gold_to_system or []:
            gold_entry = document_gold_to_system.get(gold_cluster_id)
            system_cluster_id = gold_entry.get('aligned_to')
            aligned_similarity = gold_entry.get('aligned_similarity')
            # An alignment with zero similarity should never have been made.
            if system_cluster_id and aligned_similarity == 0:
                self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
            precision, recall, f1 = 0, 0, 0
            if system_cluster_id:
                gold_cluster_types = set(
                    self.get('gold_responses').get('document_clusters').get(
                        document_id).get(gold_cluster_id).get('all_expanded_types'))
                system_cluster_types = set()
                system_document_clusters = self.get('system_responses').get(
                    'document_clusters')
                # The system may have produced no clusters for this document.
                if document_id in system_document_clusters:
                    system_cluster_types = set(
                        system_document_clusters.get(document_id).get(
                            system_cluster_id).get('all_expanded_types'))
                precision, recall, f1 = get_precision_recall_and_f1(
                    gold_cluster_types, system_cluster_types)
            f1_total += f1
            count += 1
            scores.add(TypeMetricScore(self.logger,
                                       self.get('runid'),
                                       document_id,
                                       gold_cluster_id,
                                       system_cluster_id,
                                       precision,
                                       recall,
                                       f1))

        # add scores for unaligned system clusters
        document_system_to_gold = self.get('cluster_alignment').get(
            'system_to_gold').get(document_id)
        for system_cluster_id in document_system_to_gold or []:
            system_entry = document_system_to_gold.get(system_cluster_id)
            gold_cluster_id = system_entry.get('aligned_to')
            aligned_similarity = system_entry.get('aligned_similarity')
            if gold_cluster_id:
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                # Aligned system clusters were already scored above.
                continue
            precision, recall, f1 = 0, 0, 0
            count += 1
            scores.add(TypeMetricScore(self.logger,
                                       self.get('runid'),
                                       document_id,
                                       gold_cluster_id,
                                       system_cluster_id,
                                       precision,
                                       recall,
                                       f1))

    # Guard against a run with nothing to score (no core documents or no
    # clusters), which would otherwise raise ZeroDivisionError.
    mean_f1 = f1_total / count if count else 0
    scores.add(TypeMetricScore(self.logger,
                               self.get('runid'),
                               'Summary',
                               '',
                               '',
                               '',
                               '',
                               mean_f1,
                               summary=True))
    self.scores = scores
def score_responses(self):
    """Compute type scores for entity and event clusters of the core documents.

    For each gold cluster of metatype ``Entity`` or ``Event`` that is
    aligned to a system cluster, the expanded types of both clusters are
    passed through ``self.get('augmented_types', ...)`` and compared with
    ``get_precision_recall_and_f1``. Unaligned gold clusters, and unaligned
    system clusters of those metatypes, get a zero score. The submitted and
    the scored types are logged as ``TYPE_METRIC_SCORE_INFO`` events.

    The scores are sorted with ``multisort``, added to a ``ScorePrinter``,
    aggregated through ``self.aggregate_scores`` and stored in
    ``self.scores``.
    """
    scored_metatypes = ('Entity', 'Event')
    collected = []
    for document_id in self.get('core_documents'):
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        annotated_types = self.get('annotated_regions').get(
            'types_annotated_for_document', document_id)
        self.record_event('ANNOTATED_TYPES_INFO', document_id, ','.join(annotated_types))

        # add scores corresponding to all gold clusters
        gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
        for gold_cluster_id in gold_to_system or []:
            gold_entry = gold_to_system.get(gold_cluster_id)
            system_cluster_id = gold_entry.get('aligned_to')
            aligned_similarity = gold_entry.get('aligned_similarity')
            precision, recall, f1 = 0, 0, 0
            if gold_cluster_id == 'None':
                continue
            gold_cluster = self.get('gold_responses').get('document_clusters').get(
                document_id).get(gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            if system_cluster_id != 'None':
                # An alignment with zero similarity is reported as an error.
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id,
                                          system_cluster_id)
                system_metatype = system_cluster.get('metatype')
                if system_metatype != metatype:
                    self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                                      system_metatype,
                                      system_cluster_id,
                                      metatype,
                                      gold_cluster_id)

                gold_types = set(gold_cluster.get('all_expanded_types'))
                system_types = set()
                system_document_clusters = self.get('system_responses').get(
                    'document_clusters')
                if document_id in system_document_clusters:
                    system_types = set(
                        system_document_clusters.get(document_id).get(
                            system_cluster_id).get('all_expanded_types'))
                augmented_gold_types = self.get('augmented_types', document_id, gold_types)
                augmented_system_types = self.get('augmented_types', document_id,
                                                  system_types)
                scorer_name = self.__class__.__name__
                self.record_event('TYPE_METRIC_SCORE_INFO',
                                  scorer_name,
                                  'TYPES_SUBMITTED',
                                  document_id,
                                  gold_cluster_id,
                                  ','.join(gold_types),
                                  system_cluster_id,
                                  ','.join(system_types))
                self.record_event('TYPE_METRIC_SCORE_INFO',
                                  scorer_name,
                                  'TYPES_SCORED',
                                  document_id,
                                  gold_cluster_id,
                                  ','.join(augmented_gold_types),
                                  system_cluster_id,
                                  ','.join(augmented_system_types))
                precision, recall, f1 = get_precision_recall_and_f1(
                    augmented_gold_types, augmented_system_types)
            collected.append(TypeMetricScoreV1(logger=self.logger,
                                               run_id=self.get('run_id'),
                                               document_id=document_id,
                                               language=language,
                                               metatype=metatype,
                                               gold_cluster_id=gold_cluster_id,
                                               system_cluster_id=system_cluster_id,
                                               precision=precision,
                                               recall=recall,
                                               f1=f1))

        # add scores for unaligned system clusters
        system_to_gold = self.get('cluster_alignment').get('system_to_gold').get(document_id)
        for system_cluster_id in system_to_gold or []:
            system_entry = system_to_gold.get(system_cluster_id)
            gold_cluster_id = system_entry.get('aligned_to')
            aligned_similarity = system_entry.get('aligned_similarity')
            if system_cluster_id == 'None':
                continue
            system_cluster = self.get('system_responses').get('document_clusters').get(
                document_id).get(system_cluster_id)
            metatype = system_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            if gold_cluster_id != 'None':
                # Aligned clusters were scored above; only flag bad alignments.
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                continue
            precision, recall, f1 = 0, 0, 0
            collected.append(TypeMetricScoreV1(logger=self.logger,
                                               run_id=self.get('run_id'),
                                               document_id=document_id,
                                               language=language,
                                               metatype=metatype,
                                               gold_cluster_id=gold_cluster_id,
                                               system_cluster_id=system_cluster_id,
                                               precision=precision,
                                               recall=recall,
                                               f1=f1))

    scores_printer = ScorePrinter(self.logger, self.printing_specs)
    sort_spec = (('document_id', False),
                 ('metatype', False),
                 ('gold_cluster_id', False),
                 ('system_cluster_id', False))
    for score in multisort(collected, sort_spec):
        scores_printer.add(score)
    self.aggregate_scores(scores_printer, TypeMetricScoreV1)
    self.scores = scores_printer
def score_responses(self):
    """Compute frame (slot-filler) scores with per language/metatype means.

    Gold clusters of metatype ``Event`` or ``Relation`` are scored against
    the system cluster they are aligned to by comparing ``role:filler``
    slots with ``get_precision_recall_and_f1``; system fillers aligned to a
    gold cluster are represented by that gold cluster's id. Unaligned gold
    clusters, and unaligned system clusters of those metatypes, get a zero
    score.

    F1 totals and counts are kept per ``language:metatype`` key, with
    ``ALL`` as a wildcard for either part. The sorted scores followed by
    one summary entry per key are added to a ``ScorePrinter`` that is
    stored in ``self.scores``.
    """
    frame_metatypes = ('Event', 'Relation')
    collected = []
    f1_totals = {}
    counts = {}
    for document_id in self.get('core_documents'):
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        alignment = self.get('cluster_alignment')
        gold_to_system = alignment.get('gold_to_system').get(document_id)
        system_to_gold = alignment.get('system_to_gold').get(document_id)

        # add scores corresponding to all gold clusters
        for gold_cluster_id in gold_to_system or []:
            gold_entry = gold_to_system.get(gold_cluster_id)
            system_cluster_id = gold_entry.get('aligned_to')
            aligned_similarity = gold_entry.get('aligned_similarity')
            precision, recall, f1 = 0, 0, 0
            if gold_cluster_id == 'None':
                continue
            gold_cluster = self.get('cluster', 'gold', document_id, gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            if metatype not in frame_metatypes:
                continue
            if system_cluster_id != 'None':
                # An alignment with zero similarity is reported as an error.
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id,
                                          system_cluster_id)
                system_metatype = system_cluster.get('metatype')
                if system_metatype != metatype:
                    self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                                      system_metatype,
                                      system_cluster_id,
                                      metatype,
                                      gold_cluster_id)

                gold_frame = self.get('frame', 'gold', document_id, gold_cluster_id)
                if gold_frame is None or len(gold_frame.get('role_fillers')) == 0:
                    # A missing or empty gold frame is only reported for
                    # relations; either way no score is recorded.
                    if metatype == 'Relation':
                        self.record_event('MISSING_GOLD_FRAME',
                                          metatype,
                                          gold_cluster_id,
                                          document_id,
                                          self.get('code_location'))
                    continue
                gold_roles = gold_frame.get('role_fillers')
                gold_slots = set()
                for role_name in gold_roles:
                    for gold_filler_id in gold_roles.get(role_name):
                        gold_slots.add('{}:{}'.format(role_name, gold_filler_id))

                system_frame = self.get('frame', 'system', document_id,
                                        system_cluster_id)
                if system_frame:
                    system_roles = system_frame.get('role_fillers')
                    system_slots = set()
                    for role_name in system_roles:
                        for system_filler_id in system_roles.get(role_name):
                            filler_entry = system_to_gold.get(system_filler_id)
                            aligned_gold_filler_id = filler_entry.get('aligned_to')
                            filler_similarity = filler_entry.get('aligned_similarity')
                            if aligned_gold_filler_id == 'None':
                                slot_filler_id = system_filler_id
                            else:
                                if filler_similarity == 0:
                                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                                      'aligned_similarity=0')
                                # Use the gold id so the slot can match gold.
                                slot_filler_id = aligned_gold_filler_id
                            system_slots.add('{}:{}'.format(role_name, slot_filler_id))
                    if gold_slots and system_slots:
                        precision, recall, f1 = get_precision_recall_and_f1(
                            gold_slots, system_slots)
            summary_keys = [
                '{language}:{metatype}'.format(metatype=metatype_key,
                                               language=language_key)
                for metatype_key in ('ALL', metatype)
                for language_key in ('ALL', language)
            ]
            for key in summary_keys:
                f1_totals[key] = f1_totals.get(key, 0) + f1
                counts[key] = counts.get(key, 0) + 1
            collected.append(FrameMetricScore(self.logger,
                                              self.get('runid'),
                                              document_id,
                                              language,
                                              metatype,
                                              gold_cluster_id,
                                              system_cluster_id,
                                              precision,
                                              recall,
                                              f1))

        # add scores corresponding to unaligned system clusters
        precision, recall, f1 = 0, 0, 0
        for system_cluster_id in system_to_gold or []:
            system_entry = system_to_gold.get(system_cluster_id)
            gold_cluster_id = system_entry.get('aligned_to')
            aligned_similarity = system_entry.get('aligned_similarity')
            if system_cluster_id == 'None':
                continue
            if gold_cluster_id != 'None':
                # Aligned clusters were scored above; only flag bad alignments.
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                continue
            metatype = self.get('cluster', 'system', document_id,
                                system_cluster_id).get('metatype')
            if metatype not in frame_metatypes:
                continue
            summary_keys = [
                '{language}:{metatype}'.format(metatype=metatype_key,
                                               language=language_key)
                for metatype_key in ('ALL', metatype)
                for language_key in ('ALL', language)
            ]
            for key in summary_keys:
                f1_totals[key] = f1_totals.get(key, 0) + f1
                counts[key] = counts.get(key, 0) + 1
            collected.append(FrameMetricScore(self.logger,
                                              self.get('runid'),
                                              document_id,
                                              language,
                                              metatype,
                                              gold_cluster_id,
                                              system_cluster_id,
                                              precision,
                                              recall,
                                              f1))

    scores_printer = ScorePrinter(self.logger, self.printing_specs, self.separator)
    sort_spec = (('document_id', False),
                 ('metatype', False),
                 ('gold_cluster_id', False),
                 ('system_cluster_id', False))
    for score in multisort(collected, sort_spec):
        scores_printer.add(score)

    # One summary entry per language/metatype combination that was seen.
    for key in sorted(f1_totals, key=self.order):
        mean_f1 = f1_totals[key] / counts[key] if counts[key] else 0
        summary_language, summary_metatype = key.split(':')
        scores_printer.add(FrameMetricScore(self.logger,
                                            self.get('runid'),
                                            'Summary',
                                            summary_language,
                                            summary_metatype,
                                            '',
                                            '',
                                            '',
                                            '',
                                            mean_f1,
                                            summary=True))
    self.scores = scores_printer
def score_responses(self):
    """Compute type scores with per language/metatype means.

    For each gold cluster of metatype ``Entity`` or ``Event`` that is
    aligned to a system cluster, the expanded types of both clusters are
    passed through ``self.get('augmented_types', ...)`` and compared with
    ``get_precision_recall_and_f1``. Unaligned gold clusters, and unaligned
    system clusters of those metatypes, get a zero score.

    F1 totals and counts are kept per ``language:metatype`` key, with
    ``ALL`` as a wildcard for either part. The sorted scores followed by
    one summary entry per key are added to a ``ScorePrinter`` that is
    stored in ``self.scores``.
    """
    scored_metatypes = ('Entity', 'Event')
    collected = []
    f1_totals = {}
    counts = {}
    for document_id in self.get('core_documents'):
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        annotated_types = self.get('annotated_regions').get(
            'types_annotated_for_document', document_id)
        self.record_event('ANNOTATED_TYPES_INFO', document_id, ','.join(annotated_types))

        # add scores corresponding to all gold clusters
        gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
        for gold_cluster_id in gold_to_system or []:
            gold_entry = gold_to_system.get(gold_cluster_id)
            system_cluster_id = gold_entry.get('aligned_to')
            aligned_similarity = gold_entry.get('aligned_similarity')
            precision, recall, f1 = 0, 0, 0
            if gold_cluster_id == 'None':
                continue
            gold_cluster = self.get('gold_responses').get('document_clusters').get(
                document_id).get(gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            if system_cluster_id != 'None':
                # An alignment with zero similarity is reported as an error.
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id,
                                          system_cluster_id)
                system_metatype = system_cluster.get('metatype')
                if system_metatype != metatype:
                    self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                                      system_metatype,
                                      system_cluster_id,
                                      metatype,
                                      gold_cluster_id)

                gold_types = set(gold_cluster.get('all_expanded_types'))
                system_types = set()
                system_document_clusters = self.get('system_responses').get(
                    'document_clusters')
                if document_id in system_document_clusters:
                    system_types = set(
                        system_document_clusters.get(document_id).get(
                            system_cluster_id).get('all_expanded_types'))
                augmented_gold_types = self.get('augmented_types', document_id, gold_types)
                augmented_system_types = self.get('augmented_types', document_id,
                                                  system_types)
                # NOTE(review): the event code says TEMPORAL although types
                # are being scored here - confirm this is intended.
                self.record_event('TEMPORAL_METRIC_SCORE_INFO',
                                  'TYPES_SUBMITTED',
                                  document_id,
                                  gold_cluster_id,
                                  ','.join(gold_types),
                                  system_cluster_id,
                                  ','.join(system_types))
                self.record_event('TEMPORAL_METRIC_SCORE_INFO',
                                  'TYPES_SCORED',
                                  document_id,
                                  gold_cluster_id,
                                  ','.join(augmented_gold_types),
                                  system_cluster_id,
                                  ','.join(augmented_system_types))
                precision, recall, f1 = get_precision_recall_and_f1(
                    augmented_gold_types, augmented_system_types)
            summary_keys = [
                '{language}:{metatype}'.format(metatype=metatype_key,
                                               language=language_key)
                for metatype_key in ('ALL', metatype)
                for language_key in ('ALL', language)
            ]
            for key in summary_keys:
                f1_totals[key] = f1_totals.get(key, 0) + f1
                counts[key] = counts.get(key, 0) + 1
            collected.append(TypeMetricScore(self.logger,
                                             self.get('runid'),
                                             document_id,
                                             language,
                                             metatype,
                                             gold_cluster_id,
                                             system_cluster_id,
                                             precision,
                                             recall,
                                             f1))

        # add scores for unaligned system clusters
        system_to_gold = self.get('cluster_alignment').get('system_to_gold').get(document_id)
        for system_cluster_id in system_to_gold or []:
            system_entry = system_to_gold.get(system_cluster_id)
            gold_cluster_id = system_entry.get('aligned_to')
            aligned_similarity = system_entry.get('aligned_similarity')
            if system_cluster_id == 'None':
                continue
            system_cluster = self.get('system_responses').get('document_clusters').get(
                document_id).get(system_cluster_id)
            metatype = system_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            if gold_cluster_id != 'None':
                # Aligned clusters were scored above; only flag bad alignments.
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                continue
            precision, recall, f1 = 0, 0, 0
            summary_keys = [
                '{language}:{metatype}'.format(metatype=metatype_key,
                                               language=language_key)
                for metatype_key in ('ALL', metatype)
                for language_key in ('ALL', language)
            ]
            for key in summary_keys:
                f1_totals[key] = f1_totals.get(key, 0) + f1
                counts[key] = counts.get(key, 0) + 1
            collected.append(TypeMetricScore(self.logger,
                                             self.get('runid'),
                                             document_id,
                                             language,
                                             metatype,
                                             gold_cluster_id,
                                             system_cluster_id,
                                             precision,
                                             recall,
                                             f1))

    scores_printer = ScorePrinter(self.logger, self.printing_specs, self.separator)
    sort_spec = (('document_id', False),
                 ('metatype', False),
                 ('gold_cluster_id', False),
                 ('system_cluster_id', False))
    for score in multisort(collected, sort_spec):
        scores_printer.add(score)

    # One summary entry per language/metatype combination that was seen.
    for key in sorted(f1_totals, key=self.order):
        mean_f1 = f1_totals[key] / counts[key] if counts[key] else 0
        summary_language, summary_metatype = key.split(':')
        scores_printer.add(TypeMetricScore(self.logger,
                                           self.get('runid'),
                                           'Summary',
                                           summary_language,
                                           summary_metatype,
                                           '',
                                           '',
                                           '',
                                           '',
                                           mean_f1,
                                           summary=True))
    self.scores = scores_printer