def score_responses(self):
    """Score every query and append a macro-averaged summary row.

    For each query id in ``queries_to_score`` a per-query
    ``AcrossDocumentsCoreferenceMetricScore`` is built from that query's
    ``counts``. A summary score (query_id ``ALL-Macro``, entity_id
    ``Summary``) carries the mean of the per-query ``average_precision``
    values; every ``num_*`` field named in the printing specs that the
    summary does not have is filled with an empty string. The per-query
    scores (sorted by entity_id, then query_id) followed by the summary are
    stored on ``self.scores`` as a ``ScorePrinter``.
    """
    query_ids = self.get('queries_to_score')
    scores = []
    sum_average_precision = 0
    for query_id in query_ids:
        entity_id = self.get('entity_id', query_id)
        counts = self.get('counts', query_id)
        sum_average_precision += counts['average_precision']
        score = AcrossDocumentsCoreferenceMetricScore(self.get('logger'),
                                                      run_id=self.get('run_id'),
                                                      query_id=query_id,
                                                      entity_id=entity_id,
                                                      **counts)
        scores.append(score)

    # With no queries to score there is nothing to average; report 0 instead
    # of raising ZeroDivisionError (same fallback the mean-F1 scorers use).
    num_queries = len(query_ids)
    macro_average_precision = sum_average_precision / num_queries if num_queries else 0
    macro_counts = {'average_precision': macro_average_precision}

    # The summary row has no meaningful counts: blank out every 'num_*' column.
    for spec in self.get('printing_specs'):
        field_name = spec.get('name')
        if field_name.startswith('num_'):
            macro_counts.setdefault(field_name, '')

    macro_average_score = AcrossDocumentsCoreferenceMetricScore(self.get('logger'),
                                                                run_id=self.get('run_id'),
                                                                query_id='ALL-Macro',
                                                                entity_id='Summary',
                                                                summary=True,
                                                                **macro_counts)
    scores_printer = ScorePrinter(self.logger, self.printing_specs)
    for score in multisort(scores, (('entity_id', False), ('query_id', False))):
        scores_printer.add(score)
    scores_printer.add(macro_average_score)
    self.scores = scores_printer
def score_responses(self):
    """Build per-document argument-metric scores and store them on ``self.scores``.

    For every core document the gold and system type-role-fillers are
    fetched and aligned, then scored once per metatype group (``ALL``,
    ``Event``, ``Relation``). Groups with neither gold nor system
    role-fillers produce no score. Scores are added to a ``ScorePrinter``
    sorted by document_id and metatype_sortkey, after which
    ``aggregate_scores`` is invoked on the printer.
    """
    metatype_groups = {
        'ALL': ['Event', 'Relation'],
        'Event': ['Event'],
        'Relation': ['Relation'],
    }
    collected = []
    for document_id in self.get('core_documents'):
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        gold_trfs = self.get('document_type_role_fillers', 'gold', document_id)
        system_trfs = self.get('document_type_role_fillers', 'system', document_id)
        self.align_trfs(document_id, gold_trfs, system_trfs)
        for metatype_key, member_metatypes in metatype_groups.items():
            result = self.get('score', gold_trfs, system_trfs, member_metatypes)
            num_gold_trf, num_system_trf, precision, recall, f1 = result
            if num_gold_trf + num_system_trf == 0:
                # nothing to score for this metatype group in this document
                continue
            collected.append(
                ArgumentMetricScore(logger=self.logger,
                                    run_id=self.get('run_id'),
                                    document_id=document_id,
                                    language=language,
                                    metatype=metatype_key,
                                    precision=precision,
                                    recall=recall,
                                    f1=f1))

    printer = ScorePrinter(self.logger, self.printing_specs)
    sort_order = (('document_id', False), ('metatype_sortkey', False))
    for item in multisort(collected, sort_order):
        printer.add(item)
    self.aggregate_scores(printer, ArgumentMetricScore)
    self.scores = printer
def score_responses(self):
    """Score coreference per core document and metatype group, plus mean-F1 rows.

    For every core document and each metatype group (``ALL``, ``Entity``,
    ``Event``):

    * precision = max_total_similarity / total system self-similarity
    * recall    = max_total_similarity / total gold self-similarity
    * f1        = harmonic mean of precision and recall

    Each ratio falls back to 0 when its denominator is zero. F1 values are
    accumulated under ``'<language>:<metatype>'`` keys (for both the
    document's language and ``ALL``) and emitted as summary rows after the
    per-document rows. The resulting ``ScorePrinter`` is stored on
    ``self.scores``.
    """
    metatypes = {
        'ALL': ['Entity', 'Event'],
        'Entity': ['Entity'],
        'Event': ['Event'],
    }
    scores = []
    mean_f1s = {}
    counts = {}
    for document_id in self.get('core_documents'):
        document = self.get('gold_responses').get('document_mappings').get(
            'documents').get(document_id)
        language = document.get('language')
        for metatype_key in metatypes:
            max_total_similarity = self.get('max_total_similarity',
                                            document_id,
                                            metatypes[metatype_key])
            total_self_similarity_gold = self.get('total_self_similarity',
                                                  'gold', document_id,
                                                  metatypes[metatype_key])
            total_self_similarity_system = self.get('total_self_similarity',
                                                    'system', document_id,
                                                    metatypes[metatype_key])
            precision = (max_total_similarity / total_self_similarity_system
                         if total_self_similarity_system else 0)
            # Guard the gold denominator the same way as the system one, so a
            # zero gold self-similarity yields recall 0 instead of raising
            # ZeroDivisionError.
            recall = (max_total_similarity / total_self_similarity_gold
                      if total_self_similarity_gold else 0)
            f1 = (2 * precision * recall / (precision + recall)
                  if precision + recall else 0)
            score = CoreferenceMetricScore(self.logger, self.get('runid'),
                                           document_id, language,
                                           metatype_key, precision, recall, f1)
            # accumulate F1 for the per-language and the all-language summary
            for language_key in ['ALL', language]:
                key = '{language}:{metatype}'.format(language=language_key,
                                                     metatype=metatype_key)
                mean_f1s[key] = mean_f1s.get(key, 0) + f1
                counts[key] = counts.get(key, 0) + 1
            scores.append(score)

    scores_printer = ScorePrinter(self.logger, self.printing_specs, self.separator)
    for score in multisort(scores, (('document_id', False),
                                    ('metatype_sortkey', False))):
        scores_printer.add(score)

    # summary rows: one mean F1 per '<language>:<metatype>' key
    for key in sorted(mean_f1s, key=self.order):
        mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
        language, metatype = key.split(':')
        mean_score = CoreferenceMetricScore(self.logger, self.get('runid'),
                                            'Summary', language, metatype,
                                            '', '', mean_f1, summary=True)
        scores_printer.add(mean_score)
    self.scores = scores_printer
def score_responses(self):
    """Build argument-metric scores per document plus mean-F1 summary rows.

    Gold and system type-role-fillers of every core document are aligned
    and scored once per metatype group (``ALL``, ``Event``, ``Relation``);
    groups with no gold and no system role-fillers are skipped. F1 values
    are summed under ``'<language>:<metatype>'`` keys (document language
    and ``ALL``) and turned into summary rows that follow the sorted
    per-document rows in the ``ScorePrinter`` stored on ``self.scores``.
    """
    metatype_groups = {
        'ALL': ['Event', 'Relation'],
        'Event': ['Event'],
        'Relation': ['Relation'],
    }
    collected = []
    f1_totals = {}
    f1_counts = {}
    for document_id in self.get('core_documents'):
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        gold_trfs = self.get('document_type_role_fillers', 'gold', document_id)
        system_trfs = self.get('document_type_role_fillers', 'system', document_id)
        self.align_trfs(document_id, gold_trfs, system_trfs)
        for metatype_key, member_metatypes in metatype_groups.items():
            result = self.get('score', gold_trfs, system_trfs, member_metatypes)
            num_gold_trf, num_system_trf, precision, recall, f1 = result
            if num_gold_trf + num_system_trf == 0:
                continue
            # feed both the language-specific and the all-language aggregate
            for language_key in ['ALL', language]:
                aggregate_key = '{language}:{metatype}'.format(language=language_key,
                                                               metatype=metatype_key)
                f1_totals[aggregate_key] = f1_totals.get(aggregate_key, 0) + f1
                f1_counts[aggregate_key] = f1_counts.get(aggregate_key, 0) + 1
            collected.append(
                ArgumentMetricScore(self.logger, self.get('runid'), document_id,
                                    language, metatype_key, precision, recall, f1))

    printer = ScorePrinter(self.logger, self.printing_specs, self.separator)
    sort_order = (('document_id', False), ('metatype_sortkey', False))
    for item in multisort(collected, sort_order):
        printer.add(item)

    for aggregate_key in sorted(f1_totals, key=self.order):
        if f1_counts[aggregate_key]:
            mean_f1 = f1_totals[aggregate_key] / f1_counts[aggregate_key]
        else:
            mean_f1 = 0
        language, metatype = aggregate_key.split(':')
        printer.add(
            ArgumentMetricScore(self.logger, self.get('runid'), 'Summary',
                                language, metatype, '', '', mean_f1, summary=True))
    self.scores = printer
def score_responses(self):
    """Build temporal-metric scores for gold Event/Relation clusters.

    For every core document, each gold cluster in the gold-to-system
    alignment is considered. The ``'None'`` entry, clusters whose metatype
    is not Event or Relation, and clusters whose first date is ``None``
    (recorded as NO_TEMPORAL_CONSTRAINT) are skipped. When the gold cluster
    is aligned to a system cluster, the similarity is obtained from
    ``temporal_similarity``; otherwise it stays 0. Scores are sorted, added
    to a ``ScorePrinter``, aggregated via ``aggregate_scores`` and stored on
    ``self.scores``.
    """
    collected = []
    scored_metatypes = ['Event', 'Relation']
    for document_id in self.get('core_documents'):
        # add scores corresponding to all gold clusters
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
        for gold_cluster_id in gold_to_system or []:
            alignment = gold_to_system.get(gold_cluster_id)
            system_cluster_id = alignment.get('aligned_to')
            aligned_similarity = alignment.get('aligned_similarity')
            similarity = 0
            if gold_cluster_id == 'None':
                continue
            gold_cluster = self.get('cluster', 'gold', document_id, gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            gold_dates = gold_cluster.get('dates')
            first_gold_date = list(gold_dates.values())[0]
            if first_gold_date is None:
                self.record_event('NO_TEMPORAL_CONSTRAINT', gold_cluster_id, document_id)
                continue
            if system_cluster_id != 'None':
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id, system_cluster_id)
                system_metatype = system_cluster.get('metatype')
                if system_metatype != metatype:
                    self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                                      system_cluster.get('metatype'),
                                      system_cluster_id,
                                      metatype,
                                      gold_cluster_id)
                if len(gold_dates.keys()) > 1:
                    self.record_event('UNEXPECTED_NUM_DATES', gold_cluster_id, document_id)
                system_dates = list(system_cluster.get('dates').values())
                similarity = self.get('temporal_similarity', first_gold_date, system_dates)
            collected.append(
                TemporalMetricScore(logger=self.logger,
                                    run_id=self.get('run_id'),
                                    document_id=document_id,
                                    language=language,
                                    metatype=metatype,
                                    gold_cluster_id=gold_cluster_id,
                                    system_cluster_id=system_cluster_id,
                                    similarity=similarity))

    printer = ScorePrinter(self.logger, self.printing_specs)
    sort_order = (('document_id', False),
                  ('metatype', False),
                  ('gold_cluster_id', False),
                  ('system_cluster_id', False))
    for item in multisort(collected, sort_order):
        printer.add(item)
    self.aggregate_scores(printer, TemporalMetricScore)
    self.scores = printer
def score_responses(self):
    """Score coreference per core document and metatype group.

    For every core document and each metatype group (``ALL``, ``Entity``,
    ``Event``):

    * precision = max_total_similarity / total system self-similarity
    * recall    = max_total_similarity / total gold self-similarity
    * f1        = harmonic mean of precision and recall

    Each ratio falls back to 0 when its denominator is zero. Scores are
    sorted by document_id and metatype_sortkey, added to a ``ScorePrinter``,
    passed through ``aggregate_scores`` and stored on ``self.scores``.
    """
    metatypes = {
        'ALL': ['Entity', 'Event'],
        'Entity': ['Entity'],
        'Event': ['Event'],
    }
    scores = []
    for document_id in self.get('core_documents'):
        document = self.get('gold_responses').get('document_mappings').get(
            'documents').get(document_id)
        language = document.get('language')
        for metatype_key in metatypes:
            max_total_similarity = self.get('max_total_similarity',
                                            document_id,
                                            metatypes[metatype_key])
            total_self_similarity_gold = self.get('total_self_similarity',
                                                  'gold', document_id,
                                                  metatypes[metatype_key])
            total_self_similarity_system = self.get('total_self_similarity',
                                                    'system', document_id,
                                                    metatypes[metatype_key])
            precision = (max_total_similarity / total_self_similarity_system
                         if total_self_similarity_system else 0)
            # Guard the gold denominator the same way as the system one, so a
            # zero gold self-similarity yields recall 0 instead of raising
            # ZeroDivisionError.
            recall = (max_total_similarity / total_self_similarity_gold
                      if total_self_similarity_gold else 0)
            f1 = (2 * precision * recall / (precision + recall)
                  if precision + recall else 0)
            score = CoreferenceMetricScore(logger=self.logger,
                                           run_id=self.get('run_id'),
                                           document_id=document_id,
                                           language=language,
                                           metatype=metatype_key,
                                           precision=precision,
                                           recall=recall,
                                           f1=f1)
            scores.append(score)

    scores_printer = ScorePrinter(self.logger, self.printing_specs)
    for score in multisort(scores, (('document_id', False),
                                    ('metatype_sortkey', False))):
        scores_printer.add(score)
    self.aggregate_scores(scores_printer, CoreferenceMetricScore)
    self.scores = scores_printer
def get_average_precision(self, document_id, gold_cluster_id, augmented_gold_types, system_cluster_id, augmented_system_types):
    """Return the average precision of the ranked system types against gold.

    System types are ranked by their ``type_weight`` (descending), ties
    broken by type name (ascending). Walking the ranking, every type that
    is also a gold type adds its ``relevance_weight`` to the running
    correct count and contributes ``num_correct / rank`` to the precision
    sum. One TYPE_METRIC_AP_RANKED_LIST event is recorded per ranked type.
    The result is the precision sum divided by the number of gold types,
    or 0 when there are no gold types.
    """
    candidates = [
        {
            'type': entity_type,
            'weight': self.get('type_weight', augmented_system_types.get(entity_type)),
        }
        for entity_type in augmented_system_types
    ]
    num_ground_truth = len(augmented_gold_types)
    num_correct = 0
    sum_precision = 0.0
    ranking = multisort(candidates, (('weight', True), ('type', False)))
    for rank, candidate in enumerate(ranking, start=1):
        candidate_type = candidate.get('type')
        candidate_weight = candidate.get('weight')
        is_relevant = candidate_type in augmented_gold_types
        if is_relevant:
            num_correct += self.get('relevance_weight', candidate_weight)
            sum_precision += (num_correct / rank)
        self.record_event('TYPE_METRIC_AP_RANKED_LIST',
                          self.__class__.__name__,
                          document_id,
                          gold_cluster_id,
                          system_cluster_id,
                          num_ground_truth,
                          rank,
                          candidate_type,
                          'RIGHT' if is_relevant else 'WRONG',
                          candidate_weight,
                          num_correct,
                          sum_precision)
    if not num_ground_truth:
        return 0
    return sum_precision / num_ground_truth
def score_responses(self):
    """Build frame-metric (role-filler) scores and store them on ``self.scores``.

    Per core document, two passes are made over the cluster alignment:

    1. Every gold Event/Relation cluster gets a score. When it is aligned
       to a system cluster, gold and system ``'<role>:<filler cluster>'``
       slot-filler keys are compared with ``get_precision_recall_and_f1``;
       system filler clusters are first mapped to their aligned gold
       cluster when one exists. Otherwise precision/recall/F1 stay 0.
    2. Every system Event/Relation cluster aligned to ``'None'`` gets a
       zero score.

    Scores are sorted, added to a ``ScorePrinter``, passed through
    ``aggregate_scores`` and stored on ``self.scores``.

    NOTE(review): block nesting below was reconstructed from a
    whitespace-collapsed source — confirm against the canonical file.
    """
    scores = []
    for document_id in self.get('core_documents'):
        # add scores corresponding to all gold clusters
        document = self.get('gold_responses').get('document_mappings').get(
            'documents').get(document_id)
        language = document.get('language')
        document_gold_to_system = self.get('cluster_alignment').get(
            'gold_to_system').get(document_id)
        document_system_to_gold = self.get('cluster_alignment').get(
            'system_to_gold').get(document_id)
        for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
            system_cluster_id = document_gold_to_system.get(
                gold_cluster_id).get('aligned_to')
            aligned_similarity = document_gold_to_system.get(
                gold_cluster_id).get('aligned_similarity')
            # default score, kept when the gold cluster is unaligned
            precision, recall, f1 = [0, 0, 0]
            # the 'None' key is not a real gold cluster
            if gold_cluster_id == 'None':
                continue
            gold_cluster = self.get('cluster', 'gold', document_id,
                                    gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            # only Event and Relation clusters are scored here
            if metatype not in ['Event', 'Relation']:
                continue
            if system_cluster_id != 'None':
                # an alignment with zero similarity is reported as an error
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                      'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id,
                                          system_cluster_id)
                if system_cluster.get('metatype') != metatype:
                    self.record_event(
                        'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                        system_cluster.get('metatype'), system_cluster_id,
                        metatype, gold_cluster_id)
                gold_frame = self.get('frame', 'gold', document_id,
                                      gold_cluster_id)
                gold_slot_fillers = {}
                if gold_frame is None or len(
                        gold_frame.get('role_fillers')) == 0:
                    # a missing/empty gold frame is only reported for Relations
                    if gold_cluster.get('metatype') == 'Relation':
                        self.record_event('MISSING_GOLD_FRAME',
                                          gold_cluster.get('metatype'),
                                          gold_cluster_id, document_id,
                                          self.get('code_location'))
                    # no score is emitted for this gold cluster
                    continue
                # gold slot-filler keys: '<role_name>:<gold filler cluster id>'
                for role_name in gold_frame.get('role_fillers'):
                    for gold_filler_cluster_id in gold_frame.get(
                            'role_fillers').get(role_name):
                        gold_slot_fillers['{}:{}'.format(
                            role_name, gold_filler_cluster_id)] = 1
                system_frame = self.get('frame', 'system', document_id,
                                        system_cluster_id)
                if system_frame:
                    system_slot_fillers = {}
                    for role_name in system_frame.get('role_fillers'):
                        for system_filler_cluster_id in system_frame.get(
                                'role_fillers').get(role_name):
                            aligned_gold_filler_cluster_id = document_system_to_gold.get(
                                system_filler_cluster_id).get('aligned_to')
                            aligned_gold_filler_cluster_id_similarity = document_system_to_gold.get(
                                system_filler_cluster_id).get(
                                    'aligned_similarity')
                            if aligned_gold_filler_cluster_id != 'None':
                                if aligned_gold_filler_cluster_id_similarity == 0:
                                    self.record_event(
                                        'DEFAULT_CRITICAL_ERROR',
                                        'aligned_similarity=0')
                                # use the aligned gold cluster id so the key is
                                # comparable with the gold slot-filler keys
                                system_slot_fillers['{}:{}'.format(
                                    role_name,
                                    aligned_gold_filler_cluster_id)] = 1
                            else:
                                # unaligned filler: keep the system cluster id
                                system_slot_fillers['{}:{}'.format(
                                    role_name, system_filler_cluster_id)] = 1
                    # NOTE(review): system_slot_fillers is only bound inside
                    # `if system_frame:`, so this check is assumed to live in
                    # the same branch — confirm.
                    if len(gold_slot_fillers) and len(system_slot_fillers):
                        precision, recall, f1 = get_precision_recall_and_f1(
                            set(gold_slot_fillers.keys()),
                            set(system_slot_fillers.keys()))
            score = FrameMetricScore(logger=self.logger,
                                     run_id=self.get('run_id'),
                                     document_id=document_id,
                                     language=language,
                                     metatype=metatype,
                                     gold_cluster_id=gold_cluster_id,
                                     system_cluster_id=system_cluster_id,
                                     precision=precision,
                                     recall=recall,
                                     f1=f1)
            scores.append(score)
        # add scores corresponding to unaligned system clusters
        precision, recall, f1 = [0, 0, 0]
        for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
            gold_cluster_id = document_system_to_gold.get(
                system_cluster_id).get('aligned_to')
            aligned_similarity = document_system_to_gold.get(
                system_cluster_id).get('aligned_similarity')
            if system_cluster_id != 'None':
                if gold_cluster_id == 'None':
                    metatype = self.get('cluster', 'system', document_id,
                                        system_cluster_id).get('metatype')
                    if metatype not in ['Event', 'Relation']:
                        continue
                    score = FrameMetricScore(
                        logger=self.logger,
                        run_id=self.get('run_id'),
                        document_id=document_id,
                        language=language,
                        metatype=metatype,
                        gold_cluster_id=gold_cluster_id,
                        system_cluster_id=system_cluster_id,
                        precision=precision,
                        recall=recall,
                        f1=f1)
                    scores.append(score)
                elif aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                      'aligned_similarity=0')
    scores_printer = ScorePrinter(self.logger, self.printing_specs)
    for score in multisort(scores, (('document_id', False),
                                    ('metatype', False),
                                    ('gold_cluster_id', False),
                                    ('system_cluster_id', False))):
        scores_printer.add(score)
    self.aggregate_scores(scores_printer, FrameMetricScore)
    self.scores = scores_printer
def score_responses(self):
    """Build temporal-metric scores plus mean-similarity summary rows.

    For every core document, each gold cluster in the gold-to-system
    alignment is considered. The ``'None'`` entry, clusters whose metatype
    is not Event or Relation, and clusters whose first date is ``None``
    (recorded as NO_TEMPORAL_CONSTRAINT) are skipped. The similarity comes
    from ``temporal_similarity`` when the gold cluster is aligned to a
    system cluster and is 0 otherwise. Similarities are summed under
    ``'<language>:<metatype>'`` keys (each combined with ``ALL``) and
    emitted as summary rows after the sorted per-cluster rows. The
    ``ScorePrinter`` is stored on ``self.scores``.
    """
    collected = []
    similarity_totals = {}
    similarity_counts = {}

    def accumulate(metatype, language, value):
        # add `value` to all four (ALL | metatype) x (ALL | language) buckets
        for metatype_key in ['ALL', metatype]:
            for language_key in ['ALL', language]:
                key = '{language}:{metatype}'.format(metatype=metatype_key,
                                                     language=language_key)
                similarity_totals[key] = similarity_totals.get(key, 0) + value
                similarity_counts[key] = similarity_counts.get(key, 0) + 1

    for document_id in self.get('core_documents'):
        # add scores corresponding to all gold clusters
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
        for gold_cluster_id in gold_to_system or []:
            alignment = gold_to_system.get(gold_cluster_id)
            system_cluster_id = alignment.get('aligned_to')
            aligned_similarity = alignment.get('aligned_similarity')
            similarity = 0
            if gold_cluster_id == 'None':
                continue
            gold_cluster = self.get('cluster', 'gold', document_id, gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            if metatype not in ['Event', 'Relation']:
                continue
            gold_dates = gold_cluster.get('dates')
            first_gold_date = list(gold_dates.values())[0]
            if first_gold_date is None:
                self.record_event('NO_TEMPORAL_CONSTRAINT', gold_cluster_id, document_id)
                continue
            if system_cluster_id != 'None':
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id, system_cluster_id)
                if system_cluster.get('metatype') != metatype:
                    self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                                      system_cluster.get('metatype'),
                                      system_cluster_id,
                                      metatype,
                                      gold_cluster_id)
                if len(gold_dates.keys()) > 1:
                    self.record_event('UNEXPECTED_NUM_DATES', gold_cluster_id, document_id)
                system_dates = list(system_cluster.get('dates').values())
                similarity = self.get('temporal_similarity', first_gold_date, system_dates)
            accumulate(metatype, language, similarity)
            collected.append(
                TemporalMetricScore(self.logger, self.get('runid'), document_id,
                                    language, metatype, gold_cluster_id,
                                    system_cluster_id, similarity))

    printer = ScorePrinter(self.logger, self.printing_specs, self.separator)
    sort_order = (('document_id', False),
                  ('metatype', False),
                  ('gold_cluster_id', False),
                  ('system_cluster_id', False))
    for item in multisort(collected, sort_order):
        printer.add(item)

    for key in sorted(similarity_totals, key=self.order):
        if similarity_counts[key]:
            mean_similarity = similarity_totals[key] / similarity_counts[key]
        else:
            mean_similarity = 0
        language, metatype = key.split(':')
        printer.add(
            TemporalMetricScore(self.logger, self.get('runid'), 'Summary',
                                language, metatype, '', '', mean_similarity,
                                summary=True))
    self.scores = printer
def score_responses(self):
    """Build type-metric scores for Entity/Event clusters.

    Per core document an ANNOTATED_TYPES_INFO event is recorded, then:

    1. Every gold Entity/Event cluster gets a score. When aligned to a
       system cluster, the augmented gold and system type sets are compared
       with ``get_precision_recall_and_f1``; otherwise the score is 0.
    2. Every system Entity/Event cluster aligned to ``'None'`` gets a zero
       score.

    Scores are sorted, added to a ``ScorePrinter``, passed through
    ``aggregate_scores`` and stored on ``self.scores``.
    """
    collected = []
    scored_metatypes = ['Entity', 'Event']
    for document_id in self.get('core_documents'):
        # add scores corresponding to all gold clusters
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        annotated_types = self.get('annotated_regions').get('types_annotated_for_document',
                                                            document_id)
        self.record_event('ANNOTATED_TYPES_INFO', document_id, ','.join(annotated_types))
        gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
        for gold_cluster_id in gold_to_system or []:
            alignment = gold_to_system.get(gold_cluster_id)
            system_cluster_id = alignment.get('aligned_to')
            aligned_similarity = alignment.get('aligned_similarity')
            precision, recall, f1 = [0, 0, 0]
            if gold_cluster_id == 'None':
                continue
            gold_document_clusters = self.get('gold_responses').get('document_clusters')
            gold_cluster = gold_document_clusters.get(document_id).get(gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            if system_cluster_id != 'None':
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id, system_cluster_id)
                if system_cluster.get('metatype') != metatype:
                    self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                                      system_cluster.get('metatype'),
                                      system_cluster_id,
                                      metatype,
                                      gold_cluster_id)
                gold_types = set(gold_cluster.get('all_expanded_types'))
                system_types = set()
                system_document_clusters = self.get('system_responses').get('document_clusters')
                if document_id in system_document_clusters:
                    aligned_cluster = system_document_clusters.get(document_id).get(
                        system_cluster_id)
                    system_types = set(aligned_cluster.get('all_expanded_types'))
                augmented_gold_types = self.get('augmented_types', document_id, gold_types)
                augmented_system_types = self.get('augmented_types', document_id, system_types)
                self.record_event('TYPE_METRIC_SCORE_INFO',
                                  self.__class__.__name__,
                                  'TYPES_SUBMITTED',
                                  document_id,
                                  gold_cluster_id,
                                  ','.join(gold_types),
                                  system_cluster_id,
                                  ','.join(system_types))
                self.record_event('TYPE_METRIC_SCORE_INFO',
                                  self.__class__.__name__,
                                  'TYPES_SCORED',
                                  document_id,
                                  gold_cluster_id,
                                  ','.join(augmented_gold_types),
                                  system_cluster_id,
                                  ','.join(augmented_system_types))
                precision, recall, f1 = get_precision_recall_and_f1(augmented_gold_types,
                                                                    augmented_system_types)
            collected.append(
                TypeMetricScoreV1(logger=self.logger,
                                  run_id=self.get('run_id'),
                                  document_id=document_id,
                                  language=language,
                                  metatype=metatype,
                                  gold_cluster_id=gold_cluster_id,
                                  system_cluster_id=system_cluster_id,
                                  precision=precision,
                                  recall=recall,
                                  f1=f1))

        # add scores unaligned system clusters
        system_to_gold = self.get('cluster_alignment').get('system_to_gold').get(document_id)
        for system_cluster_id in system_to_gold or []:
            alignment = system_to_gold.get(system_cluster_id)
            gold_cluster_id = alignment.get('aligned_to')
            aligned_similarity = alignment.get('aligned_similarity')
            if system_cluster_id == 'None':
                continue
            system_document_clusters = self.get('system_responses').get('document_clusters')
            system_cluster = system_document_clusters.get(document_id).get(system_cluster_id)
            metatype = system_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            if gold_cluster_id == 'None':
                precision, recall, f1 = [0, 0, 0]
                collected.append(
                    TypeMetricScoreV1(logger=self.logger,
                                      run_id=self.get('run_id'),
                                      document_id=document_id,
                                      language=language,
                                      metatype=metatype,
                                      gold_cluster_id=gold_cluster_id,
                                      system_cluster_id=system_cluster_id,
                                      precision=precision,
                                      recall=recall,
                                      f1=f1))
            elif aligned_similarity == 0:
                self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')

    printer = ScorePrinter(self.logger, self.printing_specs)
    sort_order = (('document_id', False),
                  ('metatype', False),
                  ('gold_cluster_id', False),
                  ('system_cluster_id', False))
    for item in multisort(collected, sort_order):
        printer.add(item)
    self.aggregate_scores(printer, TypeMetricScoreV1)
    self.scores = printer
def score_responses(self):
    """Build frame-metric (role-filler) scores plus mean-F1 summary rows.

    Per core document, two passes are made over the cluster alignment:

    1. Every gold Event/Relation cluster gets a score. When it is aligned
       to a system cluster, gold and system ``'<role>:<filler cluster>'``
       slot-filler keys are compared with ``get_precision_recall_and_f1``;
       system filler clusters are first mapped to their aligned gold
       cluster when one exists. Otherwise precision/recall/F1 stay 0.
    2. Every system Event/Relation cluster aligned to ``'None'`` gets a
       zero score.

    Each emitted score also adds its F1 to the ``'<language>:<metatype>'``
    accumulators (each combined with ``ALL``); the means are appended as
    summary rows after the sorted per-cluster rows. The ``ScorePrinter`` is
    stored on ``self.scores``.

    NOTE(review): block nesting below was reconstructed from a
    whitespace-collapsed source — confirm against the canonical file.
    """
    scores = []
    mean_f1s = {}  # running F1 sums per '<language>:<metatype>' key
    counts = {}  # number of scores folded into each key of mean_f1s
    for document_id in self.get('core_documents'):
        # add scores corresponding to all gold clusters
        document = self.get('gold_responses').get('document_mappings').get(
            'documents').get(document_id)
        language = document.get('language')
        document_gold_to_system = self.get('cluster_alignment').get(
            'gold_to_system').get(document_id)
        document_system_to_gold = self.get('cluster_alignment').get(
            'system_to_gold').get(document_id)
        for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
            system_cluster_id = document_gold_to_system.get(
                gold_cluster_id).get('aligned_to')
            aligned_similarity = document_gold_to_system.get(
                gold_cluster_id).get('aligned_similarity')
            # default score, kept when the gold cluster is unaligned
            precision, recall, f1 = [0, 0, 0]
            # the 'None' key is not a real gold cluster
            if gold_cluster_id == 'None':
                continue
            gold_cluster = self.get('cluster', 'gold', document_id,
                                    gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            # only Event and Relation clusters are scored here
            if metatype not in ['Event', 'Relation']:
                continue
            if system_cluster_id != 'None':
                # an alignment with zero similarity is reported as an error
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                      'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id,
                                          system_cluster_id)
                if system_cluster.get('metatype') != metatype:
                    self.record_event(
                        'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                        system_cluster.get('metatype'), system_cluster_id,
                        metatype, gold_cluster_id)
                gold_frame = self.get('frame', 'gold', document_id,
                                      gold_cluster_id)
                gold_slot_fillers = {}
                if gold_frame is None or len(
                        gold_frame.get('role_fillers')) == 0:
                    # a missing/empty gold frame is only reported for Relations
                    if gold_cluster.get('metatype') == 'Relation':
                        self.record_event('MISSING_GOLD_FRAME',
                                          gold_cluster.get('metatype'),
                                          gold_cluster_id, document_id,
                                          self.get('code_location'))
                    # no score (and no aggregate contribution) for this cluster
                    continue
                # gold slot-filler keys: '<role_name>:<gold filler cluster id>'
                for role_name in gold_frame.get('role_fillers'):
                    for gold_filler_cluster_id in gold_frame.get(
                            'role_fillers').get(role_name):
                        gold_slot_fillers['{}:{}'.format(
                            role_name, gold_filler_cluster_id)] = 1
                system_frame = self.get('frame', 'system', document_id,
                                        system_cluster_id)
                if system_frame:
                    system_slot_fillers = {}
                    for role_name in system_frame.get('role_fillers'):
                        for system_filler_cluster_id in system_frame.get(
                                'role_fillers').get(role_name):
                            aligned_gold_filler_cluster_id = document_system_to_gold.get(
                                system_filler_cluster_id).get('aligned_to')
                            aligned_gold_filler_cluster_id_similarity = document_system_to_gold.get(
                                system_filler_cluster_id).get(
                                    'aligned_similarity')
                            if aligned_gold_filler_cluster_id != 'None':
                                if aligned_gold_filler_cluster_id_similarity == 0:
                                    self.record_event(
                                        'DEFAULT_CRITICAL_ERROR',
                                        'aligned_similarity=0')
                                # use the aligned gold cluster id so the key is
                                # comparable with the gold slot-filler keys
                                system_slot_fillers['{}:{}'.format(
                                    role_name,
                                    aligned_gold_filler_cluster_id)] = 1
                            else:
                                # unaligned filler: keep the system cluster id
                                system_slot_fillers['{}:{}'.format(
                                    role_name, system_filler_cluster_id)] = 1
                    # NOTE(review): system_slot_fillers is only bound inside
                    # `if system_frame:`, so this check is assumed to live in
                    # the same branch — confirm.
                    if len(gold_slot_fillers) and len(system_slot_fillers):
                        precision, recall, f1 = get_precision_recall_and_f1(
                            set(gold_slot_fillers.keys()),
                            set(system_slot_fillers.keys()))
            # fold this F1 into the (ALL | metatype) x (ALL | language) buckets
            for metatype_key in ['ALL', metatype]:
                for language_key in ['ALL', language]:
                    key = '{language}:{metatype}'.format(
                        metatype=metatype_key, language=language_key)
                    mean_f1s[key] = mean_f1s.get(key, 0) + f1
                    counts[key] = counts.get(key, 0) + 1
            score = FrameMetricScore(self.logger, self.get('runid'),
                                     document_id, language, metatype,
                                     gold_cluster_id, system_cluster_id,
                                     precision, recall, f1)
            scores.append(score)
        # add scores corresponding to unaligned system clusters
        precision, recall, f1 = [0, 0, 0]
        for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
            gold_cluster_id = document_system_to_gold.get(
                system_cluster_id).get('aligned_to')
            aligned_similarity = document_system_to_gold.get(
                system_cluster_id).get('aligned_similarity')
            if system_cluster_id != 'None':
                if gold_cluster_id == 'None':
                    metatype = self.get('cluster', 'system', document_id,
                                        system_cluster_id).get('metatype')
                    if metatype not in ['Event', 'Relation']:
                        continue
                    # unaligned system clusters count as zero-F1 entries
                    for metatype_key in ['ALL', metatype]:
                        for language_key in ['ALL', language]:
                            key = '{language}:{metatype}'.format(
                                metatype=metatype_key, language=language_key)
                            mean_f1s[key] = mean_f1s.get(key, 0) + f1
                            counts[key] = counts.get(key, 0) + 1
                    score = FrameMetricScore(self.logger, self.get('runid'),
                                             document_id, language, metatype,
                                             gold_cluster_id,
                                             system_cluster_id, precision,
                                             recall, f1)
                    scores.append(score)
                elif aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                      'aligned_similarity=0')
    scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                  self.separator)
    for score in multisort(scores, (('document_id', False),
                                    ('metatype', False),
                                    ('gold_cluster_id', False),
                                    ('system_cluster_id', False))):
        scores_printer.add(score)
    # summary rows: one mean F1 per '<language>:<metatype>' key
    for key in sorted(mean_f1s, key=self.order):
        mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
        language, metatype = key.split(':')
        mean_score = FrameMetricScore(self.logger,
                                      self.get('runid'),
                                      'Summary',
                                      language,
                                      metatype,
                                      '',
                                      '',
                                      '',
                                      '',
                                      mean_f1,
                                      summary=True)
        scores_printer.add(mean_score)
    self.scores = scores_printer
def score_responses(self):
    """Build type-metric scores for Entity/Event clusters plus mean-F1 rows.

    Per core document an ANNOTATED_TYPES_INFO event is recorded, then:

    1. Every gold Entity/Event cluster gets a score. When aligned to a
       system cluster, the augmented gold and system type sets are compared
       with ``get_precision_recall_and_f1``; otherwise the score is 0.
    2. Every system Entity/Event cluster aligned to ``'None'`` gets a zero
       score.

    Each emitted score adds its F1 to the ``'<language>:<metatype>'``
    accumulators (each combined with ``ALL``); the means are appended as
    summary rows after the sorted per-cluster rows. The ``ScorePrinter`` is
    stored on ``self.scores``.
    """
    collected = []
    f1_totals = {}
    f1_counts = {}
    scored_metatypes = ['Entity', 'Event']

    def accumulate(metatype, language, value):
        # add `value` to all four (ALL | metatype) x (ALL | language) buckets
        for metatype_key in ['ALL', metatype]:
            for language_key in ['ALL', language]:
                key = '{language}:{metatype}'.format(metatype=metatype_key,
                                                     language=language_key)
                f1_totals[key] = f1_totals.get(key, 0) + value
                f1_counts[key] = f1_counts.get(key, 0) + 1

    for document_id in self.get('core_documents'):
        # add scores corresponding to all gold clusters
        documents = self.get('gold_responses').get('document_mappings').get('documents')
        language = documents.get(document_id).get('language')
        annotated_types = self.get('annotated_regions').get('types_annotated_for_document',
                                                            document_id)
        self.record_event('ANNOTATED_TYPES_INFO', document_id, ','.join(annotated_types))
        gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
        for gold_cluster_id in gold_to_system or []:
            alignment = gold_to_system.get(gold_cluster_id)
            system_cluster_id = alignment.get('aligned_to')
            aligned_similarity = alignment.get('aligned_similarity')
            precision, recall, f1 = [0, 0, 0]
            if gold_cluster_id == 'None':
                continue
            gold_document_clusters = self.get('gold_responses').get('document_clusters')
            gold_cluster = gold_document_clusters.get(document_id).get(gold_cluster_id)
            metatype = gold_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            if system_cluster_id != 'None':
                if aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                system_cluster = self.get('cluster', 'system', document_id, system_cluster_id)
                if system_cluster.get('metatype') != metatype:
                    self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                                      system_cluster.get('metatype'),
                                      system_cluster_id,
                                      metatype,
                                      gold_cluster_id)
                gold_types = set(gold_cluster.get('all_expanded_types'))
                system_types = set()
                system_document_clusters = self.get('system_responses').get('document_clusters')
                if document_id in system_document_clusters:
                    aligned_cluster = system_document_clusters.get(document_id).get(
                        system_cluster_id)
                    system_types = set(aligned_cluster.get('all_expanded_types'))
                augmented_gold_types = self.get('augmented_types', document_id, gold_types)
                augmented_system_types = self.get('augmented_types', document_id, system_types)
                self.record_event('TEMPORAL_METRIC_SCORE_INFO',
                                  'TYPES_SUBMITTED',
                                  document_id,
                                  gold_cluster_id,
                                  ','.join(gold_types),
                                  system_cluster_id,
                                  ','.join(system_types))
                self.record_event('TEMPORAL_METRIC_SCORE_INFO',
                                  'TYPES_SCORED',
                                  document_id,
                                  gold_cluster_id,
                                  ','.join(augmented_gold_types),
                                  system_cluster_id,
                                  ','.join(augmented_system_types))
                precision, recall, f1 = get_precision_recall_and_f1(augmented_gold_types,
                                                                    augmented_system_types)
            accumulate(metatype, language, f1)
            collected.append(
                TypeMetricScore(self.logger, self.get('runid'), document_id,
                                language, metatype, gold_cluster_id,
                                system_cluster_id, precision, recall, f1))

        # add scores unaligned system clusters
        system_to_gold = self.get('cluster_alignment').get('system_to_gold').get(document_id)
        for system_cluster_id in system_to_gold or []:
            alignment = system_to_gold.get(system_cluster_id)
            gold_cluster_id = alignment.get('aligned_to')
            aligned_similarity = alignment.get('aligned_similarity')
            if system_cluster_id == 'None':
                continue
            system_document_clusters = self.get('system_responses').get('document_clusters')
            system_cluster = system_document_clusters.get(document_id).get(system_cluster_id)
            metatype = system_cluster.get('metatype')
            if metatype not in scored_metatypes:
                continue
            if gold_cluster_id == 'None':
                precision, recall, f1 = [0, 0, 0]
                accumulate(metatype, language, f1)
                collected.append(
                    TypeMetricScore(self.logger, self.get('runid'), document_id,
                                    language, metatype, gold_cluster_id,
                                    system_cluster_id, precision, recall, f1))
            elif aligned_similarity == 0:
                self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')

    printer = ScorePrinter(self.logger, self.printing_specs, self.separator)
    sort_order = (('document_id', False),
                  ('metatype', False),
                  ('gold_cluster_id', False),
                  ('system_cluster_id', False))
    for item in multisort(collected, sort_order):
        printer.add(item)

    for key in sorted(f1_totals, key=self.order):
        if f1_counts[key]:
            mean_f1 = f1_totals[key] / f1_counts[key]
        else:
            mean_f1 = 0
        language, metatype = key.split(':')
        printer.add(
            TypeMetricScore(self.logger, self.get('runid'), 'Summary',
                            language, metatype, '', '', '', '', mean_f1,
                            summary=True))
    self.scores = printer
def main(args):
    """Convert annotated-region entries into a sorted, de-duplicated TSV file.

    Reads the ontology type mappings and the four boundary files named in
    ``args``, then turns every entry of ``args.input`` into an output row
    (document id, document element / keyframe id, modality, region, type).
    The rows are sorted, exact duplicate lines are dropped, and the result
    is written to ``args.output`` with a header line. The process then
    exits with ``ALLOK_EXIT_CODE``.

    Args:
        args: parsed command-line arguments; the attributes read here are
            ``log``, ``log_specifications``, ``ontology_type_mappings``,
            ``sentence_boundaries``, ``image_boundaries``,
            ``video_boundaries``, ``keyframe_boundaries``, ``input`` and
            ``output``.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)

    type_mappings = Container(logger)
    for entry in FileHandler(logger, args.ontology_type_mappings):
        type_mappings.add(key=entry.get('full_type_ov'),
                          value=entry.get('full_type'))

    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries,
    }

    output = []
    for entry in FileHandler(logger, args.input):
        document_id = entry.get('root_doc_id')
        document_element_id = entry.get('doc_element_id')
        modality = entry.get('media_type')
        # named entry_type so the builtin `type` is not shadowed
        entry_type = entry.get('type')
        subtype = entry.get('subtype')
        subsubtype = entry.get('subsubtype')
        # apply patch to correct LDC's mistake in annotation
        if entry_type == 'personalsocial' and subtype == 'unspecified':
            subtype = 'relationship'
        full_type = '{type}.{subtype}.{subsubtype}'.format(
            type=entry_type, subtype=subtype, subsubtype=subsubtype)
        full_type_cleaned = full_type.replace('.unspecified', '')
        propercased_full_type = type_mappings.get(full_type_cleaned, None)
        if propercased_full_type is None:
            logger.record_event(
                'DEFAULT_CRITICAL_ERROR',
                'propercased_full_type is None for full_type: {}'.format(
                    full_type))

        # Normalise the span into a region string. Three span shapes are
        # recognised; anything else yields no region.
        span_string = entry.get('span')
        keyframe_id = None
        keyframe_num = 0
        if span_string == 'ENTIRE_DOCUMENT_ELEMENT':
            # whole element: use the boundary registered for its modality
            document_boundary = document_boundaries.get(modality).get(
                document_element_id)
            span_string = str(document_boundary)
        elif '-' in span_string:
            # 'start-end' span
            start, end = span_string.split('-')
            span_string = '({start},0)-({end},0)'.format(start=start, end=end)
        elif '_' in span_string:
            # keyframe id; the part after the first '_' is its number
            keyframe_id = span_string
            keyframe_num = span_string.split('_')[1]
            document_boundary = document_boundaries.get('keyframe').get(
                keyframe_id)
            span_string = str(document_boundary)
        else:
            span_string = None

        output.append({
            'document_id': document_id,
            'document_element_id': document_element_id,
            'keyframe_id': keyframe_id,
            'keyframe_num': int(keyframe_num),
            'modality': modality,
            'region': span_string,
            'type': propercased_full_type,
        })

    header = [
        'document_id', 'document_element_or_keyframe_id', 'modality',
        'region', 'type'
    ]
    sort_order = (('document_id', False),
                  ('modality', False),
                  ('document_element_id', False),
                  ('keyframe_num', False),
                  ('region', False),
                  ('type', False))
    printed = set()
    # The context manager closes the file even if sorting or formatting raises.
    with open(args.output, 'w') as fh:
        fh.write('{}\n'.format('\t'.join(header)))
        for output_object in multisort(output, sort_order):
            line = get_line(output_object, header)
            if line not in printed:
                fh.write('{}\n'.format(line))
                printed.add(line)
    exit(ALLOK_EXIT_CODE)