def record_alignment(self, similarities, mappings):
    """Align gold clusters to system clusters and record the result.

    Runs the Munkres (Hungarian) algorithm on the cost matrix derived from
    `similarities`, then stores every assignment with a positive similarity
    in both directions of the 'alignment' container ('gold_to_system' and
    'system_to_gold'). Pairs assigned by Munkres but with zero similarity
    are dropped. No-op when `similarities` is empty.
    """
    if not similarities:
        return
    assignment = Munkres().compute(get_cost_matrix(similarities, mappings))
    for g_index, s_index in assignment:
        # Translate matrix indices back to cluster objects.
        gold = self.get('cluster', 'gold',
                        mappings['gold']['index_to_id'][g_index])
        system = self.get('cluster', 'system',
                          mappings['system']['index_to_id'][s_index])
        gold_id = gold.get('ID')
        system_id = system.get('ID')
        score = self.lookup_similarity(similarities, gold_id, system_id)
        if score <= 0:
            # Munkres may force an assignment even when nothing overlaps;
            # only genuinely similar pairs are recorded.
            continue
        self.get('alignment').get('gold_to_system')[gold_id] = {
            'aligned_to': system_id,
            'aligned_similarity': score
        }
        self.get('alignment').get('system_to_gold')[system_id] = {
            'aligned_to': gold_id,
            'aligned_similarity': score
        }
def get_entity_and_event_similarity(self, gold_cluster, system_cluster, iou_threshold=0.8):
    """Return the similarity between a gold and a system cluster.

    If the clusters share at least one top-level type, gold and system
    mentions are aligned one-to-one with the Munkres algorithm over
    intersection-over-union (IoU) scores; the similarity is the count of
    aligned mention pairs with positive IoU (lenient scoring).

    Args:
        gold_cluster: gold cluster object exposing get('mentions') /
            get('top_level_types').
        system_cluster: system cluster object with the same interface.
        iou_threshold: IoU values strictly below this cutoff are zeroed
            before alignment (default 0.8, the original hard-coded value,
            so existing callers are unaffected).

    Returns:
        int: number of aligned mention pairs with IoU > 0; 0 when the
        clusters share no top-level type.
    """
    similarity = 0
    if self.get('number_of_matching_types',
                gold_cluster.get('top_level_types'),
                system_cluster.get('top_level_types')):
        mentions = {
            'gold': list(gold_cluster.get('mentions').values()),
            'system': list(system_cluster.get('mentions').values())
        }
        # Build bidirectional mention-ID <-> matrix-index maps per side.
        mappings = {}
        for filetype in mentions:
            mappings[filetype] = {'id_to_index': {}, 'index_to_id': {}}
            for index, mention in enumerate(mentions[filetype]):
                mappings[filetype]['id_to_index'][mention.get('ID')] = index
                mappings[filetype]['index_to_id'][index] = mention.get('ID')
        # Pairwise IoU matrix; values below the cutoff are treated as no overlap.
        similarities = {}
        for gold_mention in mentions['gold']:
            row = similarities.setdefault(gold_mention.get('ID'), {})
            for system_mention in mentions['system']:
                iou = get_intersection_over_union(gold_mention, system_mention)
                row[system_mention.get('ID')] = 0 if iou < iou_threshold else iou
        cost_matrix = get_cost_matrix(similarities, mappings)
        # NOTE: `alignment` records the full assignment for inspection even
        # though only the positive-IoU count feeds the returned similarity.
        alignment = {'gold_mention': {}, 'system_mention': {}}
        for gold_mention_index, system_mention_index in Munkres().compute(
                cost_matrix):
            gold_mention_id = mappings['gold']['index_to_id'][gold_mention_index]
            system_mention_id = mappings['system']['index_to_id'][system_mention_index]
            score = similarities[gold_mention_id][system_mention_id]
            alignment['gold_mention'][gold_mention_id] = {
                'system_mention': system_mention_id,
                'score': score
            }
            alignment['system_mention'][system_mention_id] = {
                'gold_mention': gold_mention_id,
                'score': score
            }
            if score > 0:
                # lenient similarity computation
                similarity += 1
                # alternative would be to add up the amount of overlap
                # similarity += score
    return similarity
def get_counts(self, query_id):
    """Compute per-query scoring counts for Task2-style evaluation.

    Pipeline: select the top-C clusters from the system responses, weight
    and (optionally) normalize response confidences, categorize every
    response under PRE_POLICY/POST_POLICY rules, compute pairwise Average
    Precision (AP) between selected clusters and gold equivalence classes,
    align clusters to equivalence classes with Munkres, and return a dict
    of counts including the mean AP ('average_precision').

    NOTE(review): structure reconstructed from collapsed source — statement
    nesting (esp. inside compute_AP and the cutoff/truncate block) should be
    confirmed against the original file.
    """
    def apply_normalization_and_compute_weights(responses, cluster_id, APPLY_NORMALIZATION, APPLY_WEIGHTS):
        # Attach a 'weight' to every response; optionally normalize
        # justification confidences within `cluster_id` first.
        def compute_weights(responses, APPLY_NORMALIZATION, APPLY_WEIGHTS):
            # weight = 1 unless weighting is enabled, in which case the
            # (optionally normalized) justification confidence is used.
            for response in responses.values():
                weight = 1
                if APPLY_WEIGHTS:
                    if APPLY_NORMALIZATION:
                        weight = response.get('normalized_justification_confidence')
                    else:
                        weight = trim_cv(response.get('justification_confidence'))
                response.set('weight', weight)
        def normalize_confidences(responses, cluster_id):
            # Divide every response's confidence by the max confidence seen
            # among responses of `cluster_id`.
            # NOTE(review): if no response matches cluster_id, max_confidence
            # stays None and the division below raises — presumably callers
            # guarantee at least one match; confirm.
            # NOTE(review): the second loop normalizes ALL responses using
            # this one cluster's maximum — verify that is intended.
            max_confidence = None
            for response in responses.values():
                if response.get('cluster_id') != cluster_id:
                    continue
                justification_confidence = trim_cv(response.get('justification_confidence'))
                if max_confidence is None:
                    max_confidence = justification_confidence
                if justification_confidence > max_confidence:
                    max_confidence = justification_confidence
            for response in responses.values():
                normalized_confidence_value = trim_cv(response.get('justification_confidence'))/max_confidence
                response.set('normalized_justification_confidence', normalized_confidence_value)
        if APPLY_NORMALIZATION:
            normalize_confidences(responses, cluster_id)
        compute_weights(responses, APPLY_NORMALIZATION, APPLY_WEIGHTS)
    def order(r):
        # Sort key: pooled-and-assessed responses by their pooling rank;
        # everything else sinks to the end (MAXINT).
        if r.get('is_pooled') and r.get('assessment') is not None:
            return r.get('response_rank')
        return MAXINT
    def compute_AP(logger, query_id, num_ground_truth, responses, cluster_id, fqec, TRUNCATE):
        # Average Precision of the ranked list of pooled, valid, assessed
        # responses of `cluster_id` against equivalence class `fqec`.
        # TRUNCATE (an int or False) caps the ranked list length.
        num_responses = 0
        num_right = 0
        sum_precision = 0
        for response in sorted(responses.values(), key=order):
            if response.get('cluster_id') != cluster_id:
                continue
            if response.get('is_pooled') and response.get('valid') and response.get('assessment') is not None:
                post_policy_assessment = response.get('categorization').get('POST_POLICY')
                response_fqec = response.get('assessment').get('fqec')
                if TRUNCATE and num_responses == TRUNCATE:
                    break
                num_responses += 1
                # A hit contributes its weight; precision accumulates only
                # at the ranks of hits (standard weighted AP).
                if post_policy_assessment == 'RIGHT' and fqec == response_fqec:
                    num_right += response.get('weight')
                    sum_precision += num_right/num_responses
                logger.record_event('AP_RANKED_LIST', query_id, num_ground_truth, cluster_id, fqec, num_responses, response.get('mention_span_text'), post_policy_assessment, response.get('weight'), sum_precision, response.get('where'))
        ap = sum_precision/num_ground_truth if num_ground_truth else 0
        logger.record_event('PAIR_WISE_AP', query_id, cluster_id, fqec, ap)
        return ap
    def lookup_AP(APs, item_a, item_b):
        # Safe two-level lookup; missing pairs score 0.
        if item_a in APs:
            if item_b in APs.get(item_a):
                return APs.get(item_a).get(item_b)
        return 0
    def record_categorized_response(categorized_responses, policy, category_name, response):
        # File the response under `policy`/`category_name` and mirror the
        # category onto the response's own 'categorization' record.
        categorized_responses.get(policy).setdefault(category_name, list()).append(response)
        if response.get('categorization') is None:
            response.set('categorization', {'PRE_POLICY': set(), 'POST_POLICY': None})
        if policy == 'PRE_POLICY':
            # A response may carry several PRE_POLICY categories (a set).
            response.get('categorization').get(policy).add(category_name)
        else:
            # ['RIGHT', 'WRONG', 'IGNORED'] are the only allowed POST_POLICY values
            if category_name not in ['RIGHT', 'WRONG', 'IGNORED']:
                response.record_event('INVALID_POSTPOLICY_CATEGORIZATION', category_name, response.get('where'))
            if response.get('categorization').get(policy) is None:
                response.get('categorization')['POST_POLICY'] = category_name
            # Overwriting POST_POLICY assessment with a different value is not allowed
            elif response.get('categorization').get(policy) != category_name:
                response.record_event('OVERWRITING_POSTPOLICY_CATEGORIZATION', response.get('categorization').get(policy), category_name, response.get('where'))
    def categorize_responses(responses, selected_clusters, categorized_responses, ids):
        # Walk every response, assign PRE_POLICY/POST_POLICY categories, and
        # collect the cluster IDs / equivalence classes that will be scored.
        if responses is None:
            return
        selected_cluster_justifications = {}
        for cluster_id in selected_clusters:
            selected_cluster_justifications[cluster_id] = pooler.get('top_K_cluster_justifications', selected_clusters[cluster_id], K=num_documents)
        for response in responses.values():
            record_categorized_response(categorized_responses, 'PRE_POLICY', 'SUBMITTED', response)
            if response.get('cluster_id') in selected_clusters:
                if response.get('valid'):
                    record_categorized_response(categorized_responses, 'PRE_POLICY', 'VALID', response)
                    if response.get('is_pooled'):
                        record_categorized_response(categorized_responses, 'PRE_POLICY', 'METPOOLINGCRITERIA', response)
                    else:
                        # Valid but unpooled: ignored post-policy.
                        record_categorized_response(categorized_responses, 'PRE_POLICY', 'NOTMETPOOLINGCRITERIA', response)
                        record_categorized_response(categorized_responses, 'POST_POLICY', 'IGNORED', response)
                        continue
                else:
                    # Invalid responses never meet pooling criteria.
                    record_categorized_response(categorized_responses, 'PRE_POLICY', 'INVALID', response)
                    record_categorized_response(categorized_responses, 'PRE_POLICY', 'NOTMETPOOLINGCRITERIA', response)
                    record_categorized_response(categorized_responses, 'POST_POLICY', 'IGNORED', response)
                    continue
            else:
                # Response's cluster was not among the top-C selected ones.
                if response.get('valid'):
                    record_categorized_response(categorized_responses, 'PRE_POLICY', 'VALID', response)
                else:
                    record_categorized_response(categorized_responses, 'PRE_POLICY', 'INVALID', response)
                record_categorized_response(categorized_responses, 'PRE_POLICY', 'NOTMETPOOLINGCRITERIA', response)
                record_categorized_response(categorized_responses, 'POST_POLICY', 'IGNORED', response)
                continue
            # Only valid, pooled responses from selected clusters reach here.
            mention_span_text = response.get('mention_span_text')
            pre_policy_assessment = None
            if mention_span_text in assessments:
                response.set('assessment', assessments.get(mention_span_text))
                pre_policy_assessment = assessments.get(mention_span_text).get('assessment')
                # CORRECT/INEXACT collapse to RIGHT; everything else is WRONG.
                post_policy_assessment = 'RIGHT' if pre_policy_assessment in ['CORRECT', 'INEXACT'] else 'WRONG'
                record_categorized_response(categorized_responses, 'PRE_POLICY', pre_policy_assessment, response)
                record_categorized_response(categorized_responses, 'POST_POLICY', post_policy_assessment, response)
            else:
                # Pooled but never assessed — flag it and ignore.
                record_categorized_response(categorized_responses, 'PRE_POLICY', 'NOTASSESSED', response)
                record_categorized_response(categorized_responses, 'POST_POLICY', 'IGNORED', response)
                logger.record_event('ITEM_MET_POOLING_CRITERIA_BUT_NOT_ASSESSED', mention_span_text, response.get('where'))
                continue
            # Attach pooling ranks when this justification was selected.
            selected_justifications = selected_cluster_justifications[response.get('cluster_id')]
            if mention_span_text in selected_justifications:
                response.set('response_rank', selected_justifications[mention_span_text]['response_rank'])
                response.set('cluster_rank', selected_justifications[mention_span_text]['cluster_rank'])
            ids['clusters'].add(response.get('cluster_id'))
            if post_policy_assessment == 'RIGHT':
                ids['equivalence_classes'].add(response.get('assessment').get('fqec'))
        for response in responses.values():
            # NOTE(review): POST_POLICY is a single string (or None), so
            # sorted(...) iterates its characters and the join emits e.g.
            # 'G,H,I,R,T' — and raises TypeError if POST_POLICY is None.
            # Looks unintended; confirm against the original file.
            logger.record_event('RESPONSE_CATEGORIZATION_INFO', query_id, response.get('cluster_id'), response.get('mention_span_text'), response.get('linking_confidence'), response.get('cluster_rank'), response.get('justification_confidence'), response.get('weight'), response.get('response_rank'), ','.join(sorted(response.get('categorization').get('PRE_POLICY'))), ','.join(sorted(response.get('categorization').get('POST_POLICY'))), response.get('where'))
    logger = self.get('logger')
    responses = self.get('query_responses', query_id)
    assessments = self.get('entity_assessments', query_id)
    pooler = Task2Pool(logger, DONOT_VALIDATE_DESCRIPTOR=True)
    num_clusters = int(self.get('queries_to_score').get(query_id).get('clusters'))
    num_documents = int(self.get('queries_to_score').get(query_id).get('documents'))
    # Top-C cluster selection; empty when there are no responses at all.
    selected_clusters = pooler.get('top_C_clusters', responses, C=num_clusters) if responses else []
    for cluster_id in selected_clusters:
        apply_normalization_and_compute_weights(responses, cluster_id, APPLY_NORMALIZATION=self.get('normalize'), APPLY_WEIGHTS=self.get('weighted'))
    ids = {
        'clusters': set(),
        'equivalence_classes': self.get('equivalence_classes', query_id)
    }
    categorized_responses = {'PRE_POLICY': {}, 'POST_POLICY': {}}
    categorize_responses(responses, selected_clusters, categorized_responses, ids)
    num_rel_documents = self.get('num_rel_documents', query_id)
    num_rel_documents_counted = num_rel_documents
    truncate = False
    if self.get('cutoff'):
        # With cutoff enabled, ranked lists are truncated at num_documents
        # and the AP denominator is capped accordingly.
        truncate = num_documents
        if num_rel_documents_counted > num_documents:
            num_rel_documents_counted = num_documents
    # Pairwise AP between every scored cluster and every equivalence class.
    APs = {}
    for cluster_id in ids['clusters']:
        if cluster_id not in APs:
            APs[cluster_id] = {}
        for fqec in ids['equivalence_classes']:
            APs[cluster_id][fqec] = compute_AP(logger, query_id, num_rel_documents_counted, responses, cluster_id, fqec, truncate)
    # ID <-> index maps (sorted for determinism) for the Munkres matrix.
    mappings = {}
    for item_type in ['clusters', 'equivalence_classes']:
        mappings[item_type] = {'id_to_index': {}, 'index_to_id': {}}
        index = 0
        for item_id in sorted(ids.get(item_type)):
            mappings[item_type]['id_to_index'][item_id] = index
            mappings[item_type]['index_to_id'][index] = item_id
            index += 1
    alignment = {'cluster_to_fqec': {}, 'fqec_to_cluster': {}}
    if len(APs):
        cost_matrix = get_cost_matrix(APs, mappings, type_a='clusters', type_b='equivalence_classes')
        for cluster_index, fqec_index in Munkres().compute(cost_matrix):
            cluster_id = mappings['clusters']['index_to_id'][cluster_index]
            fqec = mappings['equivalence_classes']['index_to_id'][fqec_index]
            AP = lookup_AP(APs, cluster_id, fqec)
            # Record only alignments with positive AP.
            if AP > 0:
                alignment.get('cluster_to_fqec')[cluster_id] = {
                    'aligned_to': fqec,
                    'AP': AP
                }
                alignment.get('fqec_to_cluster')[fqec] = {
                    'aligned_to': cluster_id,
                    'AP': AP
                }
                logger.record_event('ALIGNMENT_INFO', query_id, cluster_id, fqec)
    # Mean AP over min(#equivalence classes, num_clusters); 0 if empty.
    sum_average_precision = 0
    denominator_for_mean = len(ids['equivalence_classes'])
    if denominator_for_mean > num_clusters:
        denominator_for_mean = num_clusters
    for cluster_id in alignment.get('cluster_to_fqec'):
        sum_average_precision += alignment.get('cluster_to_fqec').get(cluster_id).get('AP')
    score = sum_average_precision/denominator_for_mean if denominator_for_mean != 0 else 0
    counts = {'average_precision': score, 'num_rel_documents': num_rel_documents, 'num_rel_documents_counted': num_rel_documents_counted}
    # Fill any remaining 'num_*' printing fields from categorized responses.
    for field_name in [s.get('name') for s in self.get('printing_specs') if s.get('name').startswith('num_')]:
        counts[field_name] = counts[field_name] if field_name in counts else self.get(field_name, categorized_responses)
    return counts
def get_entity_and_event_similarity(self, gold_cluster, system_cluster):
    """Score how similar a gold cluster is to a system cluster.

    Returns 0 immediately when the clusters share no top-level type.
    Otherwise mention pairs are scored by intersection-over-union (IoU),
    zeroed below a modality/language-specific threshold, and aligned
    one-to-one with the Munkres algorithm. The similarity is either the
    count of aligned pairs with positive IoU (weighted == 'no') or the sum
    of their IoU values (weighted == 'yes').
    """
    if not self.get('number_of_matching_types',
                    gold_cluster.get('top_level_types'),
                    system_cluster.get('top_level_types')):
        return 0
    mentions = {
        'gold': list(gold_cluster.get('mentions').values()),
        'system': list(system_cluster.get('mentions').values())
    }
    # Bidirectional mention-ID <-> matrix-index maps for each side.
    mappings = {}
    for side, side_mentions in mentions.items():
        id_to_index = {}
        index_to_id = {}
        for position, mention in enumerate(side_mentions):
            id_to_index[mention.get('ID')] = position
            index_to_id[position] = mention.get('ID')
        mappings[side] = {'id_to_index': id_to_index, 'index_to_id': index_to_id}
    # Pairwise IoU matrix; the zeroing threshold depends on the gold
    # mention's document modality and language.
    document_mappings = self.get('document_mappings')
    similarities = {}
    for g_mention in mentions['gold']:
        row = similarities.setdefault(g_mention.get('ID'), {})
        doc_elem = g_mention.get('document_element_id')
        modality = document_mappings.get('modality', doc_elem)
        language = document_mappings.get('language', doc_elem)
        cutoff = self.get('threshold', modality, language)
        for s_mention in mentions['system']:
            overlap = get_intersection_over_union(g_mention, s_mention)
            row[s_mention.get('ID')] = overlap if overlap >= cutoff else 0
    # One-to-one mention alignment; record it, then accumulate similarity.
    similarity = 0
    assignment = Munkres().compute(get_cost_matrix(similarities, mappings))
    alignment = {'gold_mention': {}, 'system_mention': {}}
    for g_index, s_index in assignment:
        g_id = mappings['gold']['index_to_id'][g_index]
        s_id = mappings['system']['index_to_id'][s_index]
        score = similarities[g_id][s_id]
        alignment['gold_mention'][g_id] = {
            'system_mention': s_id,
            'score': score
        }
        alignment['system_mention'][s_id] = {
            'gold_mention': g_id,
            'score': score
        }
        if score > 0:
            # lenient similarity computation
            if self.get('weighted') == 'no':
                # total mentions
                similarity += 1
            elif self.get('weighted') == 'yes':
                # total iou
                similarity += score
    return similarity