Example #1
    def score_responses(self):
        scores = []
        sum_average_precision = 0
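        # one score row per query; each query's counts include its average precision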
        for query_id in self.get('queries_to_score'):
            entity_id = self.get('entity_id', query_id)
            counts = self.get('counts', query_id)
            sum_average_precision += counts['average_precision']
            score = AcrossDocumentsCoreferenceMetricScore(self.get('logger'),
                                                          run_id=self.get('run_id'),
                                                          query_id=query_id,
                                                          entity_id=entity_id,
                                                          **counts)
            scores.append(score)

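        # summary row: macro-average of average precision over all scored queries;
        # the 'num_*' count fields are left blank in the summary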
        macro_counts = {'average_precision': sum_average_precision/len(self.get('queries_to_score'))}
        for field_name in [s.get('name') for s in self.get('printing_specs') if s.get('name').startswith('num_')]:
            macro_counts.setdefault(field_name, '')
        macro_average_score = AcrossDocumentsCoreferenceMetricScore(self.get('logger'),
                                                                    run_id=self.get('run_id'),
                                                                    query_id='ALL-Macro',
                                                                    entity_id='Summary',
                                                                    summary=True,
                                                                    **macro_counts)

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('entity_id', False),
                                        ('query_id', False))):
            scores_printer.add(score)
        scores_printer.add(macro_average_score)
        self.scores = scores_printer
Example #2
    def score_responses(self):
        metatypes = {
            'ALL': ['Event', 'Relation'],
            'Event': ['Event'],
            'Relation': ['Relation']
        }
        scores = []
        for document_id in self.get('core_documents'):
            language = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id).get('language')
            gold_trfs = self.get('document_type_role_fillers', 'gold',
                                 document_id)
            system_trfs = self.get('document_type_role_fillers', 'system',
                                   document_id)
            self.align_trfs(document_id, gold_trfs, system_trfs)
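            # score type-role-fillers per metatype; metatypes with neither gold nor system entries are skipped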
            for metatype_key in metatypes:
                num_gold_trf, num_system_trf, precision, recall, f1 = self.get(
                    'score', gold_trfs, system_trfs, metatypes[metatype_key])
                if num_gold_trf + num_system_trf == 0: continue
                score = ArgumentMetricScore(logger=self.logger,
                                            run_id=self.get('run_id'),
                                            document_id=document_id,
                                            language=language,
                                            metatype=metatype_key,
                                            precision=precision,
                                            recall=recall,
                                            f1=f1)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype_sortkey', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, ArgumentMetricScore)
        self.scores = scores_printer
Example #3
    def score_responses(self):
        metatypes = {
            'ALL': ['Entity', 'Event'],
            'Entity': ['Entity'],
            'Event': ['Event']
        }
        scores = []
        mean_f1s = {}
        counts = {}
        for document_id in self.get('core_documents'):
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            for metatype_key in metatypes:
                max_total_similarity = self.get('max_total_similarity',
                                                document_id,
                                                metatypes[metatype_key])
                total_self_similarity_gold = self.get('total_self_similarity',
                                                      'gold', document_id,
                                                      metatypes[metatype_key])
                total_self_similarity_system = self.get(
                    'total_self_similarity', 'system', document_id,
                    metatypes[metatype_key])

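                # similarity-based precision/recall: the best alignment's total similarity is
                # normalized by each side's self-similarity (gold self-similarity is assumed
                # nonzero here)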
                precision = max_total_similarity / total_self_similarity_system if total_self_similarity_system else 0
                recall = max_total_similarity / total_self_similarity_gold
                f1 = 2 * precision * recall / (
                    precision + recall) if precision + recall else 0
                score = CoreferenceMetricScore(self.logger, self.get('runid'),
                                               document_id, language,
                                               metatype_key, precision, recall,
                                               f1)
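                # accumulate F1 for the 'ALL' and per-language summary rows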
                for language_key in ['ALL', language]:
                    key = '{language}:{metatype}'.format(language=language_key,
                                                         metatype=metatype_key)
                    mean_f1s[key] = mean_f1s.get(key, 0) + f1
                    counts[key] = counts.get(key, 0) + 1
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype_sortkey', False))):
            scores_printer.add(score)

        for key in sorted(mean_f1s, key=self.order):
            mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
            language, metatype = key.split(':')
            mean_score = CoreferenceMetricScore(self.logger,
                                                self.get('runid'),
                                                'Summary',
                                                language,
                                                metatype,
                                                '',
                                                '',
                                                mean_f1,
                                                summary=True)
            scores_printer.add(mean_score)
        self.scores = scores_printer
Example #4
    def score_responses(self):
        metatypes = {
            'ALL': ['Event', 'Relation'],
            'Event': ['Event'],
            'Relation': ['Relation']
        }
        scores = []
        mean_f1s = {}
        counts = {}
        for document_id in self.get('core_documents'):
            language = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id).get('language')
            gold_trfs = self.get('document_type_role_fillers', 'gold',
                                 document_id)
            system_trfs = self.get('document_type_role_fillers', 'system',
                                   document_id)
            self.align_trfs(document_id, gold_trfs, system_trfs)
            for metatype_key in metatypes:
                num_gold_trf, num_system_trf, precision, recall, f1 = self.get(
                    'score', gold_trfs, system_trfs, metatypes[metatype_key])
                if num_gold_trf + num_system_trf == 0: continue
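                # roll the F1 into the 'ALL' and per-language aggregates used for the summary rows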
                for language_key in ['ALL', language]:
                    aggregate_key = '{language}:{metatype}'.format(
                        language=language_key, metatype=metatype_key)
                    mean_f1s[aggregate_key] = mean_f1s.get(aggregate_key,
                                                           0) + f1
                    counts[aggregate_key] = counts.get(aggregate_key, 0) + 1
                score = ArgumentMetricScore(self.logger, self.get('runid'),
                                            document_id, language,
                                            metatype_key, precision, recall,
                                            f1)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype_sortkey', False))):
            scores_printer.add(score)

        for key in sorted(mean_f1s, key=self.order):
            mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
            language, metatype = key.split(':')
            mean_score = ArgumentMetricScore(self.logger,
                                             self.get('runid'),
                                             'Summary',
                                             language,
                                             metatype,
                                             '',
                                             '',
                                             mean_f1,
                                             summary=True)
            scores_printer.add(mean_score)

        self.scores = scores_printer
Example #5
    def score_responses(self):
        scores = []
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get('documents').get(document_id)
            language = document.get('language')
            document_gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(gold_cluster_id).get('aligned_similarity')
                similarity = 0
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('cluster', 'gold', document_id, gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Event', 'Relation']: continue
                if list(gold_cluster.get('dates').values())[0] is None:
                    self.record_event('NO_TEMPORAL_CONSTRAINT', gold_cluster_id, document_id)
                    continue
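                # compare the gold cluster's single date constraint against the aligned system cluster's dates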
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id, system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE', system_cluster.get('metatype'), system_cluster_id, metatype, gold_cluster_id)
                    if len(gold_cluster.get('dates').keys()) > 1:
                        self.record_event('UNEXPECTED_NUM_DATES', gold_cluster_id, document_id)
                    similarity = self.get('temporal_similarity', list(gold_cluster.get('dates').values())[0], list(system_cluster.get('dates').values()))
                score = TemporalMetricScore(logger=self.logger,
                                            run_id=self.get('run_id'),
                                            document_id=document_id,
                                            language=language,
                                            metatype=metatype,
                                            gold_cluster_id=gold_cluster_id,
                                            system_cluster_id=system_cluster_id,
                                            similarity=similarity)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype', False),
                                        ('gold_cluster_id', False),
                                        ('system_cluster_id', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, TemporalMetricScore)
        self.scores = scores_printer
Example #6
    def score_responses(self):
        metatypes = {
            'ALL': ['Entity', 'Event'],
            'Entity': ['Entity'],
            'Event': ['Event']
        }
        scores = []
        for document_id in self.get('core_documents'):
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            for metatype_key in metatypes:
                max_total_similarity = self.get('max_total_similarity',
                                                document_id,
                                                metatypes[metatype_key])
                total_self_similarity_gold = self.get('total_self_similarity',
                                                      'gold', document_id,
                                                      metatypes[metatype_key])
                total_self_similarity_system = self.get(
                    'total_self_similarity', 'system', document_id,
                    metatypes[metatype_key])

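                # precision/recall normalize the best alignment's total similarity by each side's
                # self-similarity (gold self-similarity is assumed nonzero here)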
                precision = max_total_similarity / total_self_similarity_system if total_self_similarity_system else 0
                recall = max_total_similarity / total_self_similarity_gold
                f1 = 2 * precision * recall / (
                    precision + recall) if precision + recall else 0
                score = CoreferenceMetricScore(logger=self.logger,
                                               run_id=self.get('run_id'),
                                               document_id=document_id,
                                               language=language,
                                               metatype=metatype_key,
                                               precision=precision,
                                               recall=recall,
                                               f1=f1)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype_sortkey', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, CoreferenceMetricScore)
        self.scores = scores_printer
Example #7
    def get_average_precision(self, document_id, gold_cluster_id,
                              augmented_gold_types, system_cluster_id,
                              augmented_system_types):
        entity_types = {
            'gold': augmented_gold_types,
            'system': augmented_system_types
        }
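        # weight every system-submitted type; the ranked list below is ordered by descending weight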
        type_weights = list()
        for entity_type in entity_types.get('system'):
            type_weight = {
                'type': entity_type,
                'weight': self.get('type_weight',
                                   entity_types.get('system').get(entity_type))
            }
            type_weights.append(type_weight)

        num_ground_truth = len(entity_types.get('gold'))
        rank = 0
        num_correct = 0
        sum_precision = 0.0
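        # walk the ranked list: a system type is RIGHT if it is among the gold types; hits add
        # their relevance weight to the correct count and precision-at-rank to the running sum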
        for type_weight in multisort(type_weights,
                                     (('weight', True), ('type', False))):
            rank += 1
            label = 'WRONG'
            if type_weight.get('type') in entity_types.get('gold'):
                label = 'RIGHT'
                num_correct += self.get('relevance_weight',
                                        type_weight.get('weight'))
                sum_precision += (num_correct / rank)
            self.record_event('TYPE_METRIC_AP_RANKED_LIST',
                              self.__class__.__name__, document_id,
                              gold_cluster_id,
                              system_cluster_id, num_ground_truth, rank,
                              type_weight.get('type'), label,
                              type_weight.get('weight'), num_correct,
                              sum_precision)

        average_precision = (sum_precision /
                             num_ground_truth) if num_ground_truth else 0
        return average_precision
Example #8
    def score_responses(self):
        scores = []
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            document_system_to_gold = self.get('cluster_alignment').get(
                'system_to_gold').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                precision, recall, f1 = [0, 0, 0]
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('cluster', 'gold', document_id,
                                        gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Event', 'Relation']: continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id,
                                              system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event(
                            'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                            system_cluster.get('metatype'), system_cluster_id,
                            metatype, gold_cluster_id)
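                    # build role:filler sets for the gold and system frames; system fillers are
                    # mapped to their aligned gold clusters before the sets are compared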
                    gold_frame = self.get('frame', 'gold', document_id,
                                          gold_cluster_id)
                    gold_slot_fillers = {}
                    if gold_frame is None or len(
                            gold_frame.get('role_fillers')) == 0:
                        if gold_cluster.get('metatype') == 'Relation':
                            self.record_event('MISSING_GOLD_FRAME',
                                              gold_cluster.get('metatype'),
                                              gold_cluster_id, document_id,
                                              self.get('code_location'))
                        continue
                    for role_name in gold_frame.get('role_fillers'):
                        for gold_filler_cluster_id in gold_frame.get(
                                'role_fillers').get(role_name):
                            gold_slot_fillers['{}:{}'.format(
                                role_name, gold_filler_cluster_id)] = 1
                    system_frame = self.get('frame', 'system', document_id,
                                            system_cluster_id)
                    if system_frame:
                        system_slot_fillers = {}
                        for role_name in system_frame.get('role_fillers'):
                            for system_filler_cluster_id in system_frame.get(
                                    'role_fillers').get(role_name):
                                aligned_gold_filler_cluster_id = document_system_to_gold.get(
                                    system_filler_cluster_id).get('aligned_to')
                                aligned_gold_filler_cluster_id_similarity = document_system_to_gold.get(
                                    system_filler_cluster_id).get(
                                        'aligned_similarity')
                                if aligned_gold_filler_cluster_id != 'None':
                                    if aligned_gold_filler_cluster_id_similarity == 0:
                                        self.record_event(
                                            'DEFAULT_CRITICAL_ERROR',
                                            'aligned_similarity=0')
                                    system_slot_fillers['{}:{}'.format(
                                        role_name,
                                        aligned_gold_filler_cluster_id)] = 1
                                else:
                                    system_slot_fillers['{}:{}'.format(
                                        role_name,
                                        system_filler_cluster_id)] = 1
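                        # the frame score is P/R/F1 over the role:filler sets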
                        if len(gold_slot_fillers) and len(system_slot_fillers):
                            precision, recall, f1 = get_precision_recall_and_f1(
                                set(gold_slot_fillers.keys()),
                                set(system_slot_fillers.keys()))
                score = FrameMetricScore(logger=self.logger,
                                         run_id=self.get('run_id'),
                                         document_id=document_id,
                                         language=language,
                                         metatype=metatype,
                                         gold_cluster_id=gold_cluster_id,
                                         system_cluster_id=system_cluster_id,
                                         precision=precision,
                                         recall=recall,
                                         f1=f1)
                scores.append(score)
            # add scores corresponding to unaligned system clusters
            precision, recall, f1 = [0, 0, 0]
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(
                    system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(
                    system_cluster_id).get('aligned_similarity')
                if system_cluster_id != 'None':
                    if gold_cluster_id == 'None':
                        metatype = self.get('cluster', 'system', document_id,
                                            system_cluster_id).get('metatype')
                        if metatype not in ['Event', 'Relation']: continue
                        score = FrameMetricScore(
                            logger=self.logger,
                            run_id=self.get('run_id'),
                            document_id=document_id,
                            language=language,
                            metatype=metatype,
                            gold_cluster_id=gold_cluster_id,
                            system_cluster_id=system_cluster_id,
                            precision=precision,
                            recall=recall,
                            f1=f1)
                        scores.append(score)
                    elif aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores,
                               (('document_id', False), ('metatype', False),
                                ('gold_cluster_id', False),
                                ('system_cluster_id', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, FrameMetricScore)
        self.scores = scores_printer
Example #9
    def score_responses(self):
        scores = []
        mean_similarities = {}
        counts = {}
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                similarity = 0
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('cluster', 'gold', document_id,
                                        gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Event', 'Relation']: continue
                if list(gold_cluster.get('dates').values())[0] is None:
                    self.record_event('NO_TEMPORAL_CONSTRAINT',
                                      gold_cluster_id, document_id)
                    continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id,
                                              system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event(
                            'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                            system_cluster.get('metatype'), system_cluster_id,
                            metatype, gold_cluster_id)
                    if len(gold_cluster.get('dates').keys()) > 1:
                        self.record_event('UNEXPECTED_NUM_DATES',
                                          gold_cluster_id, document_id)
                    similarity = self.get(
                        'temporal_similarity',
                        list(gold_cluster.get('dates').values())[0],
                        list(system_cluster.get('dates').values()))
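                # add the similarity into the 'ALL' and per-language/metatype summary aggregates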
                for metatype_key in ['ALL', metatype]:
                    for language_key in ['ALL', language]:
                        key = '{language}:{metatype}'.format(
                            metatype=metatype_key, language=language_key)
                        mean_similarities[key] = mean_similarities.get(
                            key, 0) + similarity
                        counts[key] = counts.get(key, 0) + 1
                score = TemporalMetricScore(self.logger, self.get('runid'),
                                            document_id, language, metatype,
                                            gold_cluster_id, system_cluster_id,
                                            similarity)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores,
                               (('document_id', False), ('metatype', False),
                                ('gold_cluster_id', False),
                                ('system_cluster_id', False))):
            scores_printer.add(score)
        for key in sorted(mean_similarities, key=self.order):
            mean_similarity = mean_similarities[key] / counts[key] if counts[
                key] else 0
            language, metatype = key.split(':')
            mean_score = TemporalMetricScore(self.logger,
                                             self.get('runid'),
                                             'Summary',
                                             language,
                                             metatype,
                                             '',
                                             '',
                                             mean_similarity,
                                             summary=True)
            scores_printer.add(mean_score)
        self.scores = scores_printer
Example #10
    def score_responses(self):
        scores = []
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get('documents').get(document_id)
            language = document.get('language')
            self.record_event('ANNOTATED_TYPES_INFO', document_id, ','.join(self.get('annotated_regions').get('types_annotated_for_document', document_id)))
            document_gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(gold_cluster_id).get('aligned_similarity')
                precision, recall, f1 = [0,0,0]
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('gold_responses').get('document_clusters').get(document_id).get(gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Entity', 'Event']: continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id, system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE', system_cluster.get('metatype'), system_cluster_id, metatype, gold_cluster_id)
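                    # compare expanded type sets, augmented per document before scoring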
                    gold_types = set(gold_cluster.get('all_expanded_types'))
                    system_types = set()
                    if document_id in self.get('system_responses').get('document_clusters'):
                        system_types = set(self.get('system_responses').get('document_clusters').get(document_id).get(system_cluster_id).get('all_expanded_types'))
                    augmented_gold_types = self.get('augmented_types', document_id, gold_types)
                    augmented_system_types = self.get('augmented_types', document_id, system_types)
                    self.record_event('TYPE_METRIC_SCORE_INFO', self.__class__.__name__, 'TYPES_SUBMITTED', document_id, gold_cluster_id, ','.join(gold_types), system_cluster_id, ','.join(system_types))
                    self.record_event('TYPE_METRIC_SCORE_INFO', self.__class__.__name__, 'TYPES_SCORED', document_id, gold_cluster_id, ','.join(augmented_gold_types), system_cluster_id, ','.join(augmented_system_types))
                    precision, recall, f1 = get_precision_recall_and_f1(augmented_gold_types, augmented_system_types)
                score = TypeMetricScoreV1(logger=self.logger,
                                          run_id=self.get('run_id'),
                                          document_id=document_id,
                                          language=language,
                                          metatype=metatype,
                                          gold_cluster_id=gold_cluster_id,
                                          system_cluster_id=system_cluster_id,
                                          precision=precision,
                                          recall=recall,
                                          f1=f1)
                scores.append(score)
            # add scores corresponding to unaligned system clusters
            document_system_to_gold = self.get('cluster_alignment').get('system_to_gold').get(document_id)
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(system_cluster_id).get('aligned_similarity')
                if system_cluster_id != 'None':
                    system_cluster = self.get('system_responses').get('document_clusters').get(document_id).get(system_cluster_id)
                    metatype = system_cluster.get('metatype')
                    if metatype not in ['Entity', 'Event']: continue
                    if gold_cluster_id == 'None':
                        precision, recall, f1 = [0,0,0]
                        score = TypeMetricScoreV1(logger=self.logger,
                                                  run_id=self.get('run_id'),
                                                  document_id=document_id,
                                                  language=language,
                                                  metatype=metatype,
                                                  gold_cluster_id=gold_cluster_id,
                                                  system_cluster_id=system_cluster_id,
                                                  precision=precision,
                                                  recall=recall,
                                                  f1=f1)
                        scores.append(score)
                    elif aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype', False),
                                        ('gold_cluster_id', False),
                                        ('system_cluster_id', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, TypeMetricScoreV1)
        self.scores = scores_printer
Example #11
    def score_responses(self):
        scores = []
        mean_f1s = {}
        counts = {}
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            document_system_to_gold = self.get('cluster_alignment').get(
                'system_to_gold').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                precision, recall, f1 = [0, 0, 0]
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('cluster', 'gold', document_id,
                                        gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Event', 'Relation']: continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id,
                                              system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event(
                            'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                            system_cluster.get('metatype'), system_cluster_id,
                            metatype, gold_cluster_id)
                    gold_frame = self.get('frame', 'gold', document_id,
                                          gold_cluster_id)
                    gold_slot_fillers = {}
                    if gold_frame is None or len(
                            gold_frame.get('role_fillers')) == 0:
                        if gold_cluster.get('metatype') == 'Relation':
                            self.record_event('MISSING_GOLD_FRAME',
                                              gold_cluster.get('metatype'),
                                              gold_cluster_id, document_id,
                                              self.get('code_location'))
                        continue
                    for role_name in gold_frame.get('role_fillers'):
                        for gold_filler_cluster_id in gold_frame.get(
                                'role_fillers').get(role_name):
                            gold_slot_fillers['{}:{}'.format(
                                role_name, gold_filler_cluster_id)] = 1
                    system_frame = self.get('frame', 'system', document_id,
                                            system_cluster_id)
                    if system_frame:
                        system_slot_fillers = {}
                        for role_name in system_frame.get('role_fillers'):
                            for system_filler_cluster_id in system_frame.get(
                                    'role_fillers').get(role_name):
                                aligned_gold_filler_cluster_id = document_system_to_gold.get(
                                    system_filler_cluster_id).get('aligned_to')
                                aligned_gold_filler_cluster_id_similarity = document_system_to_gold.get(
                                    system_filler_cluster_id).get(
                                        'aligned_similarity')
                                if aligned_gold_filler_cluster_id != 'None':
                                    if aligned_gold_filler_cluster_id_similarity == 0:
                                        self.record_event(
                                            'DEFAULT_CRITICAL_ERROR',
                                            'aligned_similarity=0')
                                    system_slot_fillers['{}:{}'.format(
                                        role_name,
                                        aligned_gold_filler_cluster_id)] = 1
                                else:
                                    system_slot_fillers['{}:{}'.format(
                                        role_name,
                                        system_filler_cluster_id)] = 1
                        if len(gold_slot_fillers) and len(system_slot_fillers):
                            precision, recall, f1 = get_precision_recall_and_f1(
                                set(gold_slot_fillers.keys()),
                                set(system_slot_fillers.keys()))
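                # fold the F1 into the summary aggregates keyed by language and metatype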
                for metatype_key in ['ALL', metatype]:
                    for language_key in ['ALL', language]:
                        key = '{language}:{metatype}'.format(
                            metatype=metatype_key, language=language_key)
                        mean_f1s[key] = mean_f1s.get(key, 0) + f1
                        counts[key] = counts.get(key, 0) + 1
                score = FrameMetricScore(self.logger, self.get('runid'),
                                         document_id, language, metatype,
                                         gold_cluster_id, system_cluster_id,
                                         precision, recall, f1)
                scores.append(score)
            # add scores corresponding to unaligned system clusters
            precision, recall, f1 = [0, 0, 0]
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(
                    system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(
                    system_cluster_id).get('aligned_similarity')
                if system_cluster_id != 'None':
                    if gold_cluster_id == 'None':
                        metatype = self.get('cluster', 'system', document_id,
                                            system_cluster_id).get('metatype')
                        if metatype not in ['Event', 'Relation']: continue
                        for metatype_key in ['ALL', metatype]:
                            for language_key in ['ALL', language]:
                                key = '{language}:{metatype}'.format(
                                    metatype=metatype_key,
                                    language=language_key)
                                mean_f1s[key] = mean_f1s.get(key, 0) + f1
                                counts[key] = counts.get(key, 0) + 1
                        score = FrameMetricScore(self.logger,
                                                 self.get('runid'),
                                                 document_id, language,
                                                 metatype, gold_cluster_id,
                                                 system_cluster_id, precision,
                                                 recall, f1)
                        scores.append(score)
                    elif aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores,
                               (('document_id', False), ('metatype', False),
                                ('gold_cluster_id', False),
                                ('system_cluster_id', False))):
            scores_printer.add(score)
        for key in sorted(mean_f1s, key=self.order):
            mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
            language, metatype = key.split(':')
            mean_score = FrameMetricScore(self.logger,
                                          self.get('runid'),
                                          'Summary',
                                          language,
                                          metatype,
                                          '',
                                          '',
                                          '',
                                          '',
                                          mean_f1,
                                          summary=True)
            scores_printer.add(mean_score)

        self.scores = scores_printer
Example #12
    def score_responses(self):
        scores = []
        mean_f1s = {}
        counts = {}
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            self.record_event(
                'ANNOTATED_TYPES_INFO', document_id, ','.join(
                    self.get('annotated_regions').get(
                        'types_annotated_for_document', document_id)))
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                precision, recall, f1 = [0, 0, 0]
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('gold_responses').get(
                    'document_clusters').get(document_id).get(gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Entity', 'Event']: continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id,
                                              system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event(
                            'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                            system_cluster.get('metatype'), system_cluster_id,
                            metatype, gold_cluster_id)
                    gold_types = set(gold_cluster.get('all_expanded_types'))
                    system_types = set()
                    if document_id in self.get('system_responses').get(
                            'document_clusters'):
                        system_types = set(
                            self.get('system_responses').
                            get('document_clusters').get(document_id).get(
                                system_cluster_id).get('all_expanded_types'))
                    augmented_gold_types = self.get('augmented_types',
                                                    document_id, gold_types)
                    augmented_system_types = self.get('augmented_types',
                                                      document_id,
                                                      system_types)
                    self.record_event('TEMPORAL_METRIC_SCORE_INFO',
                                      'TYPES_SUBMITTED', document_id,
                                      gold_cluster_id, ','.join(gold_types),
                                      system_cluster_id,
                                      ','.join(system_types))
                    self.record_event('TEMPORAL_METRIC_SCORE_INFO',
                                      'TYPES_SCORED', document_id,
                                      gold_cluster_id,
                                      ','.join(augmented_gold_types),
                                      system_cluster_id,
                                      ','.join(augmented_system_types))
                    precision, recall, f1 = get_precision_recall_and_f1(
                        augmented_gold_types, augmented_system_types)
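                # update the per-language and per-metatype F1 aggregates for the summary rows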
                for metatype_key in ['ALL', metatype]:
                    for language_key in ['ALL', language]:
                        key = '{language}:{metatype}'.format(
                            metatype=metatype_key, language=language_key)
                        mean_f1s[key] = mean_f1s.get(key, 0) + f1
                        counts[key] = counts.get(key, 0) + 1
                score = TypeMetricScore(self.logger, self.get('runid'),
                                        document_id, language, metatype,
                                        gold_cluster_id, system_cluster_id,
                                        precision, recall, f1)
                scores.append(score)
            # add scores corresponding to unaligned system clusters
            document_system_to_gold = self.get('cluster_alignment').get(
                'system_to_gold').get(document_id)
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(
                    system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(
                    system_cluster_id).get('aligned_similarity')
                if system_cluster_id != 'None':
                    system_cluster = self.get('system_responses').get(
                        'document_clusters').get(document_id).get(
                            system_cluster_id)
                    metatype = system_cluster.get('metatype')
                    if metatype not in ['Entity', 'Event']: continue
                    if gold_cluster_id == 'None':
                        precision, recall, f1 = [0, 0, 0]
                        for metatype_key in ['ALL', metatype]:
                            for language_key in ['ALL', language]:
                                key = '{language}:{metatype}'.format(
                                    metatype=metatype_key,
                                    language=language_key)
                                mean_f1s[key] = mean_f1s.get(key, 0) + f1
                                counts[key] = counts.get(key, 0) + 1
                        score = TypeMetricScore(self.logger, self.get('runid'),
                                                document_id, language,
                                                metatype, gold_cluster_id,
                                                system_cluster_id, precision,
                                                recall, f1)
                        scores.append(score)
                    elif aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores,
                               (('document_id', False), ('metatype', False),
                                ('gold_cluster_id', False),
                                ('system_cluster_id', False))):
            scores_printer.add(score)
        for key in sorted(mean_f1s, key=self.order):
            mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
            language, metatype = key.split(':')
            mean_score = TypeMetricScore(self.logger,
                                         self.get('runid'),
                                         'Summary',
                                         language,
                                         metatype,
                                         '',
                                         '',
                                         '',
                                         '',
                                         mean_f1,
                                         summary=True)
            scores_printer.add(mean_score)
        self.scores = scores_printer
Example #13
def main(args):
    logger = Logger(args.log, args.log_specifications, sys.argv)

    type_mappings = Container(logger)
    for entry in FileHandler(logger, args.ontology_type_mappings):
        type_mappings.add(key=entry.get('full_type_ov'),
                          value=entry.get('full_type'))

    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries
    }

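    # normalize each input annotation entry into a typed region record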
    output = []
    for entry in FileHandler(logger, args.input):
        document_id = entry.get('root_doc_id')
        document_element_id = entry.get('doc_element_id')
        modality = entry.get('media_type')
        type = entry.get('type')
        subtype = entry.get('subtype')
        subsubtype = entry.get('subsubtype')
        # apply patch to correct LDC's mistake in annotation
        if type == 'personalsocial' and subtype == 'unspecified':
            subtype = 'relationship'
        full_type = '{type}.{subtype}.{subsubtype}'.format(
            type=type, subtype=subtype, subsubtype=subsubtype)
        full_type_cleaned = full_type.replace('.unspecified', '')
        propercased_full_type = type_mappings.get(full_type_cleaned, None)
        if propercased_full_type is None:
            logger.record_event(
                'DEFAULT_CRITICAL_ERROR',
                'propercased_full_type is None for full_type: {}'.format(
                    full_type))
        span_string = entry.get('span')
        keyframe_id = None
        keyframe_num = 0
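        # normalize the span: whole-element spans expand to the element's boundary, text offset
        # ranges become '(start,0)-(end,0)', and keyframe ids expand to the keyframe's boundary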
        if span_string == 'ENTIRE_DOCUMENT_ELEMENT':
            document_boundary = document_boundaries.get(modality).get(
                document_element_id)
            span_string = document_boundary.__str__()
        elif '-' in span_string:
            start, end = span_string.split('-')
            span_string = '({start},0)-({end},0)'.format(start=start, end=end)
        elif '_' in span_string:
            keyframe_id = span_string
            keyframe_num = span_string.split('_')[1]
            document_boundary = document_boundaries.get('keyframe').get(
                keyframe_id)
            span_string = document_boundary.__str__()
        else:
            span_string = None
        output_object = {
            'document_id': document_id,
            'document_element_id': document_element_id,
            'keyframe_id': keyframe_id,
            'keyframe_num': int(keyframe_num),
            'modality': modality,
            'region': span_string,
            'type': propercased_full_type,
        }
        output.append(output_object)

    printed = {}
    fh = open(args.output, 'w')
    header = [
        'document_id', 'document_element_or_keyframe_id', 'modality', 'region',
        'type'
    ]
    fh.write('{}\n'.format('\t'.join(header)))
    for output_object in multisort(
            output, (('document_id', False), ('modality', False),
                     ('document_element_id', False), ('keyframe_num', False),
                     ('region', False), ('type', False))):
        line = get_line(output_object, header)
        if line not in printed:
            fh.write('{}\n'.format(line))
            printed[line] = 1
    fh.close()
    exit(ALLOK_EXIT_CODE)