def get_date_range(self, responses, entry, date_name):
    """Collect the '<date_name>_before' and '<date_name>_after' bounds from entry.

    Returns an Object carrying both bounds when at least one is truthy,
    otherwise None. The `responses` argument is unused here but kept for
    interface compatibility with sibling generators.
    """
    bounds = Object(entry.get('logger'))
    any_bound_present = False
    for bound_name in ['before', 'after']:
        bounds.set(bound_name, entry.get('{}_{}'.format(date_name, bound_name)))
        if bounds.get(bound_name):
            any_bound_present = True
    if any_bound_present:
        return bounds
    return None
def generate_date_start_and_end(self, response, entry):
    """Attach a 'date' Object (fields 'start' and 'end') to entry.

    The 'date' is set to None when neither 'start' nor 'end' is truthy on
    entry; the resulting value is then recorded on the subject cluster's
    'dates' container (even when it is None, matching original behavior).
    """
    start_end = Object(entry.get('logger'))
    has_component = False
    for component in ['start', 'end']:
        if entry.get(component):
            has_component = True
        start_end.set(component, entry.get(component))
    entry.set('date', start_end if has_component else None)
    entry.get('subject_cluster').get('dates').add(entry.get('date'))
def generate_date_start_and_end(self, response, entry):
    """Attach a 'date' Object (fields 'start' and 'end') to entry.

    The 'date' is set to None when neither 'start' nor 'end' is truthy.
    For task1/task3 time-related response schemas the value is additionally
    recorded on the subject cluster's 'dates' container.
    """
    start_end = Object(entry.get('logger'))
    has_component = False
    for component in ['start', 'end']:
        if entry.get(component):
            has_component = True
        start_end.set(component, entry.get(component))
    entry.set('date', start_end if has_component else None)
    # only these schemas carry a subject cluster to annotate
    tm_schemas = ['AIDA_PHASE2_TASK1_TM_RESPONSE', 'AIDA_PHASE3_TASK3_TM_RESPONSE']
    if entry.get('schema').get('name') in tm_schemas:
        entry.get('subject_cluster').get('dates').add(entry.get('date'))
def get_date(self, responses, entry, date_name):
    """Read '<date_name>_month', '<date_name>_day' and '<date_name>_year' into an Object.

    Empty fields (after trim) become None; non-empty fields are converted to
    int. Returns None when no field is present, or when 'year' is missing
    (a month/day without a year is considered fully missing). A 'day'
    without a 'month' is dropped.
    """
    date_object = Object(entry.get('logger'))
    any_present = False
    for part in ['month', 'day', 'year']:
        raw_value = trim(entry.get('{}_{}'.format(date_name, part)))
        date_object.set(part, None if raw_value == '' else int(raw_value))
        if date_object.get(part):
            any_present = True
    # consider all date fields to be missing if year was missing
    # even if day and month were provided
    if any_present and not date_object.get('year'):
        any_present = False
    # a day without a month is meaningless; drop it
    if any_present and date_object.get('day') and not date_object.get('month'):
        date_object.set('day', None)
    return date_object if any_present else None
def get_date(self, responses, entry, date_name):
    """Read '<date_name>_month/day/year' from entry into an Object, correcting missing parts.

    Empty fields (after trim) are recorded as None and collected in
    field_names_missing; their placeholder text ('xx'/'xxxx') is substituted
    so an LDC-style unspecified-date string can be built and handed to
    LDCTime for correction. If 'year' is missing the whole date is treated
    as absent (returns None); otherwise the corrected values are written
    back both into the returned Object and into the entry itself.

    NOTE(review): assumes date_name has the form '<start_or_end>_<before_or_after>'
    (e.g. 'start_before') — the split below unpacks exactly two parts; verify
    against callers.
    """
    # placeholder text used by LDC to denote an unspecified month/day/year
    date_fields = {'month': 'xx', 'day': 'xx', 'year': 'xxxx'}
    # map each part name to the trimmed raw value of '<date_name>_<part>'
    date_field_values = {
        key: trim(entry.get(field_name))
        for key, field_name in {key: '{}_{}'.format(date_name, key) for key in date_fields}.items()
    }
    field_names_missing = []
    date_object = Object(entry.get('logger'))
    for date_field in date_fields:
        # empty -> None, otherwise integer value
        date_object.set(
            date_field,
            None if date_field_values[date_field] == '' else int(date_field_values[date_field]))
        if date_field_values[date_field] == '':
            field_names_missing.append(date_field)
            # substitute the 'xx'/'xxxx' placeholder for building the
            # unspecified-date string below
            date_field_values[date_field] = date_fields[date_field]
    if len(field_names_missing) > 0:
        unspecified_date = '{year}-{month}-{day}'.format(
            day=date_field_values['day'],
            month=date_field_values['month'],
            year=date_field_values['year'])
        start_or_end, before_or_after = date_name.split('_')
        corrected_date = LDCTime(self.get('logger'), unspecified_date, start_or_end, before_or_after, entry.get('where'))
        # update date_object
        if 'year' in field_names_missing:
            # a date with no year is treated as fully missing
            date_object = None
        else:
            missing_fields = ','.join(field_names_missing)
            self.record_event('MISSING_DATE_FIELD', date_name, missing_fields, corrected_date.__str__(), date_name, entry.get('where'))
            for date_field in date_fields:
                # overwrite each part with the corrected value, and write
                # the quoted corrected text back into the source entry
                date_object.set(
                    date_field,
                    int(corrected_date.get(date_field).__str__()))
                entry.set(
                    '{}_{}'.format(date_name, date_field),
                    '"{}"'.format(corrected_date.get(date_field).__str__()))
    return date_object
def get_document_type_role_fillers(self, system_or_gold, document_id):
    """Collect per-document (type, role, filler-cluster) records from the frames of one run.

    Walks every frame of `document_id` in either the system or the gold
    responses (selected by `system_or_gold`) and builds a Container keyed by
    '<type_invoked>_<role_name>:<filler_cluster_id>'. Each value is an Object
    carrying the frame metatype, the invoked type, the role name, the filler
    cluster id, and the Container of predicate justifications seen for that
    combination.
    """
    logger = self.get('logger')
    type_role_fillers = Container(logger)
    responses = self.get('{}_responses'.format(system_or_gold))
    if document_id in responses.get('document_frames'):
        for frame in responses.get('document_frames').get(
                document_id).values():
            metatype = frame.get('metatype')
            role_fillers = frame.get('role_fillers')
            for role_name in role_fillers:
                for filler_cluster_id in role_fillers.get(role_name):
                    for predicate_justification in role_fillers.get(
                            role_name).get(filler_cluster_id):
                        # the type invoked by this justification for this role
                        type_invoked = self.get('type_invoked', predicate_justification, role_name)
                        type_role_filler_string = '{type_invoked}_{role_name}:{filler_cluster_id}'.format(
                            type_invoked=type_invoked,
                            role_name=role_name,
                            filler_cluster_id=filler_cluster_id)
                        # NOTE(review): presumably Container.get with default=
                        # stores the default under the key on first access, so
                        # the Object below ends up inside type_role_fillers —
                        # verify against the Container implementation
                        type_role_filler = type_role_fillers.get(
                            type_role_filler_string, default=Object(logger))
                        type_role_filler.set('metatype', metatype)
                        type_role_filler.set('type', type_invoked)
                        type_role_filler.set('role_name', role_name)
                        type_role_filler.set('filler_cluster_id', filler_cluster_id)
                        # lazily create the justification container on first use
                        if type_role_filler.get(
                                'predicate_justifications') is None:
                            type_role_filler.set(
                                'predicate_justifications', Container(logger))
                        type_role_filler.get('predicate_justifications'
                                             ).add(predicate_justification)
    return type_role_fillers
def spanstring_to_object(logger, span_string, where=None):
    """Parse a span string into a mention Object.

    Expected format: '<document_id>:<document_element_id>:(<sx>,<sy>)-(<ex>,<ey>)'.
    On a match, the returned Object carries 'span_string', 'document_id',
    'document_element_id', 'keyframe_id' (split out of the document element id
    by parse_document_element_id), 'span' (a Span over the four coordinates),
    and 'where'. On a mismatch an UNEXPECTED_SPAN_FORMAT event is recorded and
    an empty Object is returned.
    """
    # raw string: '\(' and '\S' are invalid escape sequences in a plain
    # string literal (SyntaxWarning as of Python 3.12); the pattern bytes
    # are unchanged
    pattern = re.compile(r'^(.*?):(.*?):\((\S+),(\S+)\)-\((\S+),(\S+)\)$')
    match = pattern.match(span_string)
    mention = Object(logger)
    if match:
        document_id = match.group(1)
        document_element_id, keyframe_id = parse_document_element_id(match.group(2))
        span = Span(logger, match.group(3), match.group(4), match.group(5), match.group(6))
        mention.set('span_string', span_string)
        mention.set('document_id', document_id)
        mention.set('document_element_id', document_element_id)
        mention.set('keyframe_id', keyframe_id)
        mention.set('span', span)
        mention.set('where', where)
    else:
        logger.record_event('UNEXPECTED_SPAN_FORMAT', span_string, where)
    return mention
def update(self, entry):
    """Record one role-filler assertion from a SPARQL result entry onto this frame.

    Splits '?predicate' into the event/relation type and the role name,
    reconciles the frame metatype (logging a METATYPE_MISMATCH when the entry
    disagrees), marks the type as seen, and appends a filler Object (cluster
    id, justification, confidences, provenance) under
    role_fillers[rolename][filler_cluster_id].
    """
    event_or_relation_type, rolename = entry.get('?predicate').split('_')
    entry_metatype = entry.get('?metatype')
    if self.get('metatype') is None:
        self.set('metatype', entry_metatype)
    if self.get('metatype') != entry_metatype:
        self.record_event('METATYPE_MISMATCH', self.get('ID'), self.get('metatype'), entry_metatype, entry.get('where'))
    self.get('types')[event_or_relation_type] = 1
    filler = Object(self.get('logger'))
    filler_cluster_id = entry.get('?object')
    filler.set('filler_cluster_id', filler_cluster_id)
    # copy the assertion details straight off the query result
    for attribute in ['predicate_justification',
                      'argument_assertion_confidence',
                      'predicate_justification_confidence']:
        filler.set(attribute, entry.get('?{}'.format(attribute)))
    filler.set('where', entry.get('where'))
    role_fillers = self.get('role_fillers')
    if rolename not in role_fillers:
        role_fillers[rolename] = {}
    fillers_for_role = role_fillers[rolename]
    if filler_cluster_id not in fillers_for_role:
        fillers_for_role[filler_cluster_id] = []
    fillers_for_role[filler_cluster_id].append(filler)
def load_classquery_assessments(self):
    """Load ClassQuery assessment files from '<assessments_dir>/data/class/*/*.tab'.

    For each assessed mention, normalizes the assessment value, generates a
    unique FQEC of the form 'NILG<n>' when the read FQEC is 'NIL' but the
    assessment is CORRECT, stores one assessment Object per
    '<queryid>:<docid>:<mention_span>' key (first one wins; duplicates log a
    MULTIPLE_ASSESSMENTS event), and logs a GROUND_TRUTH line per entry.
    """
    next_fqec_num = 1001
    generated_fqecs = {}
    query_type = 'ClassQuery'
    path = '{}/data/class/*/*.tab'.format(self.assessments_dir)
    header = FileHeader(
        self.logger, "\t".join(assessments.get(query_type).get('columns')))
    for filename in glob.glob(path):
        for entry in FileHandler(self.logger, filename, header):
            queryid, docid, mention_span, assessment_read, fqec_read, where = map(
                lambda key: entry.get(key), [
                    'queryid', 'docid', 'mention_span', 'assessment',
                    'fqec', 'where'
                ])
            assessment = self.normalize('assessment', assessment_read)
            query_and_document = '{}:{}'.format(queryid, docid)
            key = '{}:{}'.format(query_and_document, mention_span)
            if self.exists(key):
                self.logger.record_event('MULTIPLE_ASSESSMENTS', key, where)
            fqec = fqec_read
            # NOTE(review): assessment is already normalized; the second
            # normalize below is presumably idempotent — verify
            if fqec == 'NIL' and self.normalize('assessment', assessment) == 'CORRECT':
                if key not in generated_fqecs:
                    generated_fqecs[key] = 'NILG{}'.format(next_fqec_num)
                    # fix: advance the counter so each key receives a distinct
                    # generated FQEC (previously every key got 'NILG1001')
                    next_fqec_num += 1
                fqec = generated_fqecs[key]
            assessment_entry = Object(self.logger)
            assessment_entry.set('assessment', assessment)
            assessment_entry.set('docid', docid)
            assessment_entry.set('queryid', queryid)
            assessment_entry.set('mention_span', mention_span)
            assessment_entry.set('fqec_read', fqec_read)
            assessment_entry.set('fqec', fqec)
            assessment_entry.set('where', where)
            if not self.exists(key):
                self.add(key=key, value=assessment_entry)
            line = 'QUERYID={} DOCID={} MENTION={} ASSESSMENT={} FQEC_READ={} FQEC={}'.format(
                queryid, docid, mention_span, assessment, fqec_read, fqec)
            self.logger.record_event('GROUND_TRUTH', line, where)
def load_task2_assessments(self):
    """Load task2 zero-hop assessment files from '<assessments_dir>/data/zero-hop/*.tab'.

    Mirrors load_classquery_assessments but stores assessments in a nested
    layout: a Container per queryid, keyed by '<docid>:<mention_span>'.
    Generates a unique FQEC of the form 'NILG<n>' when the read FQEC is 'NIL'
    but the assessment is CORRECT, and logs a GROUND_TRUTH line (including
    the query's entity_id) per entry.
    """
    next_fqec_num = 1001
    generated_fqecs = {}
    path = '{}/data/zero-hop/*.tab'.format(self.assessments_dir)
    header = FileHeader(self.logger, "\t".join(assessments.get('task2').get('across_documents_coreference').get('columns')))
    for filename in glob.glob(path):
        for entry in FileHandler(self.logger, filename, header):
            queryid, docid, mention_span, assessment_read, fqec_read, where = map(
                lambda key: entry.get(key),
                ['queryid', 'docid', 'mention_span', 'assessment', 'fqec', 'where']
            )
            entity_id = self.get('queries_to_score').get(queryid).get('entity_id')
            assessment = self.normalize('assessment', assessment_read)
            query_and_document = '{}:{}'.format(queryid, docid)
            key = '{}:{}'.format(query_and_document, mention_span)
            # NOTE(review): entries are stored nested under queryid below, so
            # this flat-key existence check may never fire — verify against
            # the container's exists() semantics
            if self.exists(key):
                self.logger.record_event('MULTIPLE_ASSESSMENTS', key, where)
            fqec = fqec_read
            if fqec == 'NIL' and self.normalize('assessment', assessment) == 'CORRECT':
                if key not in generated_fqecs:
                    generated_fqecs[key] = 'NILG{}'.format(next_fqec_num)
                    # fix: advance the counter so each key receives a distinct
                    # generated FQEC (previously every key got 'NILG1001')
                    next_fqec_num += 1
                fqec = generated_fqecs[key]
            assessment_entry = Object(self.logger)
            assessment_entry.set('assessment', assessment)
            assessment_entry.set('docid', docid)
            assessment_entry.set('queryid', queryid)
            assessment_entry.set('mention_span', mention_span)
            assessment_entry.set('fqec_read', fqec_read)
            assessment_entry.set('fqec', fqec)
            assessment_entry.set('line', entry.get('line'))
            assessment_entry.set('where', where)
            if not self.exists(queryid):
                self.add(key=queryid, value=Container(self.get('logger')))
            # store under '<docid>:<mention_span>' within the query's container
            self.get(queryid).add(key=':'.join(key.split(':')[1:]), value=assessment_entry)
            line = 'ENTITYID={} QUERYID={} DOCID={} MENTION={} ASSESSMENT={} FQEC_READ={} FQEC={}'.format(
                entity_id, queryid, docid, mention_span, assessment, fqec_read, fqec)
            self.logger.record_event('GROUND_TRUTH', line, where)