def apply_custom_classification_rules(classified, language, overwrite=False):
    """ Implements simple custom, classifier-agnostic rules for recognizing some frame elements

    Two rules are applied, in this order:

    1. unless the item already contains a ``Time`` or ``Duration`` FE, the FEs
       returned by ``normalize_numerical_fes`` for the sentence text are added;
    2. every linked entity whose types include the DBpedia ``Place`` class
       becomes a ``Place`` FE whose score is the entity confidence.

    FEs are indexed by chunk, so at most one FE per chunk survives.

    :param dict classified: an item produced by the classifier. The keys
     ``fes`` and ``text`` are read, ``linked_entities`` is optional. The item
     is modified in place.
    :param str language: Language of the sentence
    :param bool overwrite: Tells the priority in case the rules assign a role
     to the same chunk recognized by the classifier. When false, a numerical FE
     never replaces an existing one, and a place FE replaces an existing one
     only if the existing FE is not itself linked to a ``Place`` entity.
    :return: The same item with augmented FEs; ``fes`` is always a list
    :rtype: dict
    """
    # NOTE(review): this function is defined a second time further down in
    # this file; the later definition is the one bound at import time.
    typeof_place = 'http://dbpedia.org/ontology/Place'

    # one FE per chunk: later duplicates in the input silently win
    chunk_to_fe = {fe['chunk']: fe for fe in classified['fes']}

    # if not already done, normalize numerical FEs
    if not any(fe['fe'] in ('Time', 'Duration') for fe in classified['fes']):
        for each in normalize_numerical_fes(language, classified['text']):
            old = chunk_to_fe.get(each['chunk'])
            if old is None:
                chunk_to_fe[each['chunk']] = each
            elif overwrite:
                chunk_to_fe[each['chunk']] = each
                logger.debug('chunk "%s" was assigned role %s, assigning %s instead',
                             old['chunk'], old['fe'], each['fe'])

    # all places recognized by the entity linker are FEs
    for entity in classified.get('linked_entities', []):
        if typeof_place not in entity['types']:
            continue

        fe = {
            'fe': 'Place',
            'chunk': entity['chunk'],
            'score': entity['confidence'],
            'link': entity,
        }

        old = chunk_to_fe.get(entity['chunk'])
        if old is None:
            chunk_to_fe[fe['chunk']] = fe
        elif overwrite or typeof_place not in old.get('link', {}).get('types', []):
            # an existing FE gives way unless it is already backed by a place
            chunk_to_fe[fe['chunk']] = fe
            logger.debug('chunk "%s" was assigned role %s, assigning %s instead',
                         old['chunk'], old['fe'], 'Place')

    # materialize the values: on Python 3 ``dict.values()`` is a view, which
    # cannot be indexed, concatenated or serialized like a list of FEs
    classified['fes'] = list(chunk_to_fe.values())

    # check that no chunk is assigned more than one role
    assert len(set(fe['chunk'] for fe in classified['fes'])) == len(classified['fes'])

    return classified
def apply_custom_classification_rules(classified, language, overwrite=False):
    """ Implements simple custom, classifier-agnostic rules for recognizing some frame elements

    The FEs of the item are indexed by chunk, then augmented as follows:

    * numerical FEs returned by ``normalize_numerical_fes`` for the sentence
      text are added, but only when the item has no ``Time`` or ``Duration``
      FE yet;
    * each linked entity typed as a DBpedia ``Place`` yields a ``Place`` FE
      carrying the entity confidence as score and the entity itself as link.

    :param dict classified: an item produced by the classifier. Must contain
     ``fes`` and ``text``; ``linked_entities`` is optional. Modified in place.
    :param str language: Language of the sentence
    :param bool overwrite: Tells the priority in case the rules assign a role
     to the same chunk recognized by the classifier. Without it, numerical FEs
     never displace an existing FE, while place FEs displace only FEs that are
     not linked to a ``Place`` entity themselves.
    :return: The same item with augmented FEs, stored as a list under ``fes``
    :rtype: dict
    """
    # NOTE(review): an earlier definition with the same name exists in this
    # file; this later one shadows it. Consider removing the duplicate.
    place_type = 'http://dbpedia.org/ontology/Place'
    fes_by_chunk = {fe['chunk']: fe for fe in classified['fes']}

    # if not already done, normalize numerical FEs
    has_numerical = any(fe['fe'] in ('Time', 'Duration') for fe in classified['fes'])
    if not has_numerical:
        for candidate in normalize_numerical_fes(language, classified['text']):
            chunk = candidate['chunk']
            previous = fes_by_chunk.get(chunk)
            if previous is not None and not overwrite:
                # the classifier's role has priority
                continue

            fes_by_chunk[chunk] = candidate
            if previous is not None:
                logger.debug('chunk "%s" was assigned role %s, assigning %s instead',
                             previous['chunk'], previous['fe'], candidate['fe'])

    # all places recognized by the entity linker are FEs
    for entity in classified.get('linked_entities', []):
        if place_type not in entity['types']:
            continue

        place_fe = {
            'fe': 'Place',
            'chunk': entity['chunk'],
            'score': entity['confidence'],
            'link': entity,
        }

        previous = fes_by_chunk.get(entity['chunk'])
        if previous is None:
            fes_by_chunk[place_fe['chunk']] = place_fe
        elif overwrite or place_type not in previous.get('link', {}).get('types', []):
            # keep the previous FE only when it is already backed by a place
            fes_by_chunk[place_fe['chunk']] = place_fe
            logger.debug('chunk "%s" was assigned role %s, assigning %s instead',
                         previous['chunk'], previous['fe'], 'Place')

    # store a real list: ``dict.values()`` is only a view on Python 3 and does
    # not support indexing, concatenation or JSON serialization
    classified['fes'] = list(fes_by_chunk.values())

    # check that no chunk is assigned more than one role
    assert len(set(fe['chunk'] for fe in classified['fes'])) == len(classified['fes'])

    return classified
def label_sentence(self, sentence, normalize_numerical, score_type, core_weight):
    """ Labels a single sentence

    The tagged tokens are scanned in order. The first token whose lemma has an
    entry in ``self.frame_data``, whose POS tag starts with that frame's POS,
    and for which at least one FE is available (from the linked entities or
    from the numerical normalization) determines the frame of the sentence.

    :param sentence: Sentence data to label. ``url`` and ``text`` must be
     non-empty or the sentence is skipped; ``name`` and ``linked_entities`` are
     read when a frame is tried; ``tagged`` is optional and, when missing, is
     computed with ``self.tagger``
    :param normalize_numerical: Automatically normalize numerical FEs
    :param score_type: Which type of score (if any) to use to compute the
     classification confidence
    :param core_weight: Weight of the core FEs (used in the scoring)
    :return: Labeled data, or None if the sentence was skipped or no frame
     could be assigned
    """
    # read the text defensively: the guard below tolerates a missing or empty
    # text, so the debug message must not fail on it first
    text = sentence.get('text') or ''
    logger.debug('processing sentence "%s"', text)

    if not sentence.get('url'):
        logger.warning('a sentence is missing the url, skipping it')
        return None
    if not text.strip():
        return None

    tagged = sentence['tagged'] if 'tagged' in sentence else self.tagger.tag_one(text)

    # Normalize + annotate numerical FEs
    numerical_fes = []
    if normalize_numerical:
        numerical_fes = list(normalize_numerical_fes(self.language, text))

    for token, pos, lemma in tagged:
        if lemma not in self.frame_data:
            continue

        frame = self.frame_data[lemma]
        if not pos.startswith(frame['pos']):
            continue

        if not frame['ontology_to_fe']:
            logger.debug('missing FE types for frame %s, skipping', frame['frame'])
            continue

        logger.debug('trying frame %s with FE of types %s',
                     frame['frame'], frame['ontology_to_fe'].keys())

        assigned_fes = self.assign_frame_elements(sentence['linked_entities'], frame)
        all_fes = numerical_fes + assigned_fes

        # numerical FEs alone are enough to accept the frame
        if assigned_fes or numerical_fes:
            logger.debug('assigning frame: %s and FEs %s', frame['frame'], all_fes)
            labeled = {
                'name': sentence['name'],
                'url': sentence['url'],
                'text': text,
                'linked_entities': sentence['linked_entities'],
                'frame': frame['frame'],
                'fes': all_fes,
                'lu': lemma,
            }
            break

        logger.debug('no FEs assigned for frame %s, trying another one', frame['frame'])
    else:
        # the loop ended without a break: no token produced a usable frame
        logger.debug('did not assign any frame to sentence "%s"', text)
        return None

    if score_type:
        labeled['score'] = scoring.compute_score(labeled, score_type, core_weight)

    assert 'lu' in labeled and labeled['fes']

    return apply_custom_classification_rules(labeled, self.language)