def encode_entities(
    self,
    entity_data: Dict[Text, Any],
    precomputations: Optional[MessageContainerForCoreFeaturization],
    bilou_tagging: bool = False,
) -> Dict[Text, List[Features]]:
    """Encode the given entity data.

    Produce numeric entity tags for tokens.

    Args:
        entity_data: The dict containing the text and entity labels and
            locations. It is indexed with the `TEXT` and `ENTITIES` keys.
        precomputations: Contains precomputed features and attributes. May be
            `None`, in which case nothing can be looked up and an empty
            dictionary is returned.
        bilou_tagging: indicates whether BILOU tagging should be used or not

    Returns:
        A dictionary mapping `ENTITY_TAGS` to a single-element list holding
        the tag ids computed for the looked-up message, or an empty dictionary
        if no tags can be produced (no entity data, fewer than two tag
        classes, no precomputations, or no message found).
    """
    # TODO
    #  The entity states used to create the tag-idx-mapping contains the
    #  entities and the concatenated entity and roles/groups. We do not
    #  distinguish between entities and roles/groups right now.
    if (
        not entity_data
        or not self.entity_tag_specs
        or self.entity_tag_specs[0].num_tags < 2
    ):
        # we cannot build a classifier with fewer than 2 classes
        return {}

    if precomputations is None:
        # The parameter is declared Optional; without a container there is
        # no message to look up, so there is nothing to encode.
        return {}

    message = precomputations.lookup_message(user_text=entity_data[TEXT])
    if not message:
        # Guard *before* touching `message.data`; otherwise a missing message
        # would raise instead of yielding the intended empty result.
        return {}

    message.data[ENTITIES] = entity_data[ENTITIES]

    if bilou_tagging:
        bilou_utils.apply_bilou_schema_to_message(message)

    return {
        ENTITY_TAGS: [
            model_data_utils.get_tag_ids(
                message, self.entity_tag_specs[0], bilou_tagging
            )
        ]
    }
def encode_entities(
    self,
    entity_data: Dict[Text, Any],
    interpreter: NaturalLanguageInterpreter,
    bilou_tagging: bool = False,
) -> Dict[Text, List["Features"]]:
    """Turn entity annotations into numeric per-token entity tags.

    The entity data is wrapped in a `Message`, featurized by the given
    interpreter, and then converted to tag ids using the first entry of
    `self.entity_tag_specs`.

    Args:
        entity_data: The dict containing the text and entity labels and
            locations.
        interpreter: The interpreter used to featurize the message built
            from `entity_data`.
        bilou_tagging: Whether the BILOU schema is applied to the message
            before the tag ids are computed.

    Returns:
        A dictionary mapping `ENTITY_TAGS` to a single-element list holding
        the computed tag ids, or an empty dictionary when there is no entity
        data, no usable tag spec, or no featurized message.
    """
    # TODO
    #  The entity states used to create the tag-idx-mapping contains the
    #  entities and the concatenated entity and roles/groups. We do not
    #  distinguish between entities and roles/groups right now.
    if not entity_data:
        return {}

    tag_specs = self.entity_tag_specs
    if not tag_specs:
        return {}

    primary_spec = tag_specs[0]
    if primary_spec.num_tags < 2:
        # we cannot build a classifier with fewer than 2 classes
        return {}

    featurized = interpreter.featurize_message(Message(entity_data))
    if not featurized:
        return {}

    if bilou_tagging:
        bilou_utils.apply_bilou_schema_to_message(featurized)

    tag_ids = model_data_utils.get_tag_ids(featurized, primary_spec, bilou_tagging)
    return {ENTITY_TAGS: [tag_ids]}