示例#1
0
    def encode_entities(
        self,
        entity_data: Dict[Text, Any],
        precomputations: Optional[MessageContainerForCoreFeaturization],
        bilou_tagging: bool = False,
    ) -> Dict[Text, List[Features]]:
        """Encode the given entity data.

        Produce numeric entity tags for tokens.

        Args:
            entity_data: The dict containing the text and entity labels and locations
            precomputations: Contains precomputed features and attributes. May be
                `None`, in which case nothing can be looked up and an empty
                dictionary is returned.
            bilou_tagging: indicates whether BILOU tagging should be used or not

        Returns:
            A dictionary with the single key `ENTITY_TAGS` mapped to a one-element
            list holding the result of `model_data_utils.get_tag_ids`, or an empty
            dictionary if the entity data cannot be encoded (no entity data, no
            usable tag specification, no precomputations, or no message found).
        """
        # TODO
        #  The entity states used to create the tag-idx-mapping contains the
        #  entities and the concatenated entity and roles/groups. We do not
        #  distinguish between entities and roles/groups right now.
        if (
            not entity_data
            or not self.entity_tag_specs
            or self.entity_tag_specs[0].num_tags < 2
        ):
            # we cannot build a classifier with fewer than 2 classes
            return {}

        if precomputations is None:
            # the parameter is optional; without it there is no message to tag
            return {}

        message = precomputations.lookup_message(user_text=entity_data[TEXT])

        # Check the lookup result *before* touching it, so that a missing
        # message yields an empty result instead of an AttributeError.
        if not message:
            return {}

        # NOTE: this writes the entity annotations onto the looked-up message
        # object itself (no copy is made here).
        message.data[ENTITIES] = entity_data[ENTITIES]

        if bilou_tagging:
            bilou_utils.apply_bilou_schema_to_message(message)

        return {
            ENTITY_TAGS: [
                model_data_utils.get_tag_ids(
                    message, self.entity_tag_specs[0], bilou_tagging
                )
            ]
        }
示例#2
0
    def encode_entities(
        self,
        entity_data: Dict[Text, Any],
        interpreter: NaturalLanguageInterpreter,
        bilou_tagging: bool = False,
    ) -> Dict[Text, List["Features"]]:
        """Turn entity annotations into numeric per-token entity tags.

        The entity data is wrapped in a `Message`, passed through
        `interpreter.featurize_message`, and the resulting message is handed to
        `model_data_utils.get_tag_ids` together with the first entity tag
        specification.

        Args:
            entity_data: The dict containing the text and entity labels and locations
            interpreter: The interpreter used to featurize the message
            bilou_tagging: indicates whether BILOU tagging should be used or not

        Returns:
            A dictionary with the single key `ENTITY_TAGS` mapped to a one-element
            list of tag ids, or an empty dictionary when nothing can be encoded.
        """
        # TODO
        #  The entity states used to create the tag-idx-mapping contains the
        #  entities and the concatenated entity and roles/groups. We do not
        #  distinguish between entities and roles/groups right now.

        # Nothing to encode without entity data.
        if not entity_data:
            return {}

        # No tag specification available at all.
        if not self.entity_tag_specs:
            return {}

        # we cannot build a classifier with fewer than 2 classes
        if self.entity_tag_specs[0].num_tags < 2:
            return {}

        featurized = interpreter.featurize_message(Message(entity_data))
        if not featurized:
            return {}

        if bilou_tagging:
            bilou_utils.apply_bilou_schema_to_message(featurized)

        tag_ids = model_data_utils.get_tag_ids(
            featurized, self.entity_tag_specs[0], bilou_tagging
        )
        return {ENTITY_TAGS: [tag_ids]}