示例#1
0
def get_builtin_entity_examples(builtin_entity_kind, language):
    """Return example values of a builtin entity kind in a given language

    Results are memoized in the module-level ``_ENTITIES_EXAMPLES`` mapping,
    keyed first by entity kind and then by language, so the native library
    is queried at most once per (kind, language) pair.

    Args:
        builtin_entity_kind (str): Builtin entity identifier
        language (str): Language identifier

    Returns:
        list of str: The examples decoded from the native string array. The
        cached list object itself is returned, not a copy.

    Raises:
        TypeError: when one of the arguments is not a ``str``
    """
    global _ENTITIES_EXAMPLES

    if not isinstance(builtin_entity_kind, str):
        raise TypeError("Expected `builtin_entity_kind` to be of type 'str' "
                        "but found: %s" % type(builtin_entity_kind))
    if not isinstance(language, str):
        raise TypeError(
            "Expected `language` to be of type 'str' but found: %s" %
            type(language))

    # One sub-cache per entity kind, created on first use.
    examples_by_language = _ENTITIES_EXAMPLES.setdefault(
        builtin_entity_kind, dict())
    if language in examples_by_language:
        return examples_by_language[language]

    with string_array_pointer(pointer(CStringArray())) as ptr:
        exit_code = lib.snips_nlu_parsers_builtin_entity_examples(
            builtin_entity_kind.encode("utf8"), language.encode("utf8"),
            byref(ptr))
        check_ffi_error(
            exit_code, "Something went wrong when retrieving "
            "builtin entity examples")
        c_array = ptr.contents
        examples_by_language[language] = [
            c_array.data[idx].decode("utf8") for idx in range(c_array.size)]
    return examples_by_language[language]
示例#2
0
    def build(cls, language, gazetteer_entity_parser_path=None):
        """Create a `BuiltinEntityParser` for the given language

        Args:
            language (str): Language identifier; it is upper-cased before
                being handed to the native library.
            gazetteer_entity_parser_path (str or Path, optional): Location of
                a gazetteer entity parser. When None, the resulting parser
                only relies on grammar entities.

        Returns:
            An instance of ``cls`` wrapping the native parser handle.

        Raises:
            TypeError: when ``language`` is not a ``str``
        """
        gazetteer_path = gazetteer_entity_parser_path
        if isinstance(gazetteer_path, Path):
            gazetteer_path = str(gazetteer_path)
        if not isinstance(language, str):
            raise TypeError("Expected language to be of type 'str' but found:"
                            " %s" % type(language))

        # The native constructor takes its configuration as UTF-8 JSON.
        config = {
            "language": language.upper(),
            "gazetteer_parser_path": gazetteer_path,
        }
        serialized_config = json.dumps(config).encode("utf8")

        handle = c_void_p()
        exit_code = lib.snips_nlu_parsers_create_builtin_entity_parser(
            byref(handle), serialized_config)
        check_ffi_error(
            exit_code, "Something went wrong while creating the "
            "builtin entity parser")
        return cls(handle)
示例#3
0
    def parse(self, text, scope=None):
        """Run gazetteer entity extraction on *text*

        Args:
            text (str): Input to analyze
            scope (list of str, optional): Entity labels to restrict the
                extraction to. When omitted, the call is made without any
                scope restriction.

        Returns:
            list of dict: The entities decoded from the JSON produced by the
            native library

        Raises:
            TypeError: when *text* is not a ``str`` or when *scope* contains
                an item which is not a ``str``
        """
        if not isinstance(text, str):
            raise TypeError("Expected text to be of type 'str' but found: "
                            "%s" % type(text))

        c_scope = None
        if scope is not None:
            if any(not isinstance(label, str) for label in scope):
                raise TypeError(
                    "Expected scope to contain objects of type 'str'")
            # Marshal the labels into the C string array struct expected by
            # the native function.
            encoded_labels = [label.encode("utf8") for label in scope]
            n_labels = len(encoded_labels)
            string_array = CStringArray()
            string_array.size = c_int(n_labels)
            string_array.data = (c_char_p * n_labels)(*encoded_labels)
            c_scope = byref(string_array)

        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_extract_gazetteer_entities_json(
                self._parser, text.encode("utf8"), c_scope, byref(ptr))
            check_ffi_error(
                exit_code, "Something went wrong when "
                "extracting gazetteer entities")
            raw_json = string_at(ptr)
            return json.loads(raw_json.decode("utf8"))
示例#4
0
 def persist(self, path):
     """Write the builtin entity parser to disk at *path*

     Args:
         path (str or Path): Destination; a ``Path`` is converted to ``str``
             before being UTF-8 encoded for the native call.
     """
     destination = str(path) if isinstance(path, Path) else path
     exit_code = lib.snips_nlu_parsers_persist_builtin_entity_parser(
         self._parser, destination.encode("utf8"))
     check_ffi_error(
         exit_code, "Something went wrong when persisting the "
         "builtin entity parser")
示例#5
0
 def from_path(cls, parser_path):
     """Load a :class:`BuiltinEntityParser` previously persisted on disk

     Args:
         parser_path (str or Path): Location of the persisted parser

     Returns:
         An instance of ``cls`` wrapping the loaded native parser handle.
     """
     location = parser_path
     if isinstance(location, Path):
         location = str(location)
     handle = c_void_p()
     encoded_location = bytes(location, encoding="utf8")
     exit_code = lib.snips_nlu_parsers_load_builtin_entity_parser(
         byref(handle), encoded_location)
     check_ffi_error(
         exit_code, "Something went wrong when loading the "
         "builtin entity parser")
     return cls(handle)
    def build(cls, build_config):
        """Build a :class:`GazetteerEntityParser` out of a build configuration

        The configuration is serialized to JSON and handed to the native
        library. It is expected to look like this:

            {
                "entity_parsers": [
                    {
                        "entity_identifier": "my_first_entity",
                        "entity_parser": {
                            "gazetteer": [
                                {
                                    "raw_value": "foo bar",
                                    "resolved_value": "Foo Bar"
                                },
                                {
                                    "raw_value": "yolo",
                                    "resolved_value": "Yala"
                                }
                            ],
                            "threshold": 0.6,
                            "n_gazetteer_stop_words": 10,
                            "additional_stop_words": ["the", "a"]
                        }
                    },
                    {
                        "entity_identifier": "my_second_entity",
                        "entity_parser": {
                            "gazetteer": [
                                {
                                    "raw_value": "the stones",
                                    "resolved_value": "The Rolling Stones"
                                }
                            ],
                            "threshold": 0.6,
                            "n_gazetteer_stop_words": None,
                            "additional_stop_words": None
                        }
                    },
                ]
            }

        Args:
            build_config (dict): JSON-serializable build configuration

        Returns:
            An instance of ``cls`` wrapping the native parser handle.
        """
        serialized_config = json.dumps(build_config).encode("utf8")
        handle = c_void_p()
        exit_code = lib.snips_nlu_parsers_build_gazetteer_entity_parser(
            byref(handle), serialized_config)
        check_ffi_error(
            exit_code, "Something went wrong when building the "
            "gazetteer entity parser")
        return cls(handle)
示例#7
0
def get_complete_entity_ontology():
    """Return the complete entity ontology for all languages

    The ontology is fetched from the native library as a JSON string the
    first time this function is called, deserialized, and stored in the
    module-level ``_COMPLETE_ENTITY_ONTOLOGY`` cache; subsequent calls return
    the cached object.

    Returns:
        The deserialized JSON ontology (the cached object itself).
    """
    global _COMPLETE_ENTITY_ONTOLOGY
    if _COMPLETE_ENTITY_ONTOLOGY is None:
        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_complete_entity_ontology_json(
                byref(ptr))
            check_ffi_error(
                exit_code, "Something went wrong when retrieving "
                "complete entity ontology")
            json_str = string_at(ptr).decode("utf8")
            # No `encoding` keyword here: `json.loads` ignored it and it was
            # removed in Python 3.9, where passing it raises a TypeError.
            # The payload has already been decoded to `str` above.
            _COMPLETE_ENTITY_ONTOLOGY = json.loads(json_str)
    return _COMPLETE_ENTITY_ONTOLOGY
示例#8
0
 def from_path(cls, parser_path):
     """Load a :class:`GazetteerEntityParser` previously persisted on disk

     Args:
         parser_path (str or Path): Location of the persisted parser

     Returns:
         An instance of ``cls`` wrapping the loaded native parser handle.
     """
     location = parser_path
     if isinstance(location, Path):
         location = str(location)
     # The handle is created as a ctypes pointer to a void pointer and passed
     # by reference so that the native loader can fill it in.
     handle = pointer(c_void_p())
     encoded_location = bytes(location, encoding="utf8")
     exit_code = lib.snips_nlu_parsers_load_gazetteer_entity_parser(
         byref(handle), encoded_location)
     check_ffi_error(
         exit_code, "Something went wrong when loading the "
         "gazetteer entity parser")
     return cls(handle)
示例#9
0
def get_language_entity_ontology(language):
    """Return the complete entity ontology for the specified language

    The ontology is fetched from the native library as a JSON string the
    first time a given language is requested, deserialized, and stored in
    the module-level ``_LANGUAGE_ENTITY_ONTOLOGY`` cache keyed by language;
    subsequent calls for the same language return the cached object.

    Args:
        language (str): Language identifier

    Returns:
        The deserialized JSON ontology (the cached object itself).
    """
    global _LANGUAGE_ENTITY_ONTOLOGY
    if language not in _LANGUAGE_ENTITY_ONTOLOGY:
        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_language_entity_ontology_json(
                language.encode("utf8"), byref(ptr))
            check_ffi_error(
                exit_code, "Something went wrong when retrieving "
                "language entity ontology")
            json_str = string_at(ptr).decode("utf8")
            # No `encoding` keyword here: `json.loads` ignored it and it was
            # removed in Python 3.9, where passing it raises a TypeError.
            # The payload has already been decoded to `str` above.
            _LANGUAGE_ENTITY_ONTOLOGY[language] = json.loads(json_str)
    return _LANGUAGE_ENTITY_ONTOLOGY[language]
示例#10
0
def get_builtin_entity_shortname(entity):
    """Return the short name of a builtin entity

    Short names are memoized in the module-level
    ``_BUILTIN_ENTITIES_SHORTNAMES`` mapping, so the native library is
    queried at most once per entity.

    Args:
        entity (str): Builtin entity identifier

    Returns:
        str: The short name decoded from the native library's answer

    Examples:

    >>> get_builtin_entity_shortname(u"snips/amountOfMoney")
    'AmountOfMoney'
    """
    global _BUILTIN_ENTITIES_SHORTNAMES
    if entity in _BUILTIN_ENTITIES_SHORTNAMES:
        return _BUILTIN_ENTITIES_SHORTNAMES[entity]

    with string_pointer(c_char_p()) as ptr:
        exit_code = lib.snips_nlu_ontology_entity_shortname(
            entity.encode("utf8"), byref(ptr))
        check_ffi_error(
            exit_code, "Something went wrong when retrieving "
            "builtin entity shortname")
        raw_shortname = string_at(ptr)
        _BUILTIN_ENTITIES_SHORTNAMES[entity] = raw_shortname.decode("utf8")
    return _BUILTIN_ENTITIES_SHORTNAMES[entity]
示例#11
0
    def parse(self, text, scope=None, max_alternative_resolved_values=5):
        """Extracts builtin entities from *text*

        Args:
            text (str): Input
            scope (list of str, optional): List of builtin entity labels. If
                defined, the parser will extract entities using the provided
                scope instead of the entire scope of all available entities.
                This allows to look for specific builtin entity kinds.
            max_alternative_resolved_values (int, optional): Maximum number of
                alternative resolved values to return in addition to the top
                one (default 5).

        Returns:
            list of dict: The list of extracted entities

        Raises:
            TypeError: when *text* is not a ``str`` or when *scope* contains
                an item which is not a ``str``
        """
        if not isinstance(text, str):
            # The message names the argument actually being validated
            # (`text`), not `language`.
            raise TypeError("Expected text to be of type 'str' but found: "
                            "%s" % type(text))
        if scope is not None:
            if not all(isinstance(e, str) for e in scope):
                raise TypeError(
                    "Expected scope to contain objects of type 'str'")
            # Marshal the labels into the C string array struct expected by
            # the native function.
            scope = [e.encode("utf8") for e in scope]
            arr = CStringArray()
            arr.size = c_int(len(scope))
            arr.data = (c_char_p * len(scope))(*scope)
            scope = byref(arr)

        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_extract_builtin_entities_json(
                self._parser, text.encode("utf8"), scope,
                max_alternative_resolved_values, byref(ptr))
            check_ffi_error(
                exit_code, "Something went wrong when extracting "
                "builtin entities")
            result = string_at(ptr)
            return json.loads(result.decode("utf8"))
示例#12
0
def get_supported_entities(language):
    """Return the builtin entities supported in the specified *language*

    Results are memoized per language in the module-level
    ``_SUPPORTED_ENTITIES`` mapping.

    Args:
        language (str): Language identifier

    Returns:
          set of str: the entity labels (the cached set object itself)

    Raises:
        TypeError: when *language* is not a ``str``
    """
    global _SUPPORTED_ENTITIES

    if not isinstance(language, str):
        raise TypeError("Expected language to be of type 'str' but found: %s" %
                        type(language))

    if language in _SUPPORTED_ENTITIES:
        return _SUPPORTED_ENTITIES[language]

    with string_array_pointer(pointer(CStringArray())) as ptr:
        exit_code = lib.snips_nlu_parsers_supported_builtin_entities(
            language.encode("utf8"), byref(ptr))
        check_ffi_error(
            exit_code, "Something went wrong when retrieving "
            "supported entities")
        c_array = ptr.contents
        _SUPPORTED_ENTITIES[language] = {
            c_array.data[idx].decode("utf8") for idx in range(c_array.size)}
    return _SUPPORTED_ENTITIES[language]
示例#13
0
    def extend_gazetteer_entity(self, entity_name, entity_values):
        """Add custom values to a builtin gazetteer entity

        Nothing is sent to the native library when *entity_values* is empty.

        Args:
            entity_name (str): Gazetteer entity identifier
            entity_values (list of dict): Entity values, each one being a
                dictionary with a 'raw_value' key and a 'resolved_value' key

        Returns:
            The same object, updated.

        Raises:
            ValueError: when the entity name is unknown or not present in the
                parser
        """
        if entity_values:
            # The values cross the FFI boundary as UTF-8 encoded JSON.
            serialized_values = json.dumps(entity_values).encode("utf8")
            exit_code = lib.snips_nlu_parsers_extend_gazetteer_entity_json(
                self._parser, entity_name.encode("utf8"), serialized_values)
            check_ffi_error(
                exit_code, "Something went wrong when extending the "
                "builtin entity '%s'" % entity_name)
        return self