def get_builtin_entity_examples(builtin_entity_kind, language):
    """Return example values for a builtin entity in the given language.

    Results are memoised in the module-level ``_ENTITIES_EXAMPLES`` mapping,
    keyed first by entity kind and then by language, so the native library
    is only queried when a (kind, language) pair is not cached yet.

    Args:
        builtin_entity_kind (str): Builtin entity identifier.
        language (str): Language identifier.

    Returns:
        list of str: The examples decoded from the native string array.

    Raises:
        TypeError: If either argument is not a ``str``.
    """
    if not isinstance(builtin_entity_kind, str):
        raise TypeError("Expected `builtin_entity_kind` to be of type 'str' "
                        "but found: %s" % type(builtin_entity_kind))
    if not isinstance(language, str):
        raise TypeError(
            "Expected `language` to be of type 'str' but found: %s"
            % type(language))

    # The per-kind dict is created before the FFI call, exactly as before.
    examples_by_language = _ENTITIES_EXAMPLES.setdefault(
        builtin_entity_kind, dict())
    if language in examples_by_language:
        return examples_by_language[language]

    with string_array_pointer(pointer(CStringArray())) as ptr:
        exit_code = lib.snips_nlu_parsers_builtin_entity_examples(
            builtin_entity_kind.encode("utf8"),
            language.encode("utf8"),
            byref(ptr))
        check_ffi_error(
            exit_code,
            "Something went wrong when retrieving builtin entity examples")
        c_array = ptr.contents
        # Decode while the native array is still held by the context manager.
        examples_by_language[language] = [
            c_array.data[idx].decode("utf8") for idx in range(c_array.size)
        ]
    return examples_by_language[language]
def build(cls, language, gazetteer_entity_parser_path=None):
    """Create a `BuiltinEntityParser` for *language*.

    Args:
        language (str): Language identifier. It is upper-cased before being
            serialised into the parser configuration.
        gazetteer_entity_parser_path (str or Path, optional): Path to a
            gazetteer entity parser. If None, the builtin entity parser
            will only use grammar entities.

    Returns:
        An instance of ``cls`` wrapping the native parser handle.

    Raises:
        TypeError: If ``language`` is not a ``str``.
    """
    if not isinstance(language, str):
        raise TypeError("Expected language to be of type 'str' but found:"
                        " %s" % type(language))

    gazetteer_path = gazetteer_entity_parser_path
    if isinstance(gazetteer_path, Path):
        gazetteer_path = str(gazetteer_path)

    # The configuration is handed to the native library as UTF-8 JSON.
    config = {
        "language": language.upper(),
        "gazetteer_parser_path": gazetteer_path,
    }
    config_json = bytes(json.dumps(config), encoding="utf8")

    handle = c_void_p()
    exit_code = lib.snips_nlu_parsers_create_builtin_entity_parser(
        byref(handle), config_json)
    check_ffi_error(
        exit_code,
        "Something went wrong while creating the builtin entity parser")
    return cls(handle)
def parse(self, text, scope=None):
    """Extract gazetteer entities from *text*.

    Args:
        text (str): Input text.
        scope (list of str, optional): Entity labels to restrict the
            extraction to. When omitted, no scope restriction is passed to
            the native parser.

    Returns:
        list of dict: The extracted entities, decoded from the JSON string
        produced by the native library.

    Raises:
        TypeError: If ``text`` is not a ``str`` or ``scope`` contains a
            non-``str`` item.
    """
    if not isinstance(text, str):
        raise TypeError("Expected text to be of type 'str' but found: "
                        "%s" % type(text))

    c_scope = None
    if scope is not None:
        if any(not isinstance(label, str) for label in scope):
            raise TypeError(
                "Expected scope to contain objects of type 'str'")
        encoded_labels = [label.encode("utf8") for label in scope]
        # `scope_array` stays referenced by this frame for the whole call,
        # so the buffer behind `c_scope` remains valid.
        scope_array = CStringArray()
        scope_array.size = c_int(len(encoded_labels))
        scope_array.data = (c_char_p * len(encoded_labels))(*encoded_labels)
        c_scope = byref(scope_array)

    with string_pointer(c_char_p()) as ptr:
        exit_code = lib.snips_nlu_parsers_extract_gazetteer_entities_json(
            self._parser, text.encode("utf8"), c_scope, byref(ptr))
        check_ffi_error(
            exit_code,
            "Something went wrong when extracting gazetteer entities")
        raw_json = string_at(ptr)
    return json.loads(raw_json.decode("utf8"))
def persist(self, path):
    """Persist the builtin entity parser on disk at *path*.

    Args:
        path (str or Path): Destination path. A ``Path`` is converted to
            ``str`` and the result is passed UTF-8 encoded to the native
            library.
    """
    target = str(path) if isinstance(path, Path) else path
    exit_code = lib.snips_nlu_parsers_persist_builtin_entity_parser(
        self._parser, target.encode("utf8"))
    check_ffi_error(
        exit_code,
        "Something went wrong when persisting the builtin entity parser")
def from_path(cls, parser_path):
    """Load a :class:`BuiltinEntityParser` persisted on disk.

    Args:
        parser_path (str or Path): Location of the persisted builtin entity
            parser.

    Returns:
        An instance of ``cls`` wrapping the loaded native parser handle.
    """
    location = parser_path
    if isinstance(location, Path):
        location = str(location)
    encoded_location = bytes(location, encoding="utf8")

    handle = c_void_p()
    exit_code = lib.snips_nlu_parsers_load_builtin_entity_parser(
        byref(handle), encoded_location)
    check_ffi_error(
        exit_code,
        "Something went wrong when loading the builtin entity parser")
    return cls(handle)
def build(cls, build_config):
    """Create a new :class:`GazetteerEntityParser` from a build config.

    ``build_config`` is serialised with ``json.dumps`` and passed to the
    native library. It is expected to have the following shape::

        {
            "entity_parsers": [
                {
                    "entity_identifier": "my_first_entity",
                    "entity_parser": {
                        "gazetteer": [
                            {
                                "raw_value": "foo bar",
                                "resolved_value": "Foo Bar"
                            },
                            {
                                "raw_value": "yolo",
                                "resolved_value": "Yala"
                            }
                        ],
                        "threshold": 0.6,
                        "n_gazetteer_stop_words": 10,
                        "additional_stop_words": ["the", "a"]
                    }
                },
                {
                    "entity_identifier": "my_second_entity",
                    "entity_parser": {
                        "gazetteer": [
                            {
                                "raw_value": "the stones",
                                "resolved_value": "The Rolling Stones"
                            }
                        ],
                        "threshold": 0.6,
                        "n_gazetteer_stop_words": None,
                        "additional_stop_words": None
                    }
                },
            ]
        }

    Args:
        build_config (dict): JSON-serialisable build configuration.

    Returns:
        An instance of ``cls`` wrapping the native parser handle.
    """
    config_json = bytes(json.dumps(build_config), encoding="utf8")
    handle = c_void_p()
    exit_code = lib.snips_nlu_parsers_build_gazetteer_entity_parser(
        byref(handle), config_json)
    check_ffi_error(
        exit_code,
        "Something went wrong when building the gazetteer entity parser")
    return cls(handle)
def get_complete_entity_ontology():
    """List the complete entity ontology for all languages.

    The ontology is fetched from the native library as a JSON string on the
    first call and cached in the module-level ``_COMPLETE_ENTITY_ONTOLOGY``;
    later calls return the cached object.

    Returns:
        The Python object obtained by parsing the ontology JSON.
    """
    global _COMPLETE_ENTITY_ONTOLOGY
    if _COMPLETE_ENTITY_ONTOLOGY is None:
        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_complete_entity_ontology_json(
                byref(ptr))
            check_ffi_error(
                exit_code,
                "Something went wrong when retrieving "
                "complete entity ontology")
            json_str = string_at(ptr).decode("utf8")
            # `json_str` is already decoded text. The `encoding` keyword of
            # json.loads() was removed in Python 3.9 and now raises
            # TypeError, so it must not be passed.
            _COMPLETE_ENTITY_ONTOLOGY = json.loads(json_str)
    return _COMPLETE_ENTITY_ONTOLOGY
def from_path(cls, parser_path):
    """Load a :class:`GazetteerEntityParser` persisted on disk.

    Args:
        parser_path (str or Path): Location of the persisted gazetteer
            entity parser.

    Returns:
        An instance of ``cls`` wrapping the loaded native parser handle.
    """
    location = parser_path
    if isinstance(location, Path):
        location = str(location)
    encoded_location = bytes(location, encoding="utf8")

    # NOTE(review): the handle is allocated as `pointer(c_void_p())` here,
    # whereas the other constructors visible in this file use a plain
    # `c_void_p()` -- confirm whether the difference is intentional.
    handle = pointer(c_void_p())
    exit_code = lib.snips_nlu_parsers_load_gazetteer_entity_parser(
        byref(handle), encoded_location)
    check_ffi_error(
        exit_code,
        "Something went wrong when loading the gazetteer entity parser")
    return cls(handle)
def get_language_entity_ontology(language):
    """List the complete entity ontology for the specified language.

    The ontology is fetched from the native library as a JSON string the
    first time a language is requested and cached in the module-level
    ``_LANGUAGE_ENTITY_ONTOLOGY`` mapping, keyed by language.

    Args:
        language (str): Language identifier.

    Returns:
        The Python object obtained by parsing the ontology JSON.
    """
    global _LANGUAGE_ENTITY_ONTOLOGY
    if language not in _LANGUAGE_ENTITY_ONTOLOGY:
        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_language_entity_ontology_json(
                language.encode("utf8"), byref(ptr))
            check_ffi_error(
                exit_code,
                "Something went wrong when retrieving "
                "language entity ontology")
            json_str = string_at(ptr).decode("utf8")
            # `json_str` is already decoded text. The `encoding` keyword of
            # json.loads() was removed in Python 3.9 and now raises
            # TypeError, so it must not be passed.
            _LANGUAGE_ENTITY_ONTOLOGY[language] = json.loads(json_str)
    return _LANGUAGE_ENTITY_ONTOLOGY[language]
def get_builtin_entity_shortname(entity):
    """Return the short name of a builtin entity.

    Short names are cached in the module-level
    ``_BUILTIN_ENTITIES_SHORTNAMES`` mapping, so the native library is only
    queried for entities that have not been looked up before.

    Examples:

    >>> get_builtin_entity_shortname(u"snips/amountOfMoney")
    'AmountOfMoney'
    """
    if entity in _BUILTIN_ENTITIES_SHORTNAMES:
        return _BUILTIN_ENTITIES_SHORTNAMES[entity]

    with string_pointer(c_char_p()) as ptr:
        exit_code = lib.snips_nlu_ontology_entity_shortname(
            entity.encode("utf8"), byref(ptr))
        check_ffi_error(
            exit_code,
            "Something went wrong when retrieving builtin entity shortname")
        raw_shortname = string_at(ptr)
    _BUILTIN_ENTITIES_SHORTNAMES[entity] = raw_shortname.decode("utf8")
    return _BUILTIN_ENTITIES_SHORTNAMES[entity]
def parse(self, text, scope=None, max_alternative_resolved_values=5):
    """Extract builtin entities from *text*.

    Args:
        text (str): Input text.
        scope (list of str, optional): List of builtin entity labels. If
            defined, the parser will extract entities using the provided
            scope instead of the entire scope of all available entities.
            This allows looking for specific builtin entity kinds.
        max_alternative_resolved_values (int, optional): Maximum number of
            alternative resolved values to return in addition to the top
            one (default 5).

    Returns:
        list of dict: The extracted entities, decoded from the JSON string
        produced by the native library.

    Raises:
        TypeError: If ``text`` is not a ``str`` or ``scope`` contains a
            non-``str`` item.
    """
    if not isinstance(text, str):
        # The message names the argument actually being validated; it
        # previously said "language", which was misleading.
        raise TypeError("Expected text to be of type 'str' but found: "
                        "%s" % type(text))
    if scope is not None:
        if not all(isinstance(e, str) for e in scope):
            raise TypeError(
                "Expected scope to contain objects of type 'str'")
        scope = [e.encode("utf8") for e in scope]
        # `arr` stays referenced by this frame for the whole call, so the
        # buffer behind the `byref` handle remains valid.
        arr = CStringArray()
        arr.size = c_int(len(scope))
        arr.data = (c_char_p * len(scope))(*scope)
        scope = byref(arr)

    with string_pointer(c_char_p()) as ptr:
        exit_code = lib.snips_nlu_parsers_extract_builtin_entities_json(
            self._parser, text.encode("utf8"), scope,
            max_alternative_resolved_values, byref(ptr))
        check_ffi_error(
            exit_code,
            "Something went wrong when extracting builtin entities")
        result = string_at(ptr)
    return json.loads(result.decode("utf8"))
def get_supported_entities(language):
    """List the builtin entities supported in the specified *language*.

    Results are cached per language in the module-level
    ``_SUPPORTED_ENTITIES`` mapping.

    Args:
        language (str): Language identifier.

    Returns:
        set of str: The entity labels decoded from the native string array.

    Raises:
        TypeError: If ``language`` is not a ``str``.
    """
    if not isinstance(language, str):
        raise TypeError("Expected language to be of type 'str' but found: %s"
                        % type(language))

    if language in _SUPPORTED_ENTITIES:
        return _SUPPORTED_ENTITIES[language]

    with string_array_pointer(pointer(CStringArray())) as ptr:
        exit_code = lib.snips_nlu_parsers_supported_builtin_entities(
            language.encode("utf8"), byref(ptr))
        check_ffi_error(
            exit_code,
            "Something went wrong when retrieving supported entities")
        c_array = ptr.contents
        # Decode while the native array is still held by the context manager.
        _SUPPORTED_ENTITIES[language] = {
            c_array.data[idx].decode("utf8") for idx in range(c_array.size)
        }
    return _SUPPORTED_ENTITIES[language]
def extend_gazetteer_entity(self, entity_name, entity_values):
    """Extend a builtin gazetteer entity with custom values.

    Nothing is sent to the native library when ``entity_values`` is empty
    or None.

    Args:
        entity_name (str): Gazetteer entity identifier.
        entity_values (list of dict): Entity values represented as
            dictionaries with a 'raw_value' key and a 'resolved_value' key.

    Returns:
        The same object, updated.

    Raises:
        A failure of the native call is reported through
        ``check_ffi_error``. NOTE(review): the previous documentation
        stated ``ValueError`` for an unknown entity name -- confirm against
        ``check_ffi_error``.
    """
    if entity_values:
        values_json = bytes(json.dumps(entity_values), encoding="utf8")
        exit_code = lib.snips_nlu_parsers_extend_gazetteer_entity_json(
            self._parser, entity_name.encode("utf8"), values_json)
        check_ffi_error(
            exit_code,
            "Something went wrong when extending the "
            "builtin entity '%s'" % entity_name)
    return self