def generate_dataset(language, *files):
    """Print a Snips NLU dataset built from text friendly files.

    When at least one of the given file names ends with ``.yml`` or
    ``.yaml``, every file is handed to ``Dataset.from_yaml_files``;
    otherwise every file is handed to ``Dataset.from_files``.

    Args:
        language: language of the dataset, converted with
            ``unicode_string`` before being passed to the loader.
        *files: names of the files to build the dataset from.

    Returns:
        None. The dataset JSON is printed on stdout with sorted keys and
        a 2-space indent.
    """
    language = unicode_string(language)
    file_list = list(files)
    # A single YAML file is enough to route the whole batch to the YAML
    # loader.
    has_yaml = any(name.endswith((".yml", ".yaml")) for name in file_list)
    loader = Dataset.from_yaml_files if has_yaml else Dataset.from_files
    dataset = loader(language, file_list)
    print(json_string(dataset.json, indent=2, sort_keys=True))
def print_parsing_result(engine, query, intents_filter):
    """Parse ``query`` with ``engine`` and print the result as JSON.

    Args:
        engine: object exposing a ``parse(query, intents_filter)`` method.
        query: text to parse; converted with ``unicode_string`` first.
        intents_filter: forwarded unchanged as the second argument of
            ``engine.parse``.

    Returns:
        None. The parsing result is printed on stdout with sorted keys
        and a 2-space indent.
    """
    from snips_nlu.common.utils import unicode_string, json_string

    text = unicode_string(query)
    parsing = engine.parse(text, intents_filter)
    print(json_string(parsing, sort_keys=True, indent=2))
def generate_dataset(language, *yaml_files):
    """Build a Snips NLU dataset from YAML definition files and print it

    The expected format of the YAML files is described in
    :meth:`.Intent.from_yaml` and :meth:`.Entity.from_yaml`.

    Args:
        language (str): language of the dataset (iso code)
        *yaml_files: intent and entity definition files in YAML format.

    Returns:
        None. The json dataset output is printed out on stdout, with
        sorted keys and a 2-space indent.
    """
    lang = unicode_string(language)
    sources = list(yaml_files)
    dataset = Dataset.from_yaml_files(lang, sources)
    serialized = json_string(dataset.json, indent=2, sort_keys=True)
    print(serialized)
def writeFileContent(path, content):
    """Write ``content`` to ``path`` as UTF-8 text.

    Args:
        path: object exposing ``open(mode=..., encoding=...)``; it is
            opened in ``"w"`` mode.
        content: value to write, converted with ``unicode_string`` before
            being written.
    """
    with path.open(mode="w", encoding="utf8") as stream:
        # Convert only once the file is open, as the original code does.
        text = unicode_string(content)
        stream.write(text)
def from_yaml_files(cls, language, filenames):
    """Builds a :class:`.Dataset` out of a language and a collection of
    YAML files or streams describing intents and entities

    A file does not have to hold exactly one intent or one entity: several
    intents and entities may be merged together in the same file.

    Args:
        language (str): language of the dataset (ISO639-1)
        filenames (iterable): filenames or stream objects corresponding to
            intents and entities data.

    Example:

        A dataset can be defined with a YAML document following the schema
        illustrated in the example below:

        >>> import io
        >>> from snips_nlu.common.utils import json_string
        >>> dataset_yaml = io.StringIO('''
        ... # searchFlight Intent
        ... ---
        ... type: intent
        ... name: searchFlight
        ... slots:
        ...   - name: origin
        ...     entity: city
        ...   - name: destination
        ...     entity: city
        ...   - name: date
        ...     entity: snips/datetime
        ... utterances:
        ...   - find me a flight from [origin](Oslo) to [destination](Lima)
        ...   - I need a flight leaving to [destination](Berlin)
        ...
        ... # City Entity
        ... ---
        ... type: entity
        ... name: city
        ... values:
        ...   - london
        ...   - [paris, city of lights]''')
        >>> dataset = Dataset.from_yaml_files("en", [dataset_yaml])
        >>> print(json_string(dataset.json, indent=4, sort_keys=True))
        {
            "entities": {
                "city": {
                    "automatically_extensible": true,
                    "data": [
                        {
                            "synonyms": [],
                            "value": "london"
                        },
                        {
                            "synonyms": [
                                "city of lights"
                            ],
                            "value": "paris"
                        }
                    ],
                    "matching_strictness": 1.0,
                    "use_synonyms": true
                }
            },
            "intents": {
                "searchFlight": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "find me a flight from "
                                },
                                {
                                    "entity": "city",
                                    "slot_name": "origin",
                                    "text": "Oslo"
                                },
                                {
                                    "text": " to "
                                },
                                {
                                    "entity": "city",
                                    "slot_name": "destination",
                                    "text": "Lima"
                                }
                            ]
                        },
                        {
                            "data": [
                                {
                                    "text": "I need a flight leaving to "
                                },
                                {
                                    "entity": "city",
                                    "slot_name": "destination",
                                    "text": "Berlin"
                                }
                            ]
                        }
                    ]
                }
            },
            "language": "en"
        }

    Raises:
        DatasetFormatError: When one of the documents present in the YAML
            files has a wrong 'type' attribute, which is not 'entity' nor
            'intent'
        IntentFormatError: When the YAML document of an intent does not
            correspond to the
            :ref:`expected intent format <yaml_intent_format>`
        EntityFormatError: When the YAML document of an entity does not
            correspond to the
            :ref:`expected entity format <yaml_entity_format>`
    """
    language = unicode_string(language)
    all_intents = []
    all_entities = []
    for source in filenames:
        if not isinstance(source, io.IOBase):
            # Anything that is not a stream is opened as a UTF-8 text
            # file, and its name is forwarded alongside the stream.
            with io.open(source, encoding="utf8") as stream:
                parsed_intents, parsed_entities = cls._load_dataset_parts(
                    stream, source)
        else:
            # Streams are read as they are, under a generic label.
            parsed_intents, parsed_entities = cls._load_dataset_parts(
                source, "stream object")
        all_intents.extend(parsed_intents)
        all_entities.extend(parsed_entities)
    return cls(language, all_intents, all_entities)
def test_generate_dataset(self):
    # Checks that generate_dataset prints the expected JSON dataset on
    # stdout when it is given a single YAML file.

    # Given
    yaml_string = """
# searchFlight Intent
---
type: intent
name: searchFlight
utterances:
  - find me a flight to [destination:city](Lima) [date:snips/datetime](tonight)

# City Entity
---
type: entity
name: city
values:
  - [new york, big apple]"""
    expected_value = """{
  "entities": {
    "city": {
      "automatically_extensible": true,
      "data": [
        {
          "synonyms": [
            "big apple"
          ],
          "value": "new york"
        }
      ],
      "matching_strictness": 1.0,
      "use_synonyms": true
    },
    "snips/datetime": {}
  },
  "intents": {
    "searchFlight": {
      "utterances": [
        {
          "data": [
            {
              "text": "find me a flight to "
            },
            {
              "entity": "city",
              "slot_name": "destination",
              "text": "Lima"
            },
            {
              "text": " "
            },
            {
              "entity": "snips/datetime",
              "slot_name": "date",
              "text": "tonight"
            }
          ]
        }
      ]
    }
  },
  "language": "en"
}
"""
    # The dataset definition is written to a temporary file carrying a
    # ".yaml" suffix.
    self.tmp_file_path = self.tmp_file_path.with_suffix(".yaml")
    with self.tmp_file_path.open(mode="w") as handle:
        handle.write(unicode_string(yaml_string))

    # When
    captured = io.StringIO()
    with redirect_stdout(captured):
        generate_dataset("en", str(self.tmp_file_path))
    printed_value = captured.getvalue()

    # Then
    self.assertEqual(expected_value, printed_value)
def print_parsing_result(engine, query):
    """Parse ``query`` with ``engine`` and print the result as JSON.

    Args:
        engine: object exposing a ``parse(query)`` method.
        query: text to parse; converted with ``unicode_string`` first.

    Returns:
        None. The parsing result is printed on stdout with sorted keys
        and a 2-space indent.
    """
    text = unicode_string(query)
    parsing = engine.parse(text)
    print(json_string(parsing, sort_keys=True, indent=2))