示例#1
0
def generate_dataset(language, *files):
    """Build a Snips NLU dataset from the given files and print it as JSON.

    The YAML loader (``Dataset.from_yaml_files``) is selected as soon as one
    of the file names ends with ``.yml`` or ``.yaml``; otherwise every file
    is handed to ``Dataset.from_files``.

    Args:
        language: language of the dataset, converted with ``unicode_string``
            before being passed to the loader.
        *files: names of the files describing the dataset.

    Returns:
        None. The JSON dataset is printed on stdout.
    """
    language = unicode_string(language)
    paths = list(files)
    yaml_suffixes = (".yml", ".yaml")
    has_yaml = any(path.endswith(yaml_suffixes) for path in paths)
    loader = Dataset.from_yaml_files if has_yaml else Dataset.from_files
    dataset = loader(language, paths)
    serialized = json_string(dataset.json, indent=2, sort_keys=True)
    print(serialized)
def print_parsing_result(engine, query, intents_filter):
    """Parse ``query`` with ``engine`` and print the result as JSON.

    Args:
        engine: object exposing ``parse(query, intents_filter)``.
        query: text to parse; it is converted with ``unicode_string`` first.
        intents_filter: forwarded unchanged as the second argument of
            ``engine.parse``.

    Returns:
        None. The JSON dump (sorted keys, 2-space indent) goes to stdout.
    """
    from snips_nlu.common.utils import json_string, unicode_string

    text = unicode_string(query)
    parsing = engine.parse(text, intents_filter)
    print(json_string(parsing, sort_keys=True, indent=2))
def generate_dataset(language, *yaml_files):
    """Prints the JSON dataset built from YAML intent and entity files

    The format expected for the YAML files is described in
    :meth:`.Intent.from_yaml` and :meth:`.Entity.from_yaml`.

    Args:
        language (str): language of the dataset (iso code)
        *yaml_files: intent and entity definition files in YAML format.

    Returns:
        None. The dataset is dumped as JSON (sorted keys, 2-space indent) on
        stdout.
    """
    language = unicode_string(language)
    definition_files = list(yaml_files)
    dataset = Dataset.from_yaml_files(language, definition_files)
    serialized = json_string(dataset.json, sort_keys=True, indent=2)
    print(serialized)
示例#4
0
 def writeFileContent(path, content):
     """Overwrite the file at ``path`` with ``content`` as utf8 text.

     ``path`` must provide an ``open(mode=..., encoding=...)`` method;
     ``content`` is converted with ``unicode_string`` before being written.
     """
     with path.open(mode="w", encoding="utf8") as stream:
         text = unicode_string(content)
         stream.write(text)
示例#5
0
    def from_yaml_files(cls, language, filenames):
        """Builds a :class:`.Dataset` out of YAML files or streams

        Every file or stream may hold several intent and entity documents
        mixed together: there is no requirement of having a single intent or
        a single entity per file.

        Args:
            language (str): language of the dataset (ISO639-1)
            filenames (iterable): paths of the YAML files, or already opened
                stream objects, holding the intents and entities data.

        Returns:
            An instance of ``cls`` created from the language and from all the
            intents and entities collected across ``filenames``.

        Example:

            A dataset can be defined with a YAML document following the schema
            illustrated in the example below:

            >>> import io
            >>> from snips_nlu.common.utils import json_string
            >>> dataset_yaml = io.StringIO('''
            ... # searchFlight Intent
            ... ---
            ... type: intent
            ... name: searchFlight
            ... slots:
            ...   - name: origin
            ...     entity: city
            ...   - name: destination
            ...     entity: city
            ...   - name: date
            ...     entity: snips/datetime
            ... utterances:
            ...   - find me a flight from [origin](Oslo) to [destination](Lima)
            ...   - I need a flight leaving to [destination](Berlin)
            ...
            ... # City Entity
            ... ---
            ... type: entity
            ... name: city
            ... values:
            ...   - london
            ...   - [paris, city of lights]''')
            >>> dataset = Dataset.from_yaml_files("en", [dataset_yaml])
            >>> print(json_string(dataset.json, indent=4, sort_keys=True))
            {
                "entities": {
                    "city": {
                        "automatically_extensible": true,
                        "data": [
                            {
                                "synonyms": [],
                                "value": "london"
                            },
                            {
                                "synonyms": [
                                    "city of lights"
                                ],
                                "value": "paris"
                            }
                        ],
                        "matching_strictness": 1.0,
                        "use_synonyms": true
                    }
                },
                "intents": {
                    "searchFlight": {
                        "utterances": [
                            {
                                "data": [
                                    {
                                        "text": "find me a flight from "
                                    },
                                    {
                                        "entity": "city",
                                        "slot_name": "origin",
                                        "text": "Oslo"
                                    },
                                    {
                                        "text": " to "
                                    },
                                    {
                                        "entity": "city",
                                        "slot_name": "destination",
                                        "text": "Lima"
                                    }
                                ]
                            },
                            {
                                "data": [
                                    {
                                        "text": "I need a flight leaving to "
                                    },
                                    {
                                        "entity": "city",
                                        "slot_name": "destination",
                                        "text": "Berlin"
                                    }
                                ]
                            }
                        ]
                    }
                },
                "language": "en"
            }

        Raises:
            DatasetFormatError: When one of the documents present in the YAML
                files has a wrong 'type' attribute, which is not 'entity' nor
                'intent'
            IntentFormatError: When the YAML document of an intent does not
                correspond to the
                :ref:`expected intent format <yaml_intent_format>`
            EntityFormatError: When the YAML document of an entity does not
                correspond to the
                :ref:`expected entity format <yaml_entity_format>`
        """
        language = unicode_string(language)
        all_intents, all_entities = [], []
        for source in filenames:
            if not isinstance(source, io.IOBase):
                # Not a stream: treat it as a path and read it as utf8 text,
                # passing the path along as the name of the source
                with io.open(source, encoding="utf8") as stream:
                    parsed_intents, parsed_entities = \
                        cls._load_dataset_parts(stream, source)
            else:
                # Already opened stream: consumed as is, under a generic name
                parsed_intents, parsed_entities = cls._load_dataset_parts(
                    source, "stream object")
            all_intents.extend(parsed_intents)
            all_entities.extend(parsed_entities)
        return cls(language, all_intents, all_entities)
示例#6
0
    def test_generate_dataset(self):
        # Given
        yaml_string = """
# searchFlight Intent
---
type: intent
name: searchFlight
utterances:
  - find me a flight to [destination:city](Lima) [date:snips/datetime](tonight)

# City Entity
---
type: entity
name: city
values:
  - [new york, big apple]"""
        self.tmp_file_path = self.tmp_file_path.with_suffix(".yaml")
        with self.tmp_file_path.open(mode="w") as f:
            f.write(unicode_string(yaml_string))

        # When
        out = io.StringIO()
        with redirect_stdout(out):
            generate_dataset("en", str(self.tmp_file_path))
        printed_value = out.getvalue()

        # Then
        expected_value = """{
  "entities": {
    "city": {
      "automatically_extensible": true,
      "data": [
        {
          "synonyms": [
            "big apple"
          ],
          "value": "new york"
        }
      ],
      "matching_strictness": 1.0,
      "use_synonyms": true
    },
    "snips/datetime": {}
  },
  "intents": {
    "searchFlight": {
      "utterances": [
        {
          "data": [
            {
              "text": "find me a flight to "
            },
            {
              "entity": "city",
              "slot_name": "destination",
              "text": "Lima"
            },
            {
              "text": " "
            },
            {
              "entity": "snips/datetime",
              "slot_name": "date",
              "text": "tonight"
            }
          ]
        }
      ]
    }
  },
  "language": "en"
}
"""
        self.assertEqual(expected_value, printed_value)
示例#7
0
def print_parsing_result(engine, query):
    """Parse ``query`` with ``engine`` and print the result as JSON.

    Args:
        engine: object exposing a ``parse(query)`` method.
        query: text to parse; it is converted with ``unicode_string`` first.

    Returns:
        None. The JSON dump (sorted keys, 2-space indent) goes to stdout.
    """
    text = unicode_string(query)
    parsing = engine.parse(text)
    print(json_string(parsing, indent=2, sort_keys=True))