示例#1
0
    def read_from_json(self, js: Dict[Text, Any], **_: Any) -> "TrainingData":
        """Loads training data stored in the rasa NLU data format.

        The input document is validated against the rasa NLU data schema and is
        left unmodified: each example is copied before its known keys are
        extracted.

        Args:
            js: Parsed JSON document holding a top-level "rasa_nlu_data" object.
            **_: Additional keyword arguments; accepted and ignored.

        Returns:
            A `TrainingData` built from the document's common examples, entity
            synonyms, regex features and lookup tables.

        Raises:
            Any error raised by `validate_training_data` when `js` does not
            match the schema propagates unchanged.
        """
        import rasa.shared.nlu.training_data.schemas.data_schema as schema
        import rasa.shared.utils.validation as validation_utils

        validation_utils.validate_training_data(js, schema.rasa_nlu_data_schema())

        data = js["rasa_nlu_data"]
        common_examples = data.get("common_examples", [])
        entity_synonyms = data.get("entity_synonyms", [])
        regex_features = data.get("regex_features", [])
        lookup_tables = data.get("lookup_tables", [])

        entity_synonyms = transform_entity_synonyms(entity_synonyms)

        training_examples = []
        for ex in common_examples:
            # Pop from a shallow copy: popping from `ex` itself would strip the
            # text/intent/entities keys out of the caller's `js`, so loading the
            # same document a second time would silently produce empty messages.
            example = dict(ex)
            msg = Message.build(
                text=example.pop(TEXT, ""),
                intent=example.pop(INTENT, None),
                entities=example.pop(ENTITIES, None),
                # whatever is left are custom entries, forwarded as-is
                **example,
            )
            training_examples.append(msg)

        return TrainingData(
            training_examples, entity_synonyms, regex_features, lookup_tables
        )
    def _get_validated_dict(json_str: Text) -> Dict[Text, Text]:
        """Parse and validate the attribute dict attached to an annotated entity.

        Users can specify entity roles, synonyms, groups for an entity in a dict,
        e.g. [LA]{"entity": "city", "role": "to", "value": "Los Angeles"}

        Args:
            json_str: the entity dict as string without "{}"

        Raises:
            JSONDecodeError: if the entity dict is not valid json; a warning is
                emitted before the error is re-raised.
            Errors raised by `validate_training_data` for a dict that does not
            match the entity dict schema propagate unchanged.

        Returns:
            the deserialized and validated entity dict
        """
        import json
        import rasa.shared.utils.validation as validation_utils
        import rasa.shared.nlu.training_data.schemas.data_schema as schema

        try:
            # the surrounding {} are not part of the regex match, so re-add them
            entity_dict = json.loads(f"{{{json_str}}}")
        except JSONDecodeError as decode_error:
            warning_text = (
                f"Incorrect training data format ('{{{json_str}}}'), make sure your "
                f"data is valid. For more information about the format visit "
                f"{LEGACY_DOCS_BASE_URL}/nlu/training-data-format/."
            )
            rasa.shared.utils.io.raise_warning(warning_text)
            raise decode_error

        entity_schema = schema.entity_dict_schema()
        validation_utils.validate_training_data(entity_dict, entity_schema)

        return entity_dict
示例#3
0
def test_url_data_format():
    """Round-trip a small NLU payload through a temp file and validate it.

    The payload (one entity synonym entry and one annotated example) is written
    to a temporary file, read back as JSON, and checked against the rasa NLU
    data schema.
    """
    raw_json = """
    {
      "rasa_nlu_data": {
        "entity_synonyms": [
          {
            "value": "nyc",
            "synonyms": ["New York City", "nyc", "the big apple"]
          }
        ],
        "common_examples" : [
          {
            "text": "show me flights to New York City",
            "intent": "unk",
            "entities": [
              {
                "entity": "destination",
                "start": 19,
                "end": 32,
                "value": "NYC"
              }
            ]
          }
        ]
      }
    }"""
    encoded = raw_json.encode(rasa.shared.utils.io.DEFAULT_ENCODING)
    tmp_path = io_utils.create_temporary_file(
        encoded, suffix="_tmp_training_data.json", mode="w+b"
    )

    loaded = rasa.shared.utils.io.read_json_file(tmp_path)
    assert loaded is not None

    nlu_schema = schema.rasa_nlu_data_schema()
    validation_utils.validate_training_data(loaded, nlu_schema)
示例#4
0
def get_validated_dict(json_str: Text) -> Dict[Text, Text]:
    """Parse and validate the attribute dict attached to an annotated entity.

    Users can specify entity roles, synonyms, groups for an entity in a dict, e.g.
    [LA]{"entity": "city", "role": "to", "value": "Los Angeles"}.

    Args:
        json_str: The entity dict as string without "{}".

    Raises:
        JSONDecodeError: If the entity dict is not valid json; a warning is
            emitted before the error is re-raised.
        Errors raised by `validate_training_data` for a dict that does not
        match the entity dict schema propagate unchanged.

    Returns:
        Deserialized and validated `json_str`.
    """
    import json
    import rasa.shared.utils.validation as validation_utils
    import rasa.shared.nlu.training_data.schemas.data_schema as schema

    try:
        # the surrounding {} are not part of the regex match, so re-add them
        entity_dict = json.loads(f"{{{json_str}}}")
    except JSONDecodeError as decode_error:
        warning_text = (
            f"Incorrect training data format ('{{{json_str}}}'). Make sure your "
            f"data is valid."
        )
        rasa.shared.utils.io.raise_warning(
            warning_text, docs=DOCS_URL_TRAINING_DATA_NLU
        )
        raise decode_error

    entity_schema = schema.entity_dict_schema()
    validation_utils.validate_training_data(entity_dict, entity_schema)

    return entity_dict
示例#5
0
def test_validate_training_data_is_throwing_exceptions(invalid_data):
    """Invalid payloads must fail validation against the rasa NLU data schema."""
    with pytest.raises(SchemaValidationError):
        validation_utils.validate_training_data(
            invalid_data, schema.rasa_nlu_data_schema()
        )
示例#6
0
def test_example_training_data_is_valid():
    """The demo file demo-rasa.json must pass rasa NLU data schema validation."""
    demo_payload = rasa.shared.utils.io.read_json_file(
        "data/examples/rasa/demo-rasa.json"
    )
    nlu_schema = schema.rasa_nlu_data_schema()
    validation_utils.validate_training_data(demo_payload, nlu_schema)
示例#7
0
def test_entity_dict_is_valid(data):
    """The given entity dict must pass validation against the entity dict schema."""
    entity_schema = schema.entity_dict_schema()
    validation_utils.validate_training_data(data, entity_schema)
示例#8
0
def test_validate_entity_dict_is_throwing_exceptions(invalid_data):
    """Invalid entity dicts must fail validation against the entity dict schema."""
    with pytest.raises(SchemaValidationError):
        validation_utils.validate_training_data(
            invalid_data, schema.entity_dict_schema()
        )