Example #1
def test_dump_nlu_with_responses():
    md = """## intent:greet
- hey
- howdy
- hey there
- hello
- hi
- good morning
- good evening
- dear sir

## intent:chitchat/ask_name
- What's your name?
- What can I call you?

## intent:chitchat/ask_weather
- How's the weather?
- Is it too hot outside?
"""

    r = MarkdownReader()
    nlu_data = r.reads(md)

    dumped = nlu_data.nlu_as_markdown()
    assert dumped == md
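A short companion sketch, not part of the original test: it shows how the examples parsed by MarkdownReader.reads() can be inspected, including retrieval intents such as chitchat/ask_name. The "intent" key is the one used throughout these examples; the "text" key is an assumption about where the raw example text is kept on the parsed message.

    r = MarkdownReader()
    nlu_data = r.reads(md)

    for example in nlu_data.training_examples:
        # "intent" holds the intent name (e.g. "chitchat/ask_name");
        # "text" is assumed to hold the raw example text.
        print(example.data["intent"], "->", example.data.get("text"))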
Example #2
def test_check_check_correct_entity_annotations(text: Text, warnings: int):
    reader = MarkdownReader()
    tokenizer = WhitespaceTokenizer()

    training_data = reader.reads(text)
    tokenizer.train(training_data)

    with pytest.warns(UserWarning) as record:
        EntityExtractor.check_correct_entity_annotations(training_data)

    assert len(record) == warnings
    assert all(
        excerpt in record[0].message.args[0]
        for excerpt in ["Misaligned entity annotation in sentence"]
    )
Example #3
def test_markdown_order():
    r = MarkdownReader()

    md = """## intent:z
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town

## intent:a
- intent a
- also very important
"""

    training_data = r.reads(md)
    assert training_data.nlu_as_markdown() == md
Example #4
def test_markdown_entity_regex(example: Text, expected_num_entities: int):
    r = MarkdownReader()

    md = f"""
## intent:test-intent
- {example}
    """

    result = r.reads(md)

    assert len(result.training_examples) == 1
    actual_example = result.training_examples[0]
    assert actual_example.data["intent"] == "test-intent"
    assert len(actual_example.data.get("entities", [])) == expected_num_entities
Example #5
def test_markdown_unespace_tokens():
    r = MarkdownReader()

    md = """## intent:test-intent
- Hi \\t Can you help me?\\n I want to go to [Alexandria]{"entity": "city"}
"""
    expected_num_entities = 1

    training_data = r.reads(md)
    assert len(training_data.training_examples) == 1

    actual_example = training_data.training_examples[0]
    assert actual_example.data["intent"] == "test-intent"
    assert len(actual_example.data.get("entities", [])) == expected_num_entities
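As a follow-up sketch (not in the listing): roughly what the single parsed entity from the example above looks like. The exact keys are an assumption based on Rasa's start/end/value/entity layout for entity dicts.

    entity = actual_example.data["entities"][0]
    # Expected shape, roughly (offsets omitted, key names assumed):
    # {"start": ..., "end": ..., "value": "Alexandria", "entity": "city"}
    assert entity["entity"] == "city"
    assert entity["value"] == "Alexandria"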
Example #6
def _reader_factory(fformat: Text) -> Optional["TrainingDataReader"]:
    """Generates the appropriate reader class based on the file format."""
    from rasa.shared.nlu.training_data.formats import (
        RasaYAMLReader,
        MarkdownReader,
        WitReader,
        LuisReader,
        RasaReader,
        DialogflowReader,
        NLGMarkdownReader,
    )

    reader = None
    if fformat == LUIS:
        reader = LuisReader()
    elif fformat == WIT:
        reader = WitReader()
    elif fformat in DIALOGFLOW_RELEVANT:
        reader = DialogflowReader()
    elif fformat == RASA:
        reader = RasaReader()
    elif fformat == MARKDOWN:
        reader = MarkdownReader()
    elif fformat == MARKDOWN_NLG:
        reader = NLGMarkdownReader()
    elif fformat == RASA_YAML:
        reader = RasaYAMLReader()
    return reader
Example #7
def guess_format(filename: Text) -> Text:
    """Applies heuristics to guess the data format of a file.

    Args:
        filename: file whose type should be guessed

    Returns:
        Guessed file format.
    """
    from rasa.shared.nlu.training_data.formats import RasaYAMLReader

    guess = UNK

    if not os.path.isfile(filename):
        return guess

    try:
        content = rasa.shared.utils.io.read_file(filename)
        js = json.loads(content)
    except ValueError:
        if MarkdownReader.is_markdown_nlu_file(filename):
            guess = MARKDOWN
        elif NLGMarkdownReader.is_markdown_nlg_file(filename):
            guess = MARKDOWN_NLG
        elif RasaYAMLReader.is_yaml_nlu_file(filename):
            guess = RASA_YAML
    else:
        for file_format, format_heuristic in _json_format_heuristics.items():
            if format_heuristic(js, filename):
                guess = file_format
                break

    logger.debug(f"Training data format of '{filename}' is '{guess}'.")

    return guess
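A minimal glue sketch, not part of the listing, showing how the format string returned by guess_format() (Example #7) can be fed into _reader_factory() (Example #6). The function name below is hypothetical; only guess_format, _reader_factory, and the readers' read() method (used in Example #9) are taken from the examples.

    from typing import Text

    def load_training_data(filename: Text) -> "TrainingData":
        # Guess the format first, then build the matching reader.
        fformat = guess_format(filename)
        reader = _reader_factory(fformat)
        if reader is None:
            raise ValueError(f"No reader available for '{filename}' (format '{fformat}').")
        # Every reader returned by the factory exposes read().
        return reader.read(filename)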
Example #8
    def filter(cls, source_path: Path) -> bool:
        """Checks if the given training data file contains NLU data in `Markdown` format
        and can be converted to `YAML`.

        Args:
            source_path: Path to the training data file.

        Returns:
            `True` if the given file can be converted, `False` otherwise
        """
        return MarkdownReader.is_markdown_nlu_file(source_path)
Example #9
    def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
        """Converts the given training data file and saves it to the output directory.

        Args:
            source_path: Path to the training data file.
            output_path: Path to the output directory.
        """
        output_nlu_path = cls.generate_path_for_converted_training_data_file(
            source_path, output_path)

        yaml_training_data = MarkdownReader().read(source_path)
        RasaYAMLWriter().dump(output_nlu_path, yaml_training_data)

        for lookup_table in yaml_training_data.lookup_tables:
            cls._write_nlu_lookup_table_yaml(lookup_table, output_path)

        print_success(
            f"Converted NLU file: '{source_path}' >> '{output_nlu_path}'.")
Example #10
def test_skip_markdown_reading_deprecation():
    with pytest.warns(None) as warnings:
        MarkdownReader(ignore_deprecation_warning=True)

    assert not warnings
Example #11
def test_markdown_reading_deprecation():
    with pytest.warns(FutureWarning):
        MarkdownReader()