Example #1
0
    def inner(
        pipeline: List[Dict[Text, Any]], training_data: Union[Text, TrainingData],
    ) -> Tuple[TrainingData, List[GraphComponent]]:
        """Instantiate the pipeline components and run them over the training data.

        Returns the (possibly loaded) training data together with the list of
        instantiated graph components, in pipeline order.
        """
        # A plain string is treated as a path to NLU training data on disk.
        if isinstance(training_data, str):
            training_data = RasaFileImporter(
                training_data_paths=[training_data]
            ).get_nlu_data()

        def build(
            component_class: Type[GraphComponent], config: Dict[Text, Any], idx: int
        ) -> GraphComponent:
            # Unique node name per position so model-storage resources don't collide.
            node_name = f"{component_class.__name__}_{idx}"
            return component_class.create(
                {**component_class.get_default_config(), **config},
                default_model_storage,
                Resource(node_name),
                ExecutionContext(GraphSchema({}), node_name=node_name),
            )

        # Deep-copy first: `pop("component")` below mutates the dicts.
        component_pipeline = []
        for idx, spec in enumerate(copy.deepcopy(pipeline)):
            component_pipeline.append(build(spec.pop("component"), spec, idx))

        for component in component_pipeline:
            if hasattr(component, "train"):
                component.train(training_data)
            if hasattr(component, "process_training_data"):
                component.process_training_data(training_data)

        return training_data, component_pipeline
async def test_train_persist_load_with_composite_entities(
    crf_entity_extractor: Callable[[Dict[Text, Any]],
                                   CRFEntityExtractorGraphComponent],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    whitespace_tokenizer: WhitespaceTokenizerGraphComponent,
):
    """A trained CRF extractor and its persisted copy classify identically."""
    training_data = RasaFileImporter(
        training_data_paths=["data/test/demo-rasa-composite-entities.yml"]
    ).get_nlu_data()
    whitespace_tokenizer.process_training_data(training_data)

    extractor = crf_entity_extractor({})
    extractor.train(training_data)

    original = Message(data={TEXT: "I am looking for an italian restaurant"})
    whitespace_tokenizer.process([original])
    # Process a deep copy through the reloaded extractor for comparison.
    clone = copy.deepcopy(original)

    processed = extractor.process([original])[0]

    restored = CRFEntityExtractorGraphComponent.load(
        CRFEntityExtractorGraphComponent.get_default_config(),
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    processed_clone = restored.process([clone])[0]

    assert processed_clone.fingerprint() == processed.fingerprint()
Example #3
0
def test_read_conversation_tests(project: Text):
    """Conversation tests in the default project location are importable."""
    test_path = str(Path(project) / DEFAULT_CONVERSATION_TEST_PATH)
    importer = RasaFileImporter(training_data_paths=[test_path])
    # The default project ships exactly seven test story steps.
    assert len(importer.get_conversation_tests().story_steps) == 7
Example #4
0
async def test_train_docker_and_docs_configs(
    config_file: Text, monkeypatch: MonkeyPatch
):
    """Shipped docker/docs config files load into a usable model config."""
    # Prevent auto-configuration from writing the config back to disk.
    monkeypatch.setattr(autoconfig, "_dump_config", Mock())

    imported_config = RasaFileImporter(config_file=config_file).get_config()
    loaded_config = config.load(imported_config)

    assert len(loaded_config.component_names) > 1
    assert loaded_config.language == imported_config["language"]
Example #5
0
async def test_process_gives_diagnostic_data(
    default_execution_context: ExecutionContext,
    create_response_selector: Callable[[Dict[Text, Any]], ResponseSelector],
    train_and_preprocess: Callable[..., Tuple[TrainingData,
                                              List[GraphComponent]]],
    process_message: Callable[..., Message],
):
    """Tests if processing a message returns attention weights as numpy array."""
    pipeline = [
        {"component": WhitespaceTokenizer},
        {"component": CountVectorsFeaturizer},
    ]

    importer = RasaFileImporter(
        config_file="data/test_response_selector_bot/config.yml",
        domain_path="data/test_response_selector_bot/domain.yml",
        training_data_paths=[
            "data/test_response_selector_bot/data/rules.yml",
            "data/test_response_selector_bot/data/stories.yml",
            "data/test_response_selector_bot/data/nlu.yml",
        ],
    )
    training_data, loaded_pipeline = train_and_preprocess(
        pipeline, importer.get_nlu_data()
    )

    # Diagnostic data is only attached when explicitly enabled.
    default_execution_context.should_add_diagnostic_data = True

    selector = create_response_selector({EPOCHS: 1})
    selector.train(training_data=training_data)

    message = process_message(loaded_pipeline, Message(data={TEXT: "hello"}))
    diagnostic_data = selector.process([message])[0].get(DIAGNOSTIC_DATA)

    assert isinstance(diagnostic_data, dict)
    for values in diagnostic_data.values():
        assert "text_transformed" in values
        assert isinstance(values.get("text_transformed"), np.ndarray)
        # The `attention_weights` key should exist, regardless of there
        # being a transformer
        assert "attention_weights" in values
        # By default, ResponseSelector has `number_of_transformer_layers = 0`
        # in which case the attention weights should be None.
        assert values.get("attention_weights") is None
Example #6
0
    def inner(
        diet: DIETClassifier,
        pipeline: Optional[List[Dict[Text, Any]]] = None,
        training_data: str = nlu_data_path,
        message_text: Text = "Rasa is great!",
        expect_intent: bool = True,
    ) -> Message:
        """Train `diet`, classify a message, and check a reloaded copy agrees."""
        if not pipeline:
            # Minimal featurization pipeline for DIET.
            pipeline = [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
            ]

        # Deep-copy first: `pop("name")` mutates the component dicts.
        loaded_pipeline = [
            registry.get_component_class(spec.pop("name"))(spec)
            for spec in copy.deepcopy(pipeline)
        ]

        training_data = RasaFileImporter(
            training_data_paths=[training_data]
        ).get_nlu_data()

        for component in loaded_pipeline:
            component.train(training_data)
        diet.train(training_data=training_data)

        message = Message(data={TEXT: message_text})
        for component in loaded_pipeline:
            component.process(message)
        clone = copy.deepcopy(message)

        classified_message = diet.process([message])[0]
        if expect_intent:
            assert classified_message.data["intent"]["name"]

        # Reload the classifier from storage and require an identical prediction.
        loaded_diet = create_diet(diet.component_config, load=True)
        classified_clone = loaded_diet.process([clone])[0]
        assert classified_clone.fingerprint() == classified_message.fingerprint()

        return classified_message
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[[Dict[Text, Any]],
                                   CRFEntityExtractorGraphComponent],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    spacy_tokenizer: SpacyTokenizerGraphComponent,
    spacy_nlp: Language,
):
    """The CRF extractor behaves the same after persisting and reloading."""
    extractor = crf_entity_extractor(config_params)

    training_data = RasaFileImporter(
        training_data_paths=["data/examples/rasa"]
    ).get_nlu_data()

    # Attach spaCy docs and tokens before CRF training.
    spacy_model = SpacyModel(model=spacy_nlp, model_name="en_core_web_md")
    training_data = SpacyPreprocessor({}).process_training_data(
        training_data, spacy_model
    )
    training_data = spacy_tokenizer.process_training_data(training_data)

    extractor.train(training_data)

    message = Message(data={TEXT: "I am looking for an italian restaurant"})
    preprocessed = SpacyPreprocessor({}).process([message], spacy_model)
    message = spacy_tokenizer.process(preprocessed)[0]
    clone = copy.deepcopy(message)

    processed = extractor.process([message])[0]

    restored = CRFEntityExtractorGraphComponent.load(
        {**CRFEntityExtractorGraphComponent.get_default_config(), **config_params},
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    processed_clone = restored.process([clone])[0]

    assert processed_clone.fingerprint() == processed.fingerprint()

    detected_entities = processed_clone.get(ENTITIES)
    assert len(detected_entities) == 1
    assert detected_entities[0]["entity"] == "cuisine"
    assert detected_entities[0]["value"] == "italian"
Example #8
0
def test_verify_correct_e2e_story_structure(tmp_path: Path):
    """Distinct e2e user texts must not be flagged as story conflicts."""
    story_file_name = tmp_path / "stories.yml"
    story_file_name.write_text("""
            stories:
            - story: path 1
              steps:
              - user: |
                  hello assistant! Can you help me today?
              - action: utter_greet
            - story: path 2 - state is similar but different from the one in path 1
              steps:
              - user: |
                  hello assistant! you Can help me today?
              - action: utter_goodbye
            - story: path 3
              steps:
              - user: |
                  That's it for today. Chat again tomorrow!
              - action: utter_goodbye
            """)
    validator = Validator.from_importer(
        RasaFileImporter(
            config_file="data/test_config/config_defaults.yml",
            domain_path="data/test_domains/default.yml",
            training_data_paths=[story_file_name],
            training_type=TrainingType.NLU,
        )
    )
    assert validator.verify_story_structure(ignore_warnings=False)
Example #9
0
def test_verify_bad_e2e_story_structure_when_text_identical(tmp_path: Path):
    """Two stories with identical user text but different actions must conflict."""
    story_file_name = tmp_path / "stories.yml"
    story_file_name.write_text("""
        version: "3.0"
        stories:
        - story: path 1
          steps:
          - user: |
              amazing!
          - action: utter_happy
        - story: path 2 (should always conflict path 1)
          steps:
          - user: |
              amazing!
          - action: utter_cheer_up
        """)
    # The two stories with identical user texts
    validator = Validator.from_importer(
        RasaFileImporter(
            config_file="data/test_config/config_defaults.yml",
            domain_path="data/test_domains/default.yml",
            training_data_paths=[story_file_name],
            training_type=TrainingType.NLU,
        )
    )
    assert not validator.verify_story_structure(ignore_warnings=False)
Example #10
0
async def test_verify_story_structure():
    """The default test stories are free of structural conflicts."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=[DEFAULT_STORIES_FILE],
        )
    )
    assert validator.verify_story_structure(ignore_warnings=False)
Example #11
0
async def test_verify_valid_utterances():
    """Utterances used in the default data all exist in the domain."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=[DEFAULT_NLU_DATA, DEFAULT_STORIES_FILE],
        )
    )
    assert validator.verify_utterances()
Example #12
0
async def test_verify_intents_does_not_fail_on_valid_data():
    """Intents in the default NLU data all appear in the moodbot domain."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="examples/moodbot/domain.yml",
            training_data_paths=[DEFAULT_NLU_DATA],
        )
    )
    assert validator.verify_intents()
Example #13
0
async def test_verify_bad_story_structure_ignore_warnings():
    """Conflicting stories still pass when structural warnings are ignored."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=["data/test_stories/stories_conflicting_2.md"],
        )
    )
    assert validator.verify_story_structure(ignore_warnings=True)
Example #14
0
def test_verify_bad_story_structure():
    """Conflicting stories fail validation when warnings are not ignored."""
    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=["data/test_yaml_stories/stories_conflicting_2.yml"],
        )
    )
    assert not validator.verify_story_structure(ignore_warnings=False)
Example #15
0
    def load_from_dict(
        config: Optional[Dict] = None,
        config_path: Optional[Text] = None,
        domain_path: Optional[Text] = None,
        training_data_paths: Optional[List[Text]] = None,
        training_type: Optional[TrainingType] = TrainingType.BOTH,
    ) -> "TrainingDataImporter":
        """Loads a `TrainingDataImporter` instance from a dictionary."""
        # Imported lazily to avoid a circular import at module load time.
        from rasa.shared.importers.rasa import RasaFileImporter

        importer_configs = (config or {}).get("importers", [])
        candidates = [
            TrainingDataImporter._importer_from_dict(
                importer_config, config_path, domain_path, training_data_paths, training_type
            )
            for importer_config in importer_configs
        ]
        # Drop entries that could not be resolved to an importer.
        importers = [candidate for candidate in candidates if candidate]

        if not importers:
            # Fall back to the plain file importer when nothing was configured.
            importers = [
                RasaFileImporter(
                    config_path, domain_path, training_data_paths, training_type
                )
            ]

        return E2EImporter(ResponsesSyncImporter(CombinedDataImporter(importers)))
Example #16
0
def test_verify_slot_mappings_valid(tmp_path: Path):
    """Slot mappings referencing known entities, intents, and forms validate."""
    domain = tmp_path / "domain.yml"
    # Covers both mapping kinds checked by `verify_slot_mappings`:
    # `from_entity` with an `active_loop` condition on a declared form,
    # and `from_trigger_intent` bound to a declared intent.
    domain.write_text("""
        version: "3.0"
        intents:
        - activate_booking
        entities:
        - city
        slots:
          location:
            type: text
            influence_conversation: false
            mappings:
            - type: from_entity
              entity: city
              conditions:
              - active_loop: booking_form
          started_booking_form:
            type: bool
            influence_conversation: false
            mappings:
            - type: from_trigger_intent
              intent: activate_booking
              value: true
        forms:
          booking_form:
            required_slots:
            - started_booking_form
            - location
            """)
    # Only a domain is supplied; this check needs no NLU or story data.
    importer = RasaFileImporter(domain_path=domain)
    validator = Validator.from_importer(importer)
    assert validator.verify_slot_mappings()
Example #17
0
def test_verify_form_slots_invalid_domain(tmp_path: Path):
    """A form requiring a slot missing from `slots` fails with a warning."""
    domain = tmp_path / "domain.yml"
    # 'last_nam' is a deliberate typo: the domain declares 'last_name' only.
    domain.write_text("""
        version: "3.0"
        forms:
          name_form:
            required_slots:
              - first_name
              - last_nam
        slots:
             first_name:
                type: text
                mappings:
                - type: from_text
             last_name:
                type: text
                mappings:
                - type: from_text
        """)
    importer = RasaFileImporter(domain_path=domain)
    validator = Validator.from_importer(importer)
    with pytest.warns(UserWarning) as w:
        validity = validator.verify_form_slots()
        assert validity is False

    # NOTE(review): the concatenated expectation reads "...domain slots.Please
    # add..." with no space between sentences — confirm the emitted warning
    # really lacks that space, since this is an exact equality check.
    assert (
        w[0].message.args[0] == "The form slot 'last_nam' in form 'name_form' "
        "is not present in the domain slots."
        "Please add the correct slot or check for typos.")
Example #18
0
def test_verify_there_is_example_repetition_in_intents(nlu_data_path: Text):
    # moodbot nlu data already has duplicated example 'good afternoon'
    # for intents greet and goodbye
    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_moodbot/domain.yml",
            training_data_paths=[nlu_data_path],
        )
    )
    assert not validator.verify_example_repetition_in_intents(False)
Example #19
0
async def test_verify_story_structure(stories_path: Text):
    """The stories fixture is free of structural conflicts."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=[stories_path],
        )
    )
    assert validator.verify_story_structure(ignore_warnings=False)
Example #20
0
def test_verify_nlu_with_e2e_story(tmp_path: Path, nlu_data_path: Path):
    """NLU validation succeeds when stories mix e2e user text and intents."""
    story_file_name = tmp_path / "stories.yml"
    story_file_name.write_text(
        """
            stories:
            - story: path 1
              steps:
              - user: |
                  hello assistant! Can you help me today?
              - action: utter_greet
            - story: path 2
              steps:
              - intent: greet
              - action: utter_greet
            """
    )
    validator = Validator.from_importer(
        RasaFileImporter(
            config_file="data/test_moodbot/config.yml",
            domain_path="data/test_moodbot/domain.yml",
            training_data_paths=[story_file_name, nlu_data_path],
        )
    )
    assert validator.verify_nlu()
Example #21
0
def test_verify_intents_does_fail_on_invalid_data(nlu_data_path: Text):
    # domain and nlu data are from different domain and should produce warnings
    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=[nlu_data_path],
        )
    )
    assert not validator.verify_intents()
Example #22
0
async def test_verify_intents_does_not_fail_on_valid_data(nlu_data_path: Text):
    """Moodbot NLU intents are all declared in the moodbot domain."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_moodbot/domain.yml",
            training_data_paths=[nlu_data_path],
        )
    )
    assert validator.verify_intents()
Example #23
0
def test_verify_there_is_not_example_repetition_in_intents():
    """The knowledge-base bot NLU data shares no examples across intents."""
    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_moodbot/domain.yml",
            training_data_paths=["examples/knowledgebasebot/data/nlu.yml"],
        )
    )
    assert validator.verify_example_repetition_in_intents(False)
Example #24
0
async def test_verify_actions_in_stories_not_in_domain(
    tmp_path: Path, domain_path: Text
):
    """A story using an action missing from the domain fails with a warning."""
    story_file_name = tmp_path / "stories.yml"
    story_file_name.write_text(
        """
        version: "2.0"
        stories:
        - story: story path 1
          steps:
          - intent: greet
          - action: action_test_1
        """
    )

    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path=domain_path, training_data_paths=[story_file_name],
        )
    )
    with pytest.warns(UserWarning) as warning:
        assert validator.verify_actions_in_stories_rules() is False

    expected = (
        "The action 'action_test_1' is used in the 'story path 1' block, "
        "but it is not listed in the domain file."
    )
    assert expected in warning[0].message.args[0]
Example #25
0
def test_verify_slot_mappings_mapping_active_loop_not_in_forms(tmp_path: Path):
    """A mapping condition on an undeclared form fails slot-mapping validation."""
    domain = tmp_path / "domain.yml"
    slot_name = "some_slot"
    # 'som_form' is a deliberate typo: the domain only declares 'some_form'.
    domain.write_text(f"""
        version: "3.0"
        entities:
        - some_entity
        slots:
          {slot_name}:
            type: text
            influence_conversation: false
            mappings:
            - type: from_entity
              entity: some_entity
              conditions:
              - active_loop: som_form
        forms:
          some_form:
            required_slots:
              - {slot_name}
        """)
    importer = RasaFileImporter(domain_path=domain)
    validator = Validator.from_importer(importer)
    # The validator should both warn about the unknown form and return False.
    with pytest.warns(
            UserWarning,
            match=r"Slot 'some_slot' has a mapping condition "
            r"for form 'som_form' which is not listed "
            r"in domain forms.*",
    ):
        assert not validator.verify_slot_mappings()
Example #26
0
async def test_valid_stories_rules_default_actions(
    file_name: Text, data_type: Text, tmp_path: Path
):
    """Stories/rules using built-in default actions pass action validation.

    Parametrized with ("stories", "story") and ("rules", "rule") so the same
    body exercises both training-data kinds.
    """
    domain = tmp_path / "domain.yml"
    domain.write_text(
        """
        version: "2.0"
        intents:
        - greet
        """
    )
    # BUG FIX: the original rebound `file_name` to the full Path before the
    # f-string below interpolated it, so the YAML top-level key became the
    # absolute file path instead of "stories"/"rules". Keep the logical name
    # and use a separate variable for the file path.
    data_file = tmp_path / f"{file_name}.yml"
    data_file.write_text(
        f"""
            version: "2.0"
            {file_name}:
            - {data_type}: test path
              steps:
              - intent: greet
              - action: action_restart
            """
    )
    importer = RasaFileImporter(domain_path=domain, training_data_paths=[data_file],)
    validator = await Validator.from_importer(importer)
    # `action_restart` is a default action, so it need not be in the domain.
    assert validator.verify_actions_in_stories_rules()
Example #27
0
    async def from_training_files(cls, training_data_paths: str, domain_path: str, config_path: str, root_dir):
        """
        Create validator from training files.

        @param training_data_paths: nlu.yml file path.
        @param domain_path: domain.yml file path.
        @param config_path: config.yml file path.
        @param root_dir: training data root directory.
        @return: TrainingDataValidator built from the importer.
        @raises AppException: if any training file is missing or import fails.
        """
        if not (os.path.exists(training_data_paths) and os.path.exists(domain_path) and os.path.exists(config_path)):
            raise AppException("Some training files are absent!")
        try:
            file_importer = RasaFileImporter(
                domain_path=domain_path, training_data_paths=training_data_paths, config_file=config_path,
            )
            # Custom actions live in a separate non-Rasa file next to the data.
            cls.actions = Utility.read_yaml(os.path.join(root_dir, 'actions.yml'))

            return await TrainingDataValidator.from_importer(file_importer)
        except (YamlValidationException, YamlSyntaxException) as e:
            # The two YAML error branches were identical; merged into one.
            # Strip absolute temp paths from the message before surfacing it.
            exc = Utility.replace_file_name(str(e), root_dir)
            raise AppException(exc) from e
        except Exception as e:
            # Preserve the original cause for debugging (`from e`).
            raise AppException(e) from e
Example #28
0
def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None:
    """Validates either the story structure or the entire project.

    Args:
        args: Commandline arguments
        stories_only: If `True`, only the story structure is validated.
    """
    from rasa.validator import Validator

    config = rasa.cli.utils.get_validated_path(
        args.config, "config", DEFAULT_CONFIG_PATH, none_is_valid=True
    )

    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path=args.domain, training_data_paths=args.data, config_file=config,
        )
    )

    if stories_only:
        all_good = _validate_story_structure(validator, args)
    else:
        # `and` short-circuits: a broken domain skips the remaining checks.
        all_good = (
            _validate_domain(validator)
            and _validate_nlu(validator, args)
            and _validate_story_structure(validator, args)
        )

    telemetry.track_validate_files(all_good)
    if not all_good:
        rasa.shared.utils.cli.print_error_and_exit(
            "Project validation completed with errors."
        )
Example #29
0
def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None:
    """Validates either the story structure or the entire project.

    Args:
        args: Commandline arguments
        stories_only: If `True`, only the story structure is validated.
    """
    file_importer = RasaFileImporter(
        domain_path=args.domain, training_data_paths=args.data
    )

    # `Validator.from_importer` is a coroutine in this Rasa version, so it is
    # driven to completion on the current event loop.
    loop = asyncio.get_event_loop()
    validator = loop.run_until_complete(Validator.from_importer(file_importer))

    if stories_only:
        all_good = _validate_story_structure(validator, args)
    else:
        # `and` short-circuits: a broken domain skips the remaining checks.
        all_good = (
            _validate_domain(validator)
            and _validate_nlu(validator, args)
            and _validate_story_structure(validator, args)
        )

    if not all_good:
        rasa.shared.utils.cli.print_error_and_exit(
            "Project validation completed with errors."
        )
Example #30
0
def test_early_exit_on_invalid_domain():
    """An invalid domain degrades to an empty domain, emitting known warnings."""
    domain_path = "data/test_domains/duplicate_intents.yml"
    importer = RasaFileImporter(domain_path=domain_path)

    with pytest.warns(UserWarning) as record:
        validator = Validator.from_importer(importer)
    validator.verify_domain_validity()

    # two for non-unique domains, 2 for auto-fill removal
    assert len(record) == 4

    non_unique_message = (
        f"Loading domain from '{domain_path}' failed. "
        f"Using empty domain. Error: 'Intents are not unique! "
        f"Found multiple intents with name(s) ['default', 'goodbye']. "
        f"Either rename or remove the duplicate ones.'"
    )
    non_unique_warnings = [
        warning for warning in record
        if non_unique_message in warning.message.args[0]
    ]
    assert len(non_unique_warnings) == 2

    auto_fill_warnings = [
        warning for warning in record
        if "Slot auto-fill has been removed in 3.0" in warning.message.args[0]
    ]
    assert len(auto_fill_warnings) == 2