def inner(
    pipeline: List[Dict[Text, Any]],
    training_data: Union[Text, TrainingData],
) -> Tuple[TrainingData, List[GraphComponent]]:
    """Instantiate and train every component of `pipeline` on `training_data`.

    If `training_data` is a path, NLU data is loaded from it first. Returns
    the (possibly transformed) training data together with the instantiated
    pipeline components.
    """
    if isinstance(training_data, str):
        file_importer = RasaFileImporter(training_data_paths=[training_data])
        training_data = file_importer.get_nlu_data()

    def _instantiate(
        component_class: Type[GraphComponent], config: Dict[Text, Any], idx: int
    ) -> GraphComponent:
        # Each node gets a unique name so model-storage resources don't collide.
        unique_name = f"{component_class.__name__}_{idx}"
        context = ExecutionContext(GraphSchema({}), node_name=unique_name)
        merged_config = {**component_class.get_default_config(), **config}
        return component_class.create(
            merged_config,
            default_model_storage,
            Resource(unique_name),
            context,
        )

    component_pipeline = []
    for idx, component_config in enumerate(copy.deepcopy(pipeline)):
        component_class = component_config.pop("component")
        component_pipeline.append(
            _instantiate(component_class, component_config, idx)
        )

    for component in component_pipeline:
        if hasattr(component, "train"):
            component.train(training_data)
        if hasattr(component, "process_training_data"):
            component.process_training_data(training_data)

    return training_data, component_pipeline
async def test_train_persist_load_with_composite_entities(
    crf_entity_extractor: Callable[[Dict[Text, Any]], CRFEntityExtractorGraphComponent],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    whitespace_tokenizer: WhitespaceTokenizerGraphComponent,
):
    """A reloaded CRF extractor must classify exactly like the trained one."""
    data_importer = RasaFileImporter(
        training_data_paths=["data/test/demo-rasa-composite-entities.yml"]
    )
    composite_data = data_importer.get_nlu_data()
    whitespace_tokenizer.process_training_data(composite_data)

    extractor = crf_entity_extractor({})
    extractor.train(composite_data)

    original = Message(data={TEXT: "I am looking for an italian restaurant"})
    whitespace_tokenizer.process([original])
    duplicate = copy.deepcopy(original)

    processed_original = extractor.process([original])[0]

    restored = CRFEntityExtractorGraphComponent.load(
        CRFEntityExtractorGraphComponent.get_default_config(),
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    processed_duplicate = restored.process([duplicate])[0]

    assert processed_duplicate.fingerprint() == processed_original.fingerprint()
def test_read_conversation_tests(project: Text):
    """All 7 test stories in the project's conversation-test file are read."""
    data_importer = RasaFileImporter(
        training_data_paths=[str(Path(project) / DEFAULT_CONVERSATION_TEST_PATH)]
    )
    conversation_tests = data_importer.get_conversation_tests()
    assert len(conversation_tests.story_steps) == 7
async def test_train_docker_and_docs_configs(
    config_file: Text, monkeypatch: MonkeyPatch
):
    """Shipped docker/docs config files must load into a valid pipeline."""
    # Prevent the auto-configuration step from writing the config back to disk.
    monkeypatch.setattr(autoconfig, "_dump_config", Mock())

    raw_config = RasaFileImporter(config_file=config_file).get_config()
    parsed_config = config.load(raw_config)

    assert len(parsed_config.component_names) > 1
    assert parsed_config.language == raw_config["language"]
async def test_process_gives_diagnostic_data(
    default_execution_context: ExecutionContext,
    create_response_selector: Callable[[Dict[Text, Any]], ResponseSelector],
    train_and_preprocess: Callable[..., Tuple[TrainingData, List[GraphComponent]]],
    process_message: Callable[..., Message],
):
    """Tests if processing a message returns attention weights as numpy array."""
    preprocessing_pipeline = [
        {"component": WhitespaceTokenizer},
        {"component": CountVectorsFeaturizer},
    ]
    data_importer = RasaFileImporter(
        config_file="data/test_response_selector_bot/config.yml",
        domain_path="data/test_response_selector_bot/domain.yml",
        training_data_paths=[
            "data/test_response_selector_bot/data/rules.yml",
            "data/test_response_selector_bot/data/stories.yml",
            "data/test_response_selector_bot/data/nlu.yml",
        ],
    )
    training_data, loaded_pipeline = train_and_preprocess(
        preprocessing_pipeline, data_importer.get_nlu_data()
    )

    default_execution_context.should_add_diagnostic_data = True
    selector = create_response_selector({EPOCHS: 1})
    selector.train(training_data=training_data)

    message = process_message(loaded_pipeline, Message(data={TEXT: "hello"}))
    classified_message = selector.process([message])[0]

    diagnostic_data = classified_message.get(DIAGNOSTIC_DATA)
    assert isinstance(diagnostic_data, dict)
    for values in diagnostic_data.values():
        assert "text_transformed" in values
        assert isinstance(values.get("text_transformed"), np.ndarray)
        # The `attention_weights` key should exist, regardless of there
        # being a transformer
        assert "attention_weights" in values
        # By default, ResponseSelector has `number_of_transformer_layers = 0`
        # in which case the attention weights should be None.
        assert values.get("attention_weights") is None
def inner(
    diet: DIETClassifier,
    pipeline: Optional[List[Dict[Text, Any]]] = None,
    training_data: str = nlu_data_path,
    message_text: Text = "Rasa is great!",
    expect_intent: bool = True,
) -> Message:
    """Train `diet` plus a featurization pipeline, then classify a message.

    Also reloads the persisted classifier and asserts that the reloaded
    model yields an identical message fingerprint.
    """
    if not pipeline:
        pipeline = [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
        ]

    featurizers = []
    for component_config in copy.deepcopy(pipeline):
        component_class = registry.get_component_class(component_config.pop("name"))
        featurizers.append(component_class(component_config))

    nlu_data = RasaFileImporter(training_data_paths=[training_data]).get_nlu_data()
    for featurizer in featurizers:
        featurizer.train(nlu_data)
    diet.train(training_data=nlu_data)

    message = Message(data={TEXT: message_text})
    for featurizer in featurizers:
        featurizer.process(message)
    message_copy = copy.deepcopy(message)

    classified_message = diet.process([message])[0]
    if expect_intent:
        assert classified_message.data["intent"]["name"]

    reloaded_diet = create_diet(diet.component_config, load=True)
    classified_copy = reloaded_diet.process([message_copy])[0]
    assert classified_copy.fingerprint() == classified_message.fingerprint()

    return classified_message
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[[Dict[Text, Any]], CRFEntityExtractorGraphComponent],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    spacy_tokenizer: SpacyTokenizerGraphComponent,
    spacy_nlp: Language,
):
    """Train/persist/load works for arbitrary CRF extractor configurations."""
    extractor = crf_entity_extractor(config_params)

    nlu_data = RasaFileImporter(
        training_data_paths=["data/examples/rasa"]
    ).get_nlu_data()
    spacy_model = SpacyModel(model=spacy_nlp, model_name="en_core_web_md")
    nlu_data = SpacyPreprocessor({}).process_training_data(nlu_data, spacy_model)
    nlu_data = spacy_tokenizer.process_training_data(nlu_data)
    extractor.train(nlu_data)

    message = Message(data={TEXT: "I am looking for an italian restaurant"})
    preprocessed = SpacyPreprocessor({}).process([message], spacy_model)
    message = spacy_tokenizer.process(preprocessed)[0]
    message_copy = copy.deepcopy(message)

    processed = extractor.process([message])[0]

    restored = CRFEntityExtractorGraphComponent.load(
        {**CRFEntityExtractorGraphComponent.get_default_config(), **config_params},
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    processed_copy = restored.process([message_copy])[0]
    assert processed_copy.fingerprint() == processed.fingerprint()

    detected_entities = processed_copy.get(ENTITIES)
    assert len(detected_entities) == 1
    assert detected_entities[0]["entity"] == "cuisine"
    assert detected_entities[0]["value"] == "italian"
def test_verify_correct_e2e_story_structure(tmp_path: Path):
    """Distinct e2e user texts must not be reported as story conflicts."""
    story_file_name = tmp_path / "stories.yml"
    story_file_name.write_text(
        """
        stories:
        - story: path 1
          steps:
          - user: |
              hello assistant! Can you help me today?
          - action: utter_greet
        - story: path 2 - state is similar but different from the one in path 1
          steps:
          - user: |
              hello assistant! you Can help me today?
          - action: utter_goodbye
        - story: path 3
          steps:
          - user: |
              That's it for today. Chat again tomorrow!
          - action: utter_goodbye
        """
    )
    importer = RasaFileImporter(
        config_file="data/test_config/config_defaults.yml",
        domain_path="data/test_domains/default.yml",
        training_data_paths=[story_file_name],
        training_type=TrainingType.NLU,
    )
    validator = Validator.from_importer(importer)
    assert validator.verify_story_structure(ignore_warnings=False)
def test_verify_bad_e2e_story_structure_when_text_identical(tmp_path: Path):
    """Two e2e stories with identical user text but different actions conflict."""
    story_file_name = tmp_path / "stories.yml"
    story_file_name.write_text(
        """
        version: "3.0"
        stories:
        - story: path 1
          steps:
          - user: |
              amazing!
          - action: utter_happy
        - story: path 2 (should always conflict path 1)
          steps:
          - user: |
              amazing!
          - action: utter_cheer_up
        """
    )
    # The two stories with identical user texts
    importer = RasaFileImporter(
        config_file="data/test_config/config_defaults.yml",
        domain_path="data/test_domains/default.yml",
        training_data_paths=[story_file_name],
        training_type=TrainingType.NLU,
    )
    validator = Validator.from_importer(importer)
    assert not validator.verify_story_structure(ignore_warnings=False)
async def test_verify_story_structure():
    """The default stories file contains no structural conflicts."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=[DEFAULT_STORIES_FILE],
        )
    )
    assert validator.verify_story_structure(ignore_warnings=False)
async def test_verify_valid_utterances():
    """Utterances used in training data all exist in the domain."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=[DEFAULT_NLU_DATA, DEFAULT_STORIES_FILE],
        )
    )
    assert validator.verify_utterances()
async def test_verify_intents_does_not_fail_on_valid_data():
    """Matching domain and NLU data pass the intent check."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="examples/moodbot/domain.yml",
            training_data_paths=[DEFAULT_NLU_DATA],
        )
    )
    assert validator.verify_intents()
async def test_verify_bad_story_structure_ignore_warnings():
    """Known story conflicts are tolerated when warnings are ignored."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=["data/test_stories/stories_conflicting_2.md"],
        )
    )
    assert validator.verify_story_structure(ignore_warnings=True)
def test_verify_bad_story_structure():
    """Conflicting stories must fail the structure check when warnings count."""
    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=["data/test_yaml_stories/stories_conflicting_2.yml"],
        )
    )
    assert not validator.verify_story_structure(ignore_warnings=False)
def load_from_dict(
    config: Optional[Dict] = None,
    config_path: Optional[Text] = None,
    domain_path: Optional[Text] = None,
    training_data_paths: Optional[List[Text]] = None,
    training_type: Optional[TrainingType] = TrainingType.BOTH,
) -> "TrainingDataImporter":
    """Loads a `TrainingDataImporter` instance from a dictionary."""
    from rasa.shared.importers.rasa import RasaFileImporter

    importer_configs = (config or {}).get("importers", [])

    importers = []
    for importer_config in importer_configs:
        importer = TrainingDataImporter._importer_from_dict(
            importer_config,
            config_path,
            domain_path,
            training_data_paths,
            training_type,
        )
        # Unresolvable importer descriptions come back as `None` — skip them.
        if importer:
            importers.append(importer)

    if not importers:
        # Default to the plain project-layout importer.
        importers = [
            RasaFileImporter(
                config_path, domain_path, training_data_paths, training_type
            )
        ]

    return E2EImporter(ResponsesSyncImporter(CombinedDataImporter(importers)))
def test_verify_slot_mappings_valid(tmp_path: Path):
    """A domain whose slot mappings reference existing forms/entities is valid."""
    domain = tmp_path / "domain.yml"
    domain.write_text(
        """
        version: "3.0"
        intents:
        - activate_booking
        entities:
        - city
        slots:
          location:
            type: text
            influence_conversation: false
            mappings:
            - type: from_entity
              entity: city
              conditions:
              - active_loop: booking_form
          started_booking_form:
            type: bool
            influence_conversation: false
            mappings:
            - type: from_trigger_intent
              intent: activate_booking
              value: true
        forms:
          booking_form:
            required_slots:
            - started_booking_form
            - location
        """
    )
    validator = Validator.from_importer(RasaFileImporter(domain_path=domain))
    assert validator.verify_slot_mappings()
def test_verify_form_slots_invalid_domain(tmp_path: Path):
    """A form requiring a slot missing from `slots:` warns and fails the check."""
    domain = tmp_path / "domain.yml"
    domain.write_text(
        """
        version: "3.0"
        forms:
          name_form:
            required_slots:
            - first_name
            - last_nam
        slots:
          first_name:
            type: text
            mappings:
            - type: from_text
          last_name:
            type: text
            mappings:
            - type: from_text
        """
    )
    validator = Validator.from_importer(RasaFileImporter(domain_path=domain))

    with pytest.warns(UserWarning) as w:
        validity = validator.verify_form_slots()

    assert validity is False
    assert (
        w[0].message.args[0] == "The form slot 'last_nam' in form 'name_form' "
        "is not present in the domain slots."
        "Please add the correct slot or check for typos."
    )
def test_verify_there_is_example_repetition_in_intents(nlu_data_path: Text):
    """Duplicated NLU examples across intents must fail the repetition check."""
    # moodbot nlu data already has duplicated example 'good afternoon'
    # for intents greet and goodbye
    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_moodbot/domain.yml",
            training_data_paths=[nlu_data_path],
        )
    )
    assert not validator.verify_example_repetition_in_intents(False)
async def test_verify_story_structure(stories_path: Text):
    """The given stories file contains no structural conflicts."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=[stories_path],
        )
    )
    assert validator.verify_story_structure(ignore_warnings=False)
def test_verify_nlu_with_e2e_story(tmp_path: Path, nlu_data_path: Path):
    """NLU validation succeeds when stories mix e2e text and intent steps."""
    story_file_name = tmp_path / "stories.yml"
    story_file_name.write_text(
        """
        stories:
        - story: path 1
          steps:
          - user: |
              hello assistant! Can you help me today?
          - action: utter_greet
        - story: path 2
          steps:
          - intent: greet
          - action: utter_greet
        """
    )
    validator = Validator.from_importer(
        RasaFileImporter(
            config_file="data/test_moodbot/config.yml",
            domain_path="data/test_moodbot/domain.yml",
            training_data_paths=[story_file_name, nlu_data_path],
        )
    )
    assert validator.verify_nlu()
def test_verify_intents_does_fail_on_invalid_data(nlu_data_path: Text):
    """Intents in NLU data that are missing from the domain fail validation."""
    # domain and nlu data are from different domain and should produce warnings
    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_domains/default.yml",
            training_data_paths=[nlu_data_path],
        )
    )
    assert not validator.verify_intents()
async def test_verify_intents_does_not_fail_on_valid_data(nlu_data_path: Text):
    """Matching moodbot domain and NLU data pass the intent check."""
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_moodbot/domain.yml",
            training_data_paths=[nlu_data_path],
        )
    )
    assert validator.verify_intents()
def test_verify_there_is_not_example_repetition_in_intents():
    """NLU data without duplicated examples passes the repetition check."""
    validator = Validator.from_importer(
        RasaFileImporter(
            domain_path="data/test_moodbot/domain.yml",
            training_data_paths=["examples/knowledgebasebot/data/nlu.yml"],
        )
    )
    assert validator.verify_example_repetition_in_intents(False)
async def test_verify_actions_in_stories_not_in_domain(
    tmp_path: Path, domain_path: Text
):
    """Stories referencing actions absent from the domain must be rejected."""
    story_file_name = tmp_path / "stories.yml"
    story_file_name.write_text(
        """
        version: "2.0"
        stories:
        - story: story path 1
          steps:
          - intent: greet
          - action: action_test_1
        """
    )
    validator = await Validator.from_importer(
        RasaFileImporter(
            domain_path=domain_path, training_data_paths=[story_file_name]
        )
    )

    with pytest.warns(UserWarning) as warning:
        validity = validator.verify_actions_in_stories_rules()

    assert validity is False
    expected = (
        "The action 'action_test_1' is used in the 'story path 1' block, "
        "but it is not listed in the domain file."
    )
    assert expected in warning[0].message.args[0]
def test_verify_slot_mappings_mapping_active_loop_not_in_forms(tmp_path: Path):
    """A mapping condition naming an unknown form warns and fails the check."""
    slot_name = "some_slot"
    domain = tmp_path / "domain.yml"
    domain.write_text(
        f"""
        version: "3.0"
        entities:
        - some_entity
        slots:
          {slot_name}:
            type: text
            influence_conversation: false
            mappings:
            - type: from_entity
              entity: some_entity
              conditions:
              - active_loop: som_form
        forms:
          some_form:
            required_slots:
            - {slot_name}
        """
    )
    validator = Validator.from_importer(RasaFileImporter(domain_path=domain))
    with pytest.warns(
        UserWarning,
        match=r"Slot 'some_slot' has a mapping condition "
        r"for form 'som_form' which is not listed "
        r"in domain forms.*",
    ):
        assert not validator.verify_slot_mappings()
async def test_valid_stories_rules_default_actions(
    file_name: Text, data_type: Text, tmp_path: Path
):
    """Default actions (e.g. `action_restart`) are valid in stories and rules.

    Args:
        file_name: YAML top-level key and file stem ("stories" or "rules").
        data_type: Per-entry key ("story" or "rule").
        tmp_path: pytest-provided temporary directory.
    """
    domain = tmp_path / "domain.yml"
    domain.write_text(
        """
        version: "2.0"
        intents:
        - greet
        """
    )
    # BUG FIX: capture the YAML top-level key BEFORE `file_name` is rebound
    # to a Path below. Previously the f-string interpolated the rebound Path,
    # writing the absolute file path as the YAML key, so no stories/rules
    # were loaded and the assertion passed vacuously.
    data_key = file_name
    file_name = tmp_path / f"{file_name}.yml"
    file_name.write_text(
        f"""
        version: "2.0"
        {data_key}:
        - {data_type}: test path
          steps:
          - intent: greet
          - action: action_restart
        """
    )
    importer = RasaFileImporter(
        domain_path=domain, training_data_paths=[file_name]
    )
    validator = await Validator.from_importer(importer)
    assert validator.verify_actions_in_stories_rules()
async def from_training_files(cls, training_data_paths: str, domain_path: str, config_path: str, root_dir):
    """
    Create validator from training files.

    @param training_data_paths: nlu.yml file path.
    @param domain_path: domain.yml file path.
    @param config_path: config.yml file path.
    @param root_dir: training data root directory.
    @return: TrainingDataValidator built from the importer.
    @raise AppException: if a training file is missing or cannot be parsed.
    """
    if not (os.path.exists(training_data_paths) and os.path.exists(domain_path) and os.path.exists(config_path)):
        raise AppException("Some training files are absent!")
    try:
        file_importer = RasaFileImporter(
            domain_path=domain_path, training_data_paths=training_data_paths,
            config_file=config_path,
        )
        # NOTE: stored on the class (not the instance) — shared across calls.
        cls.actions = Utility.read_yaml(os.path.join(root_dir, 'actions.yml'))
        return await TrainingDataValidator.from_importer(file_importer)
    except (YamlValidationException, YamlSyntaxException) as e:
        # Both YAML error types were handled with identical copy-pasted
        # bodies; merged into one tuple-except. The messages embed absolute
        # paths, which are rewritten to user-facing file names.
        exc = Utility.replace_file_name(str(e), root_dir)
        raise AppException(exc)
    except Exception as e:
        raise AppException(e)
def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None: """Validates either the story structure or the entire project. Args: args: Commandline arguments stories_only: If `True`, only the story structure is validated. """ from rasa.validator import Validator config = rasa.cli.utils.get_validated_path( args.config, "config", DEFAULT_CONFIG_PATH, none_is_valid=True ) file_importer = RasaFileImporter( domain_path=args.domain, training_data_paths=args.data, config_file=config, ) validator = Validator.from_importer(file_importer) if stories_only: all_good = _validate_story_structure(validator, args) else: all_good = ( _validate_domain(validator) and _validate_nlu(validator, args) and _validate_story_structure(validator, args) ) telemetry.track_validate_files(all_good) if not all_good: rasa.shared.utils.cli.print_error_and_exit( "Project validation completed with errors." )
def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None: """Validates either the story structure or the entire project. Args: args: Commandline arguments stories_only: If `True`, only the story structure is validated. """ loop = asyncio.get_event_loop() file_importer = RasaFileImporter( domain_path=args.domain, training_data_paths=args.data ) validator = loop.run_until_complete(Validator.from_importer(file_importer)) if stories_only: all_good = _validate_story_structure(validator, args) else: all_good = ( _validate_domain(validator) and _validate_nlu(validator, args) and _validate_story_structure(validator, args) ) if not all_good: rasa.shared.utils.cli.print_error_and_exit( "Project validation completed with errors." )
def test_early_exit_on_invalid_domain():
    """Loading a domain with duplicate intents warns instead of crashing."""
    domain_path = "data/test_domains/duplicate_intents.yml"
    importer = RasaFileImporter(domain_path=domain_path)

    with pytest.warns(UserWarning) as record:
        validator = Validator.from_importer(importer)
        validator.verify_domain_validity()

    # two for non-unique domains, 2 for auto-fill removal
    assert len(record) == 4

    expected_error = (
        f"Loading domain from '{domain_path}' failed. "
        f"Using empty domain. Error: 'Intents are not unique! "
        f"Found multiple intents with name(s) ['default', 'goodbye']. "
        f"Either rename or remove the duplicate ones.'"
    )
    non_unique_warnings = [
        warning for warning in record if expected_error in warning.message.args[0]
    ]
    assert len(non_unique_warnings) == 2

    auto_fill_warnings = [
        warning
        for warning in record
        if "Slot auto-fill has been removed in 3.0" in warning.message.args[0]
    ]
    assert len(auto_fill_warnings) == 2