def test_validation_with_missing_nlu_target():
    """Validation rejects a predict graph when no `nlu_target` node is configured."""
    schema_with_nlu_node = GraphSchema(
        {
            "A": SchemaNode(
                needs={},
                uses=TestNLUTarget,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            )
        }
    )

    model_config = GraphModelConfiguration(
        train_schema=GraphSchema({}),
        predict_schema=schema_with_nlu_node,
        training_type=TrainingType.BOTH,
        language=None,
        core_target=None,
        nlu_target=None,
    )

    with pytest.raises(
        GraphSchemaValidationException, match="no target for the 'nlu_target'"
    ):
        validation.validate(model_config)
def graph_config_for_recipe(
    self,
    config: Dict,
    cli_parameters: Dict[Text, Any],
    training_type: TrainingType = TrainingType.BOTH,
    is_finetuning: bool = False,
) -> GraphModelConfiguration:
    """Converts the default config to graphs (see interface for full docstring)."""
    mark_as_experimental_feature("graph recipe")

    # Graph Recipe intentionally ignores CLI overrides and finetuning; warn so
    # users understand why those settings have no effect here.
    if is_finetuning or cli_parameters:
        raise_warning(
            "Unlike the Default Recipe, Graph Recipe does not utilize CLI "
            "parameters or finetuning and these configurations will be ignored. "
            "Add configuration to the recipe itself if you want them to be used.",
            docs=DOCS_URL_GRAPH_RECIPE,
        )

    nlu_target, core_target = self.get_targets(config, training_type)

    return GraphModelConfiguration(
        train_schema=GraphSchema.from_dict(config.get("train_schema")),
        predict_schema=GraphSchema.from_dict(config.get("predict_schema")),
        training_type=training_type,
        language=config.get("language"),
        core_target=core_target,
        nlu_target=nlu_target,
    )
def test_validation_with_core_target_wrong_type():
    """Validation fails when the Core target node has an invalid return type."""
    predict_graph = GraphSchema(
        {
            "A": SchemaNode(
                needs={},
                uses=TestNLUTarget,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
        },
    )

    model_config = GraphModelConfiguration(
        train_schema=GraphSchema({}),
        predict_schema=predict_graph,
        training_type=TrainingType.BOTH,
        language=None,
        core_target="A",
        nlu_target="A",
    )

    with pytest.raises(
        GraphSchemaValidationException,
        match="Core model's .* invalid return type",
    ):
        validation.validate(model_config)
def test_validate_validates_required_components(
    test_case: List[RequiredComponentsTestCase],
    is_train_graph: bool,
    test_subclass: bool,
):
    """Checks that unmet component requirements are reported during validation."""
    generated_schema = _create_graph_schema_from_requirements(
        node_needs_requires=test_case.node_needs_requires_tuples,
        targets=test_case.targets,
        use_subclass=test_subclass,
    )

    # Place the generated schema on the side of the graph under test; the other
    # side keeps a trivial default.
    if is_train_graph:
        train_schema, predict_schema = generated_schema, DEFAULT_PREDICT_SCHEMA
    else:
        train_schema, predict_schema = GraphSchema({}), generated_schema

    graph_config = GraphModelConfiguration(
        train_schema, predict_schema, TrainingType.BOTH, None, None, "nlu_target"
    )

    num_unmet = test_case.num_unmet_requirements
    if num_unmet == 0:
        # All requirements satisfied: validation must not raise.
        validation.validate(graph_config)
    else:
        expected_message = f"{num_unmet} components are missing"
        with pytest.raises(GraphSchemaValidationException, match=expected_message):
            validation.validate(graph_config)
def inner(
    train_schema: GraphSchema,
    cache: Optional[TrainingCache] = None,
    model_storage: Optional[ModelStorage] = None,
    path: Optional[Path] = None,
    force_retraining: bool = False,
) -> Path:
    """Trains `train_schema` and returns the path of the packaged model archive.

    Missing fixtures (cache, storage, path) are created on demand. Relies on
    `tmp_path_factory`, `local_cache_creator` and `domain_path` from the
    enclosing fixture scope.
    """
    # Create defaults in dependency order: storage and cache both live under `path`.
    if path is None:
        path = tmp_path_factory.mktemp("model_storage_path")
    if model_storage is None:
        model_storage = LocalModelStorage.create(path)
    if cache is None:
        cache = local_cache_creator(path)

    trainer = GraphTrainer(
        model_storage=model_storage, cache=cache, graph_runner_class=DaskGraphRunner
    )

    output_filename = path / "model.tar.gz"
    trainer.train(
        GraphModelConfiguration(
            train_schema=train_schema,
            predict_schema=GraphSchema({}),
            language=None,
            core_target=None,
            nlu_target="nlu",
            training_type=TrainingType.BOTH,
        ),
        importer=TrainingDataImporter.load_from_dict(domain_path=str(domain_path)),
        output_filename=output_filename,
        force_retraining=force_retraining,
    )

    assert output_filename.is_file()
    return output_filename
def test_read_unsupported_model(
    monkeypatch: MonkeyPatch,
    tmp_path_factory: TempPathFactory,
    domain: Domain,
):
    """Loading a model trained with an unsupported Rasa version must raise."""
    train_model_storage = LocalModelStorage(
        tmp_path_factory.mktemp("train model storage")
    )

    graph_schema = GraphSchema(nodes={})
    persisted_model_dir = tmp_path_factory.mktemp("persisted models")
    archive_path = persisted_model_dir / "my-model.tar.gz"

    # Build model metadata, then rewrite its version to an outdated one.
    trained_at = datetime.utcnow()
    model_configuration = GraphModelConfiguration(
        graph_schema, graph_schema, TrainingType.BOTH, None, None, "nlu"
    )
    outdated_model_meta_data = ModelMetadata(
        trained_at=trained_at,
        # Start with the current version to avoid a validation error at
        # construction time; overwritten with `old_version` below.
        rasa_open_source_version=rasa.__version__,
        model_id=uuid.uuid4().hex,
        domain=domain,
        train_schema=model_configuration.train_schema,
        predict_schema=model_configuration.predict_schema,
        training_type=model_configuration.training_type,
        project_fingerprint=rasa.model.project_fingerprint(),
        language=model_configuration.language,
        core_target=model_configuration.core_target,
        nlu_target=model_configuration.nlu_target,
    )
    old_version = "0.0.1"
    outdated_model_meta_data.rasa_open_source_version = old_version

    # Package the model, injecting the outdated metadata.
    monkeypatch.setattr(
        LocalModelStorage,
        "_create_model_metadata",
        lambda *args, **kwargs: outdated_model_meta_data,
    )
    train_model_storage.create_model_package(
        model_archive_path=archive_path,
        model_configuration=model_configuration,
        domain=domain,
    )

    # Both metadata inspection and full unpacking must reject the old version.
    load_model_storage_dir = tmp_path_factory.mktemp("load model storage")
    expected_message = (
        f"The model version is trained using Rasa Open Source "
        f"{old_version} and is not compatible with your current "
        f"installation .*"
    )
    with pytest.raises(UnsupportedModelVersionError, match=expected_message):
        LocalModelStorage.metadata_from_archive(archive_path)
    with pytest.raises(UnsupportedModelVersionError, match=expected_message):
        LocalModelStorage.from_model_archive(load_model_storage_dir, archive_path)
def create_test_schema(
    uses: Type,  # The unspecified type is on purpose to enable testing of invalid cases
    constructor_name: Text = "create",
    run_fn: Text = "run",
    needs: Optional[Dict[Text, Text]] = None,
    eager: bool = True,
    parent: Optional[Type[GraphComponent]] = None,
    language: Optional[Text] = None,
    is_train_graph: bool = True,
) -> GraphModelConfiguration:
    """Builds a `GraphModelConfiguration` containing a single node under test."""
    # Optional extra node so tests can exercise parent/child component relations.
    parent_node = (
        {
            "parent": SchemaNode(
                needs={}, uses=parent, constructor_name="create", fn="run", config={}
            )
        }
        if parent
        else {}
    )

    # noinspection PyTypeChecker
    schema_under_test = GraphSchema(
        {
            "my_node": SchemaNode(
                needs=needs or {},
                uses=uses,
                eager=eager,
                constructor_name=constructor_name,
                fn=run_fn,
                config={},
            ),
            **DEFAULT_PREDICT_SCHEMA.nodes,
            **parent_node,
        },
    )

    # The node under test goes on the requested side; the other side is a default.
    if is_train_graph:
        train_schema, predict_schema = schema_under_test, DEFAULT_PREDICT_SCHEMA
    else:
        train_schema, predict_schema = GraphSchema({}), schema_under_test

    return GraphModelConfiguration(
        train_schema=train_schema,
        predict_schema=predict_schema,
        training_type=TrainingType.BOTH,
        core_target=None,
        nlu_target="nlu_target",
        language=language,
    )
def test_cycle(is_train_graph: bool):
    """Validation detects cyclic dependencies between graph nodes (A -> B -> C -> A)."""

    class MyTestComponent(TestComponentWithoutRun):
        def run(self, training_data: TrainingData) -> TrainingData:
            pass

    def node(needed_from: Text, is_target: bool = False) -> SchemaNode:
        # Each node consumes its training data from one other node in the cycle.
        return SchemaNode(
            needs={"training_data": needed_from},
            uses=MyTestComponent,
            eager=True,
            constructor_name="create",
            fn="run",
            is_target=is_target,
            config={},
        )

    cyclic_schema = GraphSchema(
        {
            "A": node("B", is_target=True),
            "B": node("C"),
            "C": node("A"),
        }
    )

    if is_train_graph:
        train_schema, predict_schema = cyclic_schema, DEFAULT_PREDICT_SCHEMA
    else:
        train_schema, predict_schema = GraphSchema({}), cyclic_schema

    with pytest.raises(GraphSchemaValidationException, match="Cycles"):
        validation.validate(
            GraphModelConfiguration(
                train_schema=train_schema,
                predict_schema=predict_schema,
                training_type=TrainingType.BOTH,
                language=None,
                core_target=None,
                nlu_target="nlu_target",
            )
        )
def test_create_model_package_with_non_existing_dir(
    tmp_path: Path, default_model_storage: ModelStorage
):
    """Packaging must create intermediate directories that do not yet exist."""
    target = tmp_path / "some_dir" / "another" / "model.tar.gz"

    default_model_storage.create_model_package(
        target,
        GraphModelConfiguration(
            GraphSchema({}), GraphSchema({}), TrainingType.BOTH, None, None, "nlu"
        ),
        Domain.empty(),
    )

    assert target.exists()
def test_create_package_with_non_existing_parent(tmp_path: Path):
    """A freshly-created storage can package into a deeply nested, new directory."""
    storage = LocalModelStorage.create(tmp_path)
    archive = tmp_path / "new" / "sub" / "dir" / "file.tar.gz"

    storage.create_model_package(
        archive,
        GraphModelConfiguration(
            GraphSchema({}), GraphSchema({}), TrainingType.BOTH, None, None, "nlu"
        ),
        Domain.empty(),
    )

    assert archive.is_file()
def graph_config_for_recipe(
    self,
    config: Dict,
    cli_parameters: Dict[Text, Any],
    training_type: TrainingType = TrainingType.BOTH,
    is_finetuning: bool = False,
) -> GraphModelConfiguration:
    """Converts the default config to graphs (see interface for full docstring)."""
    # Decide which sub-models to build from the presence of their config
    # sections, constrained by the requested training type.
    self._use_core = (
        bool(config.get("policies")) and not training_type == TrainingType.NLU
    )
    self._use_nlu = (
        bool(config.get("pipeline")) and not training_type == TrainingType.CORE
    )

    if not self._use_nlu and training_type == TrainingType.NLU:
        raise InvalidConfigException(
            "Can't train an NLU model without a specified pipeline. Please make "
            "sure to specify a valid pipeline in your configuration."
        )

    if not self._use_core and training_type == TrainingType.CORE:
        raise InvalidConfigException(
            # Fixed grammar in user-facing message: "an Core" -> "a Core".
            "Can't train a Core model without policies. Please make "
            "sure to specify a valid policy in your configuration."
        )

    self._use_end_to_end = (
        self._use_nlu
        and self._use_core
        and training_type == TrainingType.END_TO_END
    )

    self._is_finetuning = is_finetuning

    train_nodes, preprocessors = self._create_train_nodes(config, cli_parameters)
    predict_nodes = self._create_predict_nodes(config, preprocessors, train_nodes)

    core_target = "select_prediction" if self._use_core else None

    # Local import (as in the surrounding codebase's style) to avoid a cycle.
    from rasa.nlu.classifiers.regex_message_handler import RegexMessageHandler

    return GraphModelConfiguration(
        train_schema=GraphSchema(train_nodes),
        predict_schema=GraphSchema(predict_nodes),
        training_type=training_type,
        language=config.get("language"),
        core_target=core_target,
        nlu_target=f"run_{RegexMessageHandler.__name__}",
    )
def test_validation_with_core_target_used_by_other_node():
    """Validation fails when another node consumes the Core target's output."""

    class CoreTargetConsumer(TestComponentWithoutRun):
        def run(self, core_target_output: PolicyPrediction) -> PolicyPrediction:
            pass

    # "C" reads from the Core target "B", which validation must reject.
    predict_graph = GraphSchema(
        {
            "A": SchemaNode(
                needs={},
                uses=TestNLUTarget,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
            "B": SchemaNode(
                needs={},
                uses=TestCoreTarget,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
            "C": SchemaNode(
                needs={"core_target_output": "B"},
                uses=CoreTargetConsumer,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
        },
    )

    with pytest.raises(
        GraphSchemaValidationException, match="uses the Core target 'B' as input"
    ):
        validation.validate(
            GraphModelConfiguration(
                train_schema=GraphSchema({}),
                predict_schema=predict_graph,
                training_type=TrainingType.BOTH,
                language=None,
                core_target="B",
                nlu_target="A",
            )
        )
def test_validation_with_placeholders():
    """Placeholder inputs (e.g. the importer) are accepted as node dependencies."""

    class MyTestComponent(TestComponentWithoutRun):
        def run(self, training_data: TrainingDataImporter) -> TrainingDataImporter:
            pass

    train_graph = GraphSchema(
        {
            "A": SchemaNode(
                needs={"training_data": "B"},
                uses=MyTestComponent,
                eager=True,
                constructor_name="create",
                fn="run",
                is_target=True,
                config={},
            ),
            # "B" receives its input from a runtime placeholder, not another node.
            "B": SchemaNode(
                needs={"training_data": PLACEHOLDER_IMPORTER},
                uses=MyTestComponent,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
        }
    )

    # Does not raise
    validation.validate(
        GraphModelConfiguration(
            train_schema=train_graph,
            predict_schema=DEFAULT_PREDICT_SCHEMA,
            training_type=TrainingType.BOTH,
            language=None,
            core_target=None,
            nlu_target="nlu_target",
        )
    )
def test_loader_loads_graph_runner(
    default_model_storage: ModelStorage,
    temp_cache: TrainingCache,
    tmp_path: Path,
    tmp_path_factory: TempPathFactory,
    domain_path: Path,
):
    """A trained model archive can be reloaded into a working predict-graph runner."""
    graph_trainer = GraphTrainer(
        model_storage=default_model_storage,
        cache=temp_cache,
        graph_runner_class=DaskGraphRunner,
    )

    test_value = "test_value"

    # Train graph: "train" persists a resource, "load" restores and runs it.
    train_schema = GraphSchema(
        {
            "train": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="train",
                constructor_name="create",
                config={"test_value": test_value},
                is_target=True,
            ),
            "load": SchemaNode(
                needs={"resource": "train"},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
            ),
        }
    )

    # Predict graph: only the loading node, bound to the persisted resource.
    predict_schema = GraphSchema(
        {
            "load": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
                is_target=True,
                resource=Resource("train"),
            )
        }
    )

    output_filename = tmp_path / "model.tar.gz"
    importer = TrainingDataImporter.load_from_dict(
        training_data_paths=[], domain_path=str(domain_path)
    )

    # Freeze time so the archived `trained_at` can be compared exactly.
    trained_at = datetime.utcnow()
    with freezegun.freeze_time(trained_at):
        model_metadata = graph_trainer.train(
            GraphModelConfiguration(
                train_schema=train_schema,
                predict_schema=predict_schema,
                training_type=TrainingType.BOTH,
                language=None,
                core_target=None,
                nlu_target=None,
            ),
            importer=importer,
            output_filename=output_filename,
        )

    assert isinstance(model_metadata, ModelMetadata)
    assert output_filename.is_file()

    loaded_model_storage_path = tmp_path_factory.mktemp("loaded model storage")

    model_metadata, loaded_predict_graph_runner = loader.load_predict_graph_runner(
        storage_path=loaded_model_storage_path,
        model_archive_path=output_filename,
        model_storage_class=LocalModelStorage,
        graph_runner_class=DaskGraphRunner,
    )

    # The reloaded runner must reproduce the trained value and metadata.
    assert loaded_predict_graph_runner.run() == {"load": test_value}
    assert model_metadata.predict_schema == predict_schema
    assert model_metadata.train_schema == train_schema
    assert model_metadata.model_id
    assert model_metadata.domain.as_dict() == Domain.from_path(domain_path).as_dict()
    assert model_metadata.rasa_open_source_version == rasa.__version__
    assert model_metadata.trained_at == trained_at
def test_graph_trainer_returns_model_metadata(
    default_model_storage: ModelStorage,
    temp_cache: TrainingCache,
    tmp_path: Path,
    domain_path: Path,
):
    """`GraphTrainer.train` returns metadata mirroring the trained configuration."""
    graph_trainer = GraphTrainer(
        model_storage=default_model_storage,
        cache=temp_cache,
        graph_runner_class=DaskGraphRunner,
    )

    test_value = "test_value"

    # Train graph: "train" persists a resource which "load" then consumes.
    train_schema = GraphSchema(
        {
            "train": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="train",
                constructor_name="create",
                config={"test_value": test_value},
                is_target=True,
            ),
            "load": SchemaNode(
                needs={"resource": "train"},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
            ),
        }
    )

    predict_schema = GraphSchema(
        {
            "load": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
                is_target=True,
                resource=Resource("train"),
            )
        }
    )

    output_filename = tmp_path / "model.tar.gz"
    model_metadata = graph_trainer.train(
        GraphModelConfiguration(
            train_schema=train_schema,
            predict_schema=predict_schema,
            language=None,
            core_target=None,
            nlu_target="nlu",
            training_type=TrainingType.BOTH,
        ),
        importer=TrainingDataImporter.load_from_dict(domain_path=str(domain_path)),
        output_filename=output_filename,
    )

    assert model_metadata.model_id
    assert model_metadata.domain.as_dict() == Domain.from_path(domain_path).as_dict()
    assert model_metadata.train_schema == train_schema
    assert model_metadata.predict_schema == predict_schema
def test_create_model_package(tmp_path_factory: TempPathFactory, domain: Domain):
    """Round-trips a model package: create an archive, then unpack and inspect it."""
    train_model_storage = LocalModelStorage(
        tmp_path_factory.mktemp("train model storage")
    )

    train_schema = GraphSchema(
        {
            "train": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="train",
                constructor_name="create",
                config={
                    "some_config": 123455,
                    "some more config": [{"nested": "hi"}],
                },
            ),
            "load": SchemaNode(
                needs={"resource": "train"},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
                is_target=True,
            ),
        }
    )

    predict_schema = GraphSchema(
        {
            "run": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="run",
                constructor_name="load",
                config={
                    "some_config": 123455,
                    "some more config": [{"nested": "hi"}],
                },
            )
        }
    )

    # Fill model Storage with one persisted resource.
    with train_model_storage.write_to(Resource("resource1")) as directory:
        file = directory / "file.txt"
        file.write_text("test")

    # Package model; freeze time so `trained_at` can be compared exactly.
    persisted_model_dir = tmp_path_factory.mktemp("persisted models")
    archive_path = persisted_model_dir / "my-model.tar.gz"

    trained_at = datetime.utcnow()
    with freezegun.freeze_time(trained_at):
        train_model_storage.create_model_package(
            archive_path,
            GraphModelConfiguration(
                train_schema, predict_schema, TrainingType.BOTH, None, None, "nlu"
            ),
            domain,
        )

    # Unpack and inspect packaged model.
    load_model_storage_dir = tmp_path_factory.mktemp("load model storage")

    just_packaged_metadata = LocalModelStorage.metadata_from_archive(archive_path)
    (load_model_storage, packaged_metadata) = LocalModelStorage.from_model_archive(
        load_model_storage_dir, archive_path
    )

    assert just_packaged_metadata.trained_at == packaged_metadata.trained_at
    assert packaged_metadata.train_schema == train_schema
    assert packaged_metadata.predict_schema == predict_schema
    assert packaged_metadata.domain.as_dict() == domain.as_dict()
    assert packaged_metadata.rasa_open_source_version == rasa.__version__
    assert packaged_metadata.trained_at == trained_at
    assert packaged_metadata.model_id
    assert packaged_metadata.project_fingerprint

    # The persisted resource must have been restored into the new storage.
    persisted_resources = load_model_storage_dir.glob("*")
    assert list(persisted_resources) == [Path(load_model_storage_dir, "resource1")]