def test_run_cv_evaluation_with_response_selector():
    """Cross-validation with a response selector produces per-fold intent and
    response-selection metrics; entity results stay empty (no extractor)."""
    td = training_data.load_data("data/examples/rasa/demo-rasa.md")
    response_td = training_data.load_data(
        "data/examples/rasa/demo-rasa-responses.md"
    )
    td = td.merge(response_td)
    td.fill_response_phrases()
    nlu_config = config.load(
        "sample_configs/config_embedding_intent_response_selector.yml"
    )
    n_folds = 2

    intent_results, entity_results, response_selection_results = cross_validate(
        td, n_folds, nlu_config
    )

    # Every metric list holds one entry per fold, for train and test alike.
    for result_set in (intent_results, response_selection_results):
        for fold_metrics in (result_set.train, result_set.test):
            for metric in ("Accuracy", "Precision", "F1-score"):
                assert len(fold_metrics[metric]) == n_folds

    # No entity extractor in pipeline
    assert len(entity_results.train) == 0
    assert len(entity_results.test) == 0
def test_run_cv_evaluation():
    """2-fold cross-validation yields per-fold intent metrics and per-fold
    CRF entity-extractor metrics on the demo data."""
    td = training_data.load_data("data/examples/rasa/demo-rasa.json")
    nlu_config = config.load(
        "sample_configs/config_pretrained_embeddings_spacy.yml"
    )
    n_folds = 2

    intent_results, entity_results = cross_validate(td, n_folds, nlu_config)

    # One metric value per fold for the intent classifier.
    for fold_metrics in (intent_results.train, intent_results.test):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(fold_metrics[metric]) == n_folds

    # Same shape for the CRF entity extractor's results.
    for extractor_metrics in (
        entity_results.train["CRFEntityExtractor"],
        entity_results.test["CRFEntityExtractor"],
    ):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(extractor_metrics[metric]) == n_folds
def perform_nlu_cross_validation(
    config: Text, nlu: Text, kwargs: Optional[Dict[Text, Any]]
):
    """Run cross-validation over NLU training data and log the results.

    Args:
        config: Path to the NLU model configuration.
        nlu: Path to the NLU training data.
        kwargs: Optional extra arguments; ``folds`` (default 3) sets the
            number of folds, the rest is filtered down to the parameters
            ``cross_validate`` actually accepts.
    """
    import rasa.nlu.config
    from rasa.nlu.test import (
        drop_intents_below_freq,
        cross_validate,
        return_results,
        return_entity_results,
    )

    kwargs = kwargs or {}
    folds = int(kwargs.get("folds", 3))
    nlu_config = rasa.nlu.config.load(config)
    data = rasa.nlu.training_data.load_data(nlu)
    # Intents with fewer examples than folds cannot appear in every split.
    data = drop_intents_below_freq(data, cutoff=folds)
    kwargs = minimal_kwargs(kwargs, cross_validate)
    results, entity_results = cross_validate(data, folds, nlu_config, **kwargs)
    logger.info("CV evaluation (n={})".format(folds))

    if any(results):
        logger.info("Intent evaluation results")
        return_results(results.train, "train")
        return_results(results.test, "test")
    if any(entity_results):
        logger.info("Entity evaluation results")
        return_entity_results(entity_results.train, "train")
        return_entity_results(entity_results.test, "test")
def test_run_cv_evaluation(pretrained_embeddings_spacy_config):
    """2-fold CV with plotting disabled: intent and CRF entity metrics
    each contain one value per fold."""
    td = training_data.load_data("data/examples/rasa/demo-rasa.json")
    n_folds = 2

    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        pretrained_embeddings_spacy_config,
        successes=False,
        errors=False,
        disable_plotting=True,
    )

    for fold_metrics in (intent_results.train, intent_results.test):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(fold_metrics[metric]) == n_folds

    for extractor_metrics in (
        entity_results.train["CRFEntityExtractor"],
        entity_results.test["CRFEntityExtractor"],
    ):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(extractor_metrics[metric]) == n_folds
def test_run_cv_evaluation_with_response_selector():
    """CV over a DIET + ResponseSelector pipeline: intent, response-selection
    and DIET entity metrics all carry one value per fold."""
    td = training_data.load_data("data/examples/rasa/demo-rasa.md")
    response_td = training_data.load_data(
        "data/examples/rasa/demo-rasa-responses.md"
    )
    td = td.merge(response_td)
    td.fill_response_phrases()
    # Tiny epoch counts keep the test fast.
    nlu_config = RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
                {"name": "DIETClassifier", EPOCHS: 2},
                {"name": "ResponseSelector", EPOCHS: 2},
            ],
        }
    )
    n_folds = 2

    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
    )

    for result_set in (intent_results, response_selection_results):
        for fold_metrics in (result_set.train, result_set.test):
            for metric in ("Accuracy", "Precision", "F1-score"):
                assert len(fold_metrics[metric]) == n_folds

    for extractor_metrics in (
        entity_results.train["DIETClassifier"],
        entity_results.test["DIETClassifier"],
    ):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(extractor_metrics[metric]) == n_folds
def test_run_cv_evaluation(
    pretrained_embeddings_spacy_config: RasaNLUModelConfig,
    monkeypatch: MonkeyPatch,
):
    """CV with training mocked out: per-fold intent metrics plus dict-style
    evaluation reports for intents and entity extractors."""
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.json"
    )
    nlu_config = RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
                {"name": "DIETClassifier", EPOCHS: 2},
            ],
        }
    )

    # mock training: Trainer.train just hands back an interpreter built from
    # the (pretrained-extractor-free) pipeline, so no real training happens.
    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
    mock = Mock(return_value=Interpreter(trainer.pipeline, None))
    monkeypatch.setattr(Trainer, "train", mock)

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    for fold_metrics in (intent_results.train, intent_results.test):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(fold_metrics[metric]) == n_folds

    assert all(key in intent_results.evaluation for key in ["errors", "report"])
    # At least one intent report entry records what it was confused with.
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values()
    )
    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])
def test_run_cv_evaluation_no_entities():
    """CV on data without entity annotations: intent metrics and reports are
    produced while every entity result container stays empty."""
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/test/demo-rasa-no-ents.yml"
    )
    nlu_config = RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
                {"name": "DIETClassifier", EPOCHS: 25},
            ],
        }
    )
    n_folds = 2

    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    for fold_metrics in (intent_results.train, intent_results.test):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(fold_metrics[metric]) == n_folds

    assert all(key in intent_results.evaluation for key in ["errors", "report"])
    # At least one intent report entry records what it was confused with.
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values()
    )

    # No entities annotated, so nothing to extract or evaluate.
    assert len(entity_results.train) == 0
    assert len(entity_results.test) == 0
    assert len(entity_results.evaluation) == 0
def perform_nlu_cross_validation(
    config: Text,
    data: TrainingData,
    output: Text,
    additional_arguments: Optional[Dict[Text, Any]],
) -> None:
    """Runs cross-validation on test data.

    Args:
        config: The model configuration.
        data: The data which is used for the cross-validation.
        output: Output directory for the cross-validation results.
        additional_arguments: Additional arguments passed to the
            cross-validation, e.g. the number of `folds` or `disable_plotting`.
    """
    import rasa.nlu.config
    from rasa.nlu.test import (
        drop_intents_below_freq,
        cross_validate,
        log_results,
        log_entity_results,
    )

    additional_arguments = additional_arguments or {}
    folds = int(additional_arguments.get("folds", 3))
    nlu_config = rasa.nlu.config.load(config)
    # Intents with fewer examples than folds cannot appear in every split.
    data = drop_intents_below_freq(data, cutoff=folds)
    kwargs = rasa.shared.utils.common.minimal_kwargs(
        additional_arguments, cross_validate
    )
    results, entity_results, response_selection_results = cross_validate(
        data, folds, nlu_config, output, **kwargs
    )
    logger.info(f"CV evaluation (n={folds})")

    if any(results):
        logger.info("Intent evaluation results")
        log_results(results.train, "train")
        log_results(results.test, "test")
    if any(entity_results):
        logger.info("Entity evaluation results")
        log_entity_results(entity_results.train, "train")
        log_entity_results(entity_results.test, "test")
    if any(response_selection_results):
        logger.info("Response Selection evaluation results")
        log_results(response_selection_results.train, "train")
        log_results(response_selection_results.test, "test")
def test_nlu_with_cross_validation(config: Text, nlu: Text, folds: int = 3):
    """Cross-validate an NLU config against training data and log results.

    Args:
        config: Path to the NLU model configuration.
        nlu: Path to the NLU training data.
        folds: Number of cross-validation folds.
    """
    import rasa.nlu.config
    import rasa.nlu.test as nlu_test

    nlu_config = rasa.nlu.config.load(config)
    data = rasa.nlu.training_data.load_data(nlu)
    # Drop intents too rare to be represented across the splits.
    data = nlu_test.drop_intents_below_freq(data, cutoff=5)
    results, entity_results = nlu_test.cross_validate(
        data, int(folds), nlu_config
    )
    logger.info("CV evaluation (n={})".format(folds))

    if any(results):
        logger.info("Intent evaluation results")
        nlu_test.return_results(results.train, "train")
        nlu_test.return_results(results.test, "test")
    if any(entity_results):
        logger.info("Entity evaluation results")
        nlu_test.return_entity_results(entity_results.train, "train")
        nlu_test.return_entity_results(entity_results.test, "test")
def test_run_cv_evaluation(
    pretrained_embeddings_spacy_config: RasaNLUModelConfig,
):
    """2-fold CV with dict reports: per-fold intent and CRF entity metrics
    plus error/report entries in each evaluation."""
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.json"
    )
    n_folds = 2

    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        pretrained_embeddings_spacy_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    for fold_metrics in (intent_results.train, intent_results.test):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(fold_metrics[metric]) == n_folds

    assert all(key in intent_results.evaluation for key in ["errors", "report"])
    # At least one intent report entry records what it was confused with.
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values()
    )

    for extractor_metrics in (
        entity_results.train["CRFEntityExtractor"],
        entity_results.test["CRFEntityExtractor"],
    ):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(extractor_metrics[metric]) == n_folds

    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])
def perform_nlu_cross_validation(
    config: Text,
    nlu: Text,
    output: Text,
    additional_arguments: Optional[Dict[Text, Any]],
):
    """Run cross-validation over NLU data and log the aggregated results.

    Args:
        config: Path to the NLU model configuration.
        nlu: Path to the NLU training data.
        output: Output directory for the cross-validation results.
        additional_arguments: Optional extra arguments; ``folds`` (default 3)
            sets the fold count, the rest is filtered to what
            ``cross_validate`` accepts.
    """
    import rasa.nlu.config
    from rasa.nlu.test import (
        drop_intents_below_freq,
        cross_validate,
        log_results,
        log_entity_results,
    )

    additional_arguments = additional_arguments or {}
    folds = int(additional_arguments.get("folds", 3))
    nlu_config = rasa.nlu.config.load(config)
    data = rasa.shared.nlu.training_data.loading.load_data(nlu)
    # Intents with fewer examples than folds cannot appear in every split.
    data = drop_intents_below_freq(data, cutoff=folds)
    kwargs = rasa.shared.utils.common.minimal_kwargs(
        additional_arguments, cross_validate
    )
    results, entity_results, response_selection_results = cross_validate(
        data, folds, nlu_config, output, **kwargs
    )
    logger.info(f"CV evaluation (n={folds})")

    if any(results):
        logger.info("Intent evaluation results")
        log_results(results.train, "train")
        log_results(results.test, "test")
    if any(entity_results):
        logger.info("Entity evaluation results")
        log_entity_results(entity_results.train, "train")
        log_entity_results(entity_results.test, "test")
    if any(response_selection_results):
        logger.info("Response Selection evaluation results")
        log_results(response_selection_results.train, "train")
        log_results(response_selection_results.test, "test")
def CV_eval(td_file, config_file, Nfolds=10):
    """Train and evaluate a model with cross-validation.

    Args:
        td_file: Path to the NLU training-data file.
        config_file: Path to the model configuration file.
        Nfolds: Number of cross-validation folds.

    Returns:
        The result of ``cross_validate`` — previously it was computed and
        silently discarded, so callers could not inspect the fold metrics.
        Returning it is backward-compatible with callers that ignore it.
    """
    td = load_data(td_file)
    configuration = config.load(config_file)
    return cross_validate(td, Nfolds, configuration)
def test_run_cv_evaluation_with_response_selector():
    """CV over a DIET + ResponseSelector pipeline with dict reports: intent,
    response-selection and DIET entity metrics plus evaluation reports."""
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/test/demo-rasa-more-ents-and-multiplied.yml"
    )
    response_td = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa-responses.yml"
    )
    td = td.merge(response_td)
    nlu_config = RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
                {"name": "DIETClassifier", EPOCHS: 25},
                {"name": "ResponseSelector", EPOCHS: 2},
            ],
        }
    )
    n_folds = 2

    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    for fold_metrics in (intent_results.train, intent_results.test):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(fold_metrics[metric]) == n_folds

    assert all(key in intent_results.evaluation for key in ["errors", "report"])
    # At least one intent report entry records what it was confused with.
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values()
    )

    for fold_metrics in (
        response_selection_results.train,
        response_selection_results.test,
    ):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(fold_metrics[metric]) == n_folds

    assert all(
        key in response_selection_results.evaluation
        for key in ["errors", "report"]
    )
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in response_selection_results.evaluation[
            "report"
        ].values()
    )

    for extractor_metrics in (
        entity_results.train["DIETClassifier"],
        entity_results.test["DIETClassifier"],
    ):
        for metric in ("Accuracy", "Precision", "F1-score"):
            assert len(extractor_metrics[metric]) == n_folds

    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])