def test_should_not_retrain_intent_classifier_when_no_force_retrain(self):
    """Check that an already fitted classifier is not fitted again.

    The parser is handed a ``LogRegIntentClassifier`` that has been fitted
    on the dataset, then ``parser.fit`` is called with
    ``force_retrain=False`` while ``LogRegIntentClassifier.fit`` is patched.
    The patched ``fit`` must never be called.
    """
    # Given
    # The YAML documents start at column 0 inside the literal: YAML is
    # indentation- and newline-sensitive, so the fixture must keep one
    # key per line and a `---` marker before each intent document.
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
  - make me a [beverage_temperature:Temperature](hot) cup of tea
  - make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
  - make me [number_of_cups:snips/number](one) cup of coffee please
  - brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser = ProbabilisticIntentParser()
    intent_classifier = LogRegIntentClassifier()
    # Fit for real *before* patching so the parser receives a fitted
    # classifier.
    intent_classifier.fit(dataset)
    parser.intent_classifier = intent_classifier

    # When / Then
    with patch("snips_nlu.intent_classifier.log_reg_classifier"
               ".LogRegIntentClassifier.fit") as mock_fit:
        parser.fit(dataset, force_retrain=False)
        mock_fit.assert_not_called()
def test_log_best_features(self):
    """Check the output of ``log_best_features`` before and after fitting.

    Before ``fit`` the method is expected to return ``None``; after
    ``fit`` it is expected to return a string containing ``"Top 20"``
    when called with ``20``.
    """
    # Given
    # The YAML documents start at column 0 inside the literal: YAML is
    # indentation- and newline-sensitive, so the fixture must keep one
    # key per line and a `---` marker before each intent document.
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar

---
type: intent
name: intent2
utterances:
  - lorem ipsum""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    shared = self.get_shared_data(dataset)
    intent_classifier = LogRegIntentClassifier(**shared)

    # When
    # Unfitted classifier: nothing to log yet.
    self.assertIsNone(intent_classifier.log_best_features(20))
    intent_classifier.fit(dataset)
    log = intent_classifier.log_best_features(20)

    # Then
    self.assertIsInstance(log, str)
    self.assertIn("Top 20", log)
def test_training_should_be_reproducible(self):
    """Check that two fits with the same ``random_state`` persist equally.

    Two classifiers are created with the same ``random_state`` and fitted
    on the same dataset; each is persisted to its own directory and the
    SHA-256 directory hashes are compared for equality.
    """
    # Given
    random_state = 40
    # The YAML documents start at column 0 inside the literal: YAML is
    # indentation- and newline-sensitive, so the fixture must keep one
    # key per line and a `---` marker before each intent document.
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
  - make me a [beverage_temperature:Temperature](hot) cup of tea
  - make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
  - make me [number_of_cups:snips/number](one) cup of coffee please
  - brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # When
    classifier1 = LogRegIntentClassifier(random_state=random_state)
    classifier1.fit(dataset)

    classifier2 = LogRegIntentClassifier(random_state=random_state)
    classifier2.fit(dataset)

    # Then
    with temp_dir() as tmp_dir:
        dir_classifier1 = tmp_dir / "classifier1"
        dir_classifier2 = tmp_dir / "classifier2"
        classifier1.persist(dir_classifier1)
        classifier2.persist(dir_classifier2)
        # Hash while still inside the context manager, i.e. while the
        # persisted directories are guaranteed to be available.
        hash1 = dirhash(str(dir_classifier1), 'sha256')
        hash2 = dirhash(str(dir_classifier2), 'sha256')
        self.assertEqual(hash1, hash2)
def test_log_activation_weights(self):
    """Check ``log_activation_weights`` before and after fitting.

    Before ``fit`` the method is expected to return ``None``; after
    ``fit``, called with the featurized utterance and ``top_n=42``, it is
    expected to return a string containing ``"Top 42"``.
    """
    # Given
    # The YAML documents start at column 0 inside the literal: YAML is
    # indentation- and newline-sensitive, so the fixture must keep one
    # key per line and a `---` marker before each intent document.
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar

---
type: intent
name: intent2
utterances:
  - lorem ipsum""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    shared = self.get_shared_data(dataset)
    intent_classifier = LogRegIntentClassifier(**shared)

    text = "yo"
    utterances = [text_to_utterance(text)]
    # Unfitted classifier: nothing to log yet.
    self.assertIsNone(intent_classifier.log_activation_weights(text, None))

    # When
    intent_classifier.fit(dataset)
    # Featurize the single utterance and keep its row as the input vector.
    x = intent_classifier.featurizer.transform(utterances)[0]
    log = intent_classifier.log_activation_weights(text, x, top_n=42)

    # Then
    self.assertIsInstance(log, str)
    self.assertIn("Top 42", log)
def test_should_not_retrain_intent_classifier_when_no_force_retrain(self):
    """Fitting the parser without ``force_retrain`` must not refit.

    A classifier fitted on ``BEVERAGE_DATASET`` is attached to the parser;
    ``parser.fit(..., force_retrain=False)`` is then run with
    ``LogRegIntentClassifier.fit`` patched, and the patched method must
    not have been called.
    """
    # NOTE(review): a test method with this exact name also appears
    # earlier in this view; if both live in the same class, the later
    # definition shadows the earlier one -- confirm.

    # Given
    parser = ProbabilisticIntentParser()
    fitted_classifier = LogRegIntentClassifier()
    fitted_classifier.fit(BEVERAGE_DATASET)
    parser.intent_classifier = fitted_classifier

    # When / Then
    fit_path = ("snips_nlu.intent_classifier.log_reg_classifier"
                ".LogRegIntentClassifier.fit")
    with patch(fit_path) as mock_fit:
        parser.fit(BEVERAGE_DATASET, force_retrain=False)
        mock_fit.assert_not_called()