def test_load_multi_file_training_data(default_domain):
    """Stories split across several files must load like the single file.

    The stories in `data/test_multifile_stories` are the same as in
    `data/test_stories/stories.md`, but split across multiple files.
    """
    single_file = training.extract_training_data(
        "data/test_stories/stories.md",
        default_domain,
        featurizer=BinaryFeaturizer(),
        max_history=2)
    multi_file = training.extract_training_data(
        "data/test_multifile_stories",
        default_domain,
        featurizer=BinaryFeaturizer(),
        max_history=2)

    # Features are compared first, labels second.
    features_match = np.all(single_file.X == multi_file.X)
    assert features_match
    labels_match = np.all(single_file.y == multi_file.y)
    assert labels_match
def test_create_train_data_no_history(default_domain):
    """Extract the default stories with `max_history=1` and no augmentation,
    then check the feature shape and the decoded features of every example.
    """
    featurizer = BinaryFeaturizer()
    training_data = extract_training_data(
        DEFAULT_STORIES_FILE,
        default_domain,
        featurizer,
        augmentation_factor=0,
        max_history=1)

    num_examples = 11
    assert training_data.X.shape == (num_examples, 1, 10)

    # Decode each example back into its (feature name, value) pairs.
    decoded = []
    for idx in range(num_examples):
        decoded.append(
            featurizer.decode(training_data.X[idx, :, :],
                              default_domain.input_features))

    expected = [
        [None],
        [[('intent_goodbye', 1), ('prev_utter_goodbye', 1)]],
        [[('intent_goodbye', 1), ('prev_action_listen', 1)]],
        [[('intent_default', 1), ('prev_utter_default', 1)]],
        [[('intent_default', 1), ('prev_action_listen', 1)]],
        [[('intent_default', 1), ('slot_name_0', 1),
          ('prev_utter_default', 1)]],
        [[('intent_default', 1), ('slot_name_0', 1),
          ('prev_action_listen', 1)]],
        [[('intent_greet', 1), ('prev_utter_greet', 1)]],
        [[('intent_greet', 1), ('prev_action_listen', 1)]],
        [[('intent_greet', 1), ('entity_name', 1), ('slot_name_0', 1),
          ('prev_utter_greet', 1)]],
        [[('intent_greet', 1), ('entity_name', 1), ('slot_name_0', 1),
          ('prev_action_listen', 1)]],
    ]
    assert decoded == expected
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Extract `stories_with_cycle.md` with `max_history=4` and no
    augmentation, then check the example count and the label sequence.
    """
    # NOTE: the `tmpdir` fixture is requested but not used in this body.
    stories_path = "data/test_stories/stories_with_cycle.md"
    expected_labels = [2, 4, 0, 2, 4, 0, 1, 0, 2, 4, 0, 1, 0, 0, 3]

    extracted = training.extract_training_data(
        stories_path,
        default_domain,
        BinaryFeaturizer(),
        augmentation_factor=0,
        max_history=4)

    assert extracted.num_examples() == 15
    np.testing.assert_array_equal(extracted.y, expected_labels)
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """A directory holding only an empty hidden file and an empty normal
    file must produce training data with no features and no labels.
    """
    base_dir = tmpdir.strpath

    # Create the hidden file first, then the normal file; both stay empty.
    for file_name in (".hidden", "normal_file"):
        with open(os.path.join(base_dir, file_name), 'a'):
            pass

    extracted = training.extract_training_data(
        base_dir,
        default_domain,
        featurizer=BinaryFeaturizer(),
        max_history=2)

    assert len(extracted.X) == 0
    assert len(extracted.y) == 0
def _prepare_training_data(self, resource_name, max_history,
                           augmentation_factor, max_training_samples=None,
                           max_number_of_trackers=2000,
                           remove_duplicates=True):
    """Load training data from `resource_name` and get it ready for training.

    When `resource_name` is falsy, an empty `DialogueTrainingData` for
    `self.domain` is returned. Otherwise the data is extracted with
    `self.domain`, `self.featurizer` and a `RegexInterpreter`, and - if
    `max_training_samples` is given - limited to that many samples.
    """
    # Imported locally, before any branching, as in the original layout.
    from rasa_core import training

    if not resource_name:
        return DialogueTrainingData.empty(self.domain)

    prepared = training.extract_training_data(
        resource_name,
        self.domain,
        self.featurizer,
        interpreter=RegexInterpreter(),
        augmentation_factor=augmentation_factor,
        max_history=max_history,
        remove_duplicates=remove_duplicates,
        max_number_of_trackers=max_number_of_trackers)

    if max_training_samples is not None:
        prepared.limit_training_data_to(max_training_samples)
    return prepared
def train_data(max_history, domain):
    """Extract training data from `DEFAULT_STORIES_FILE` for `domain`.

    Uses a fresh `BinaryFeaturizer`, the given `max_history`, and
    `remove_duplicates=True`.
    """
    featurizer = BinaryFeaturizer()
    return extract_training_data(DEFAULT_STORIES_FILE,
                                 domain,
                                 featurizer,
                                 max_history=max_history,
                                 remove_duplicates=True)