示例#1
0
def test_load_multi_file_training_data(default_domain):
    """Loading a story directory must match loading the single story file.

    The stories in ``data/test_multifile_stories`` are the same as those in
    ``data/test_stories/stories.md``, only split across multiple files, so
    both sources have to produce identical features and labels.
    """
    data = training.extract_training_data("data/test_stories/stories.md",
                                          default_domain,
                                          featurizer=BinaryFeaturizer(),
                                          max_history=2)

    data_mul = training.extract_training_data("data/test_multifile_stories",
                                              default_domain,
                                              featurizer=BinaryFeaturizer(),
                                              max_history=2)

    # `assert_array_equal` rejects arrays whose shapes differ and reports the
    # mismatching entries, whereas `np.all(a == b)` can succeed through
    # broadcasting and gives no diagnostics when it fails.
    np.testing.assert_array_equal(data.X, data_mul.X)
    np.testing.assert_array_equal(data.y, data_mul.y)
示例#2
0
def test_create_train_data_no_history(default_domain):
    """Check the features extracted from the default stories, history 1."""
    featurizer = BinaryFeaturizer()
    training_data = extract_training_data(
        DEFAULT_STORIES_FILE,
        default_domain,
        featurizer,
        augmentation_factor=0,
        max_history=1)

    assert training_data.X.shape == (11, 1, 10)

    # Decode each of the 11 examples back through the featurizer so the
    # comparison below works on readable feature names.
    decoded = []
    for idx in range(11):
        decoded.append(featurizer.decode(training_data.X[idx, :, :],
                                         default_domain.input_features))

    expected = [
        [None],
        [[('intent_goodbye', 1), ('prev_utter_goodbye', 1)]],
        [[('intent_goodbye', 1), ('prev_action_listen', 1)]],
        [[('intent_default', 1), ('prev_utter_default', 1)]],
        [[('intent_default', 1), ('prev_action_listen', 1)]],
        [[('intent_default', 1), ('slot_name_0', 1),
          ('prev_utter_default', 1)]],
        [[('intent_default', 1), ('slot_name_0', 1),
          ('prev_action_listen', 1)]],
        [[('intent_greet', 1), ('prev_utter_greet', 1)]],
        [[('intent_greet', 1), ('prev_action_listen', 1)]],
        [[('intent_greet', 1), ('entity_name', 1),
          ('slot_name_0', 1), ('prev_utter_greet', 1)]],
        [[('intent_greet', 1), ('entity_name', 1),
          ('slot_name_0', 1), ('prev_action_listen', 1)]],
    ]
    assert decoded == expected
示例#3
0
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Check example count and labels for ``stories_with_cycle.md``."""
    stories_file = "data/test_stories/stories_with_cycle.md"
    expected_labels = [2, 4, 0, 2, 4, 0, 1, 0, 2, 4, 0, 1, 0, 0, 3]

    training_data = training.extract_training_data(stories_file,
                                                   default_domain,
                                                   BinaryFeaturizer(),
                                                   augmentation_factor=0,
                                                   max_history=4)

    assert training_data.num_examples() == 15
    np.testing.assert_array_equal(training_data.y, expected_labels)
示例#4
0
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """A directory with only an empty hidden and an empty normal file
    must yield empty ``X`` and ``y``."""
    # Create one hidden and one regular file; append mode creates each file
    # without writing anything to it.
    for file_name in (".hidden", "normal_file"):
        with open(os.path.join(tmpdir.strpath, file_name), 'a'):
            pass

    data = training.extract_training_data(
        tmpdir.strpath,
        default_domain,
        featurizer=BinaryFeaturizer(),
        max_history=2)

    assert len(data.X) == 0
    assert len(data.y) == 0
示例#5
0
    def _prepare_training_data(self, resource_name, max_history,
                               augmentation_factor,
                               max_training_samples=None,
                               max_number_of_trackers=2000,
                               remove_duplicates=True):
        """Load the training data stored at `resource_name`.

        When `resource_name` is falsy, an empty `DialogueTrainingData` built
        for `self.domain` is returned. Otherwise the data is extracted with
        `self.domain`, `self.featurizer` and a `RegexInterpreter`;
        `max_history`, `augmentation_factor`, `remove_duplicates` and
        `max_number_of_trackers` are forwarded unchanged to
        `training.extract_training_data`. If `max_training_samples` is not
        `None`, the extracted data is limited to that many samples before
        it is returned.
        """

        # Imported locally rather than at module level; this runs on every
        # call, whichever branch is taken below.
        from rasa_core import training

        if not resource_name:
            return DialogueTrainingData.empty(self.domain)

        extracted = training.extract_training_data(
                resource_name,
                self.domain,
                self.featurizer,
                interpreter=RegexInterpreter(),
                augmentation_factor=augmentation_factor,
                max_history=max_history,
                remove_duplicates=remove_duplicates,
                max_number_of_trackers=max_number_of_trackers)

        limit_requested = max_training_samples is not None
        if limit_requested:
            extracted.limit_training_data_to(max_training_samples)
        return extracted
示例#6
0
def train_data(max_history, domain):
    """Extract training data from the default stories file for `domain`.

    A fresh `BinaryFeaturizer` is used, `max_history` is forwarded as given
    and `remove_duplicates=True` is passed to the extraction.
    """
    return extract_training_data(DEFAULT_STORIES_FILE,
                                 domain,
                                 BinaryFeaturizer(),
                                 max_history=max_history,
                                 remove_duplicates=True)