Пример #1
0
def test_create_train_data_no_history(default_domain):
    """Check the decoded training examples produced with ``max_history=1``.

    The default stories are featurized without augmentation and every
    example is decoded back into its list of active features, which is
    then compared against the expected single-step histories.
    """
    featurizer = BinaryFeaturizer()
    X, y = extract_training_data_from_file(
        DEFAULT_STORIES_FILE,
        augmentation_factor=0,
        domain=default_domain,
        featurizer=featurizer,
        max_history=1,
    )
    assert X.shape == (11, 1, 10)

    decoded = [
        featurizer.decode(X[row, :, :], default_domain.input_features)
        for row in range(0, 11)
    ]

    # The distinct decoded states that show up in the expected output.
    goodbye_utter = [('intent_goodbye', 1), ('prev_utter_goodbye', 1)]
    goodbye_listen = [('intent_goodbye', 1), ('prev_action_listen', 1)]
    default_utter = [('intent_default', 1), ('prev_utter_default', 1)]
    default_listen = [('intent_default', 1), ('prev_action_listen', 1)]
    slot_default_utter = [('intent_default', 1), ('slot_name_0', 1),
                          ('prev_utter_default', 1)]
    slot_default_listen = [('intent_default', 1), ('slot_name_0', 1),
                           ('prev_action_listen', 1)]
    greet_utter = [('intent_greet', 1), ('prev_utter_greet', 1)]
    greet_listen = [('intent_greet', 1), ('prev_action_listen', 1)]
    named_greet_utter = [('intent_greet', 1), ('entity_name', 1),
                         ('slot_name_0', 1), ('prev_utter_greet', 1)]
    named_greet_listen = [('intent_greet', 1), ('entity_name', 1),
                          ('slot_name_0', 1), ('prev_action_listen', 1)]

    expected = [
        [None],
        [goodbye_utter],
        [goodbye_listen],
        [default_utter],
        [default_listen],
        [slot_default_utter],
        [slot_default_listen],
        [greet_utter],
        [greet_listen],
        [named_greet_utter],
        [named_greet_listen],
    ]
    assert decoded == expected
Пример #2
0
def test_binary_featurizer_uses_correct_dtype_float():
    """Encoding float feature values must yield a ``np.float64`` array."""
    feature_values = {"a": 1.0, "b": 0.2, "c": 0.0}
    feature_positions = {"a": 0, "b": 3, "c": 2, "d": 1}

    encoded = BinaryFeaturizer().encode(feature_values, feature_positions)

    assert encoded.dtype == np.float64
Пример #3
0
def test_binary_featurizer_handles_on_non_existing_features():
    """A feature without an assigned position ("e") must be ignored."""
    feature_values = {"a": 1.0, "b": 1.0, "c": 0.0, "e": 1.0}
    feature_positions = {"a": 0, "b": 3, "c": 2, "d": 1}

    encoded = BinaryFeaturizer().encode(feature_values, feature_positions)

    # Only "a" (position 0) and "b" (position 3) are switched on.
    expected = np.array([1, 0, 0, 1])
    assert (encoded == expected).all()
Пример #4
0
def test_can_read_test_story(default_domain):
    """Load the test stories and verify the events of one known tracker."""
    trackers = extract_trackers_from_file(
        "data/test_stories/stories.md",
        default_domain,
        featurizer=BinaryFeaturizer(),
    )
    assert len(trackers) == 7

    # The generated stories come back in a non-stable order, so the story
    # under test (simple_story_with_only_end -> show_it_all) is picked by
    # its number of events rather than by its position.
    five_event_trackers = [t for t in trackers if len(t.events) == 5]
    tracker = five_event_trackers[0]

    expected_intent = {"name": "simple", "confidence": 1.0}
    expected_parse_data = {
        'text': 'simple',
        'intent_ranking': [{'confidence': 1.0, 'name': 'simple'}],
        'intent': {'confidence': 1.0, 'name': 'simple'},
        'entities': [],
    }

    assert tracker.events[0] == ActionExecuted("action_listen")
    assert tracker.events[1] == UserUttered("simple",
                                            intent=expected_intent,
                                            parse_data=expected_parse_data)
    assert tracker.events[2] == ActionExecuted("utter_default")
    assert tracker.events[3] == ActionExecuted("utter_greet")
    assert tracker.events[4] == ActionExecuted("action_listen")
Пример #5
0
def test_load_multi_file_training_data(default_domain):
    """Training data from a split story directory matches the single file.

    The stories in `data/test_multifile_stories` are the same as in
    `data/test_stories/stories.md`, but split across multiple files, so
    both sources must produce identical features and labels.
    """
    data = training.extract_training_data("data/test_stories/stories.md",
                                          default_domain,
                                          featurizer=BinaryFeaturizer(),
                                          max_history=2)

    data_mul = training.extract_training_data("data/test_multifile_stories",
                                              default_domain,
                                              featurizer=BinaryFeaturizer(),
                                              max_history=2)

    # `np.array_equal` also compares shapes; an element-wise `==` could
    # silently broadcast arrays of different sizes and still pass.
    assert np.array_equal(data.X, data_mul.X)
    assert np.array_equal(data.y, data_mul.y)
Пример #6
0
    def test_missing_classes_filled_correctly(self, default_domain, data,
                                              tracker):
        """Classes absent from the training labels must get probability 0.

        The labels are replaced by random draws from a small subset of
        classes; after training, only those classes may receive a
        non-zero probability and the probabilities must still sum to 1.
        """
        policy = self.create_policy(featurizer=BinaryFeaturizer(),
                                    max_history=self.max_history,
                                    cv=None)
        X = data.X
        present_classes = [3, 4, 7]
        # One random label per training example, drawn from the subset.
        y = np.asarray([np.random.choice(present_classes) for _ in X])
        reduced_data = DialogueTrainingData(X, y)

        policy.train(reduced_data, domain=default_domain)
        probabilities = policy.predict_action_probabilities(tracker,
                                                            default_domain)

        assert len(probabilities) == 8
        assert np.allclose(sum(probabilities), 1.0)
        for class_index, probability in enumerate(probabilities):
            if class_index not in present_classes:
                assert probability == 0.0
                continue
            assert probability >= 0.0
Пример #7
0
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Stories dumped to a file must read back as already known stories."""
    source_file = "data/test_stories/stories.md"

    graph = extract_story_graph_from_file(source_file, default_domain)
    out_path = tmpdir.join("persisted_story.md")
    Story(graph.story_steps).dump_to_file(out_path.strpath)

    recovered_trackers = extract_trackers_from_file(
        out_path.strpath, default_domain, BinaryFeaturizer())
    existing_trackers = extract_trackers_from_file(
        source_file, default_domain, BinaryFeaturizer())

    remaining_stories = set(
        tracker.export_stories() for tracker in existing_trackers)
    for recovered in recovered_trackers:
        exported = recovered.export_stories()
        assert exported in remaining_stories
        # Each existing story may only be matched once.
        remaining_stories.discard(exported)
Пример #8
0
def train_data(max_history, domain):
    """Extract de-duplicated training data from the default-domain stories.

    :param max_history: forwarded as ``max_history`` to the extraction.
    :param domain: forwarded as ``domain`` to the extraction.
    :return: whatever ``extract_training_data_from_file`` returns.
    """
    stories_file = "data/dsl_stories/stories_defaultdomain.md"
    extracted = extract_training_data_from_file(
        stories_file,
        domain=domain,
        max_history=max_history,
        remove_duplicates=True,
        featurizer=BinaryFeaturizer(),
    )
    return extracted
Пример #9
0
 def test_persist_and_load_empty_policy(self, tmpdir):
     """An untrained policy can be persisted and loaded again."""
     target_dir = tmpdir.strpath
     empty_policy = self.create_policy()
     empty_policy.persist(target_dir)

     policy_class = empty_policy.__class__
     loaded = policy_class.load(target_dir,
                                BinaryFeaturizer(),
                                empty_policy.max_history)
     assert loaded is not None
Пример #10
0
def test_create_train_data_with_history(default_domain):
    """Compare the features generated with ``max_history=4`` to a fixture.

    The reference array is assembled from the handful of distinct
    per-step vectors that occur in it.
    """
    featurizer = BinaryFeaturizer()
    X, y = extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        augmentation_factor=0,
        domain=default_domain,
        featurizer=featurizer,
        max_history=4,
    )

    # Distinct per-step vectors appearing in the reference below.
    minus = [-1, -1, -1, -1, -1, -1, -1, -1, -1]
    zeros = [0, 0, 0, 0, 0, 0, 0, 0, 0]
    vec_a = [1, 0, 0, 0, 1, 0, 0, 0, 0]
    vec_b = [1, 0, 0, 0, 0, 0, 0, 1, 0]
    vec_c = [0, 1, 0, 0, 1, 0, 0, 0, 0]
    vec_d = [0, 1, 0, 0, 0, 0, 1, 0, 0]
    vec_e = [0, 0, 1, 0, 1, 0, 0, 0, 0]
    vec_f = [0, 0, 1, 0, 0, 0, 0, 0, 1]

    reference = np.array([
        [zeros, vec_a, vec_b, vec_c],
        [vec_c, vec_d, vec_e, vec_f],
        [vec_b, vec_c, vec_d, vec_e],
        [vec_a, vec_b, vec_c, vec_d],
        [minus, zeros, vec_a, vec_b],
        [minus, minus, minus, zeros],
        [minus, minus, zeros, vec_a],
    ])

    assert X.shape == reference.shape
    assert np.array_equal(X, reference)
Пример #11
0
 def test_train_kwargs_are_set_on_model(self, default_domain, data):
     """Extra keyword arguments given to ``train`` show up on the model."""
     policy_options = dict(
         featurizer=BinaryFeaturizer(),
         max_history=self.max_history,
         cv=None,
     )
     policy = self.create_policy(**policy_options)

     policy.train(data, domain=default_domain, C=123)

     assert policy.model.C == 123
 def trained_policy(self):
     """Build a policy and train it on the default-domain training data.

     :return: the trained policy instance.
     """
     domain = TemplateDomain.load(DEFAULT_DOMAIN_PATH)
     policy = self.create_policy()
     features, labels = train_data(self.max_history, domain)

     # Configure the policy before training it.
     policy.max_history = self.max_history
     policy.featurizer = BinaryFeaturizer()
     policy.train(features, labels, domain)
     return policy
Пример #13
0
 def trained_policy(self):
     """Create a policy trained with the example default domain.

     :return: the trained policy instance.
     """
     domain = TemplateDomain.load("examples/default_domain.yml")
     new_policy = self.create_policy()
     training_examples = train_data(self.max_history, domain)
     X, y = training_examples

     new_policy.max_history = self.max_history
     new_policy.featurizer = BinaryFeaturizer()
     new_policy.train(X, y, domain)
     return new_policy
Пример #14
0
def test_persist_and_read_test_story_graph(tmpdir, default_domain):
    """A story graph written as a story string reads back as known stories."""
    source_file = "data/test_stories/stories.md"

    graph = training.extract_story_graph(source_file, default_domain)
    out_path = tmpdir.join("persisted_story.md")
    with io.open(out_path.strpath, "w") as story_file:
        story_file.write(graph.as_story_string())

    recovered_trackers = training.extract_trackers(
        out_path.strpath, default_domain, BinaryFeaturizer())
    existing_trackers = training.extract_trackers(
        source_file, default_domain, BinaryFeaturizer())

    remaining_stories = set(
        tracker.export_stories() for tracker in existing_trackers)
    for recovered in recovered_trackers:
        exported = recovered.export_stories()
        assert exported in remaining_stories
        # Each existing story may only be matched once.
        remaining_stories.discard(exported)
Пример #15
0
def default_processor(default_domain):
    """Return a ``MessageProcessor`` whose ensemble was trained on the
    default stories with a history of three."""
    ensemble = SimplePolicyEnsemble([ScoringPolicy()])
    interpreter = RegexInterpreter()

    trainer = PolicyTrainer(ensemble, default_domain, BinaryFeaturizer())
    trainer.train(DEFAULT_STORIES_FILE, max_history=3)

    tracker_store = InMemoryTrackerStore(default_domain)
    processor = MessageProcessor(interpreter, ensemble, default_domain,
                                 tracker_store)
    return processor
Пример #16
0
def test_tracker_write_to_story(tmpdir, default_domain):
    """A tracker exported as a story can be recovered from the file."""
    dialogue_file = "data/test_dialogues/enter_name.json"
    tracker = tracker_from_dialogue_file(dialogue_file, default_domain)

    export_path = tmpdir.join("export.md")
    tracker.export_stories_to_file(export_path.strpath)

    trackers = extract_trackers_from_file(
        export_path.strpath, default_domain, BinaryFeaturizer())
    assert len(trackers) == 1

    recovered = trackers[0]
    assert len(recovered.events) == 8
    assert recovered.events[6] == SlotSet("location", "central")
Пример #17
0
    def test_cv_none_does_not_trigger_search(self, mock_search, default_domain,
                                             data):
        """With ``cv=None`` training must not invoke the mocked search."""
        policy_options = dict(
            featurizer=BinaryFeaturizer(),
            max_history=self.max_history,
            cv=None,
        )
        policy = self.create_policy(**policy_options)

        policy.train(data, domain=default_domain)

        assert mock_search.call_count == 0
        assert policy.model != 'mockmodel'
Пример #18
0
def test_tracker_write_to_story(tmpdir, default_domain):
    """A tracker exported as a story can be recovered from the file.

    Event 6 of the recovered tracker must be a slot event whose key and
    value are one of the accepted combinations.
    """
    dialogue_file = "data/test_dialogues/enter_name.json"
    tracker = tracker_from_dialogue_file(dialogue_file, default_domain)

    export_path = tmpdir.join("export.md")
    tracker.export_stories_to_file(export_path.strpath)

    trackers = training.extract_trackers(
        export_path.strpath, default_domain, BinaryFeaturizer())
    assert len(trackers) == 1

    recovered = trackers[0]
    assert len(recovered.events) == 8
    assert recovered.events[6].type_name == "slot"
    assert recovered.events[6].key in {"location", "name"}
    assert recovered.events[6].value in {"central", "holger"}
Пример #19
0
    def test_cv_not_none_param_grid_none_triggers_search_without_params(
            self, mock_search, default_domain, data):
        """With ``cv=3`` and no param grid the search runs with ``{}``."""
        policy_options = dict(
            featurizer=BinaryFeaturizer(),
            max_history=self.max_history,
            cv=3,
        )
        policy = self.create_policy(**policy_options)

        policy.train(data, domain=default_domain)

        assert mock_search.call_count > 0
        # Keyword arguments of the first recorded call to the mock.
        first_call_kwargs = mock_search.call_args_list[0][1]
        assert first_call_kwargs['cv'] == 3
        assert first_call_kwargs['param_grid'] == {}
        assert policy.model == 'mockmodel'
Пример #20
0
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Stories containing a cycle yield the expected examples and labels."""
    stories_file = "data/test_stories/stories_with_cycle.md"
    expected_labels = [2, 4, 0, 2, 4, 0, 1, 0, 2, 4, 0, 1, 0, 0, 3]

    training_data = extract_training_data_from_file(
        stories_file,
        default_domain,
        BinaryFeaturizer(),
        augmentation_factor=0,
        max_history=4,
    )

    assert training_data.num_examples() == 15
    np.testing.assert_array_equal(training_data.y, expected_labels)
Пример #21
0
    def test_persist_and_load(self, trained_policy, default_domain, tmpdir):
        """A persisted and re-loaded policy predicts like the original."""
        storage_dir = tmpdir.strpath
        trained_policy.persist(storage_dir)

        policy_class = trained_policy.__class__
        loaded = policy_class.load(storage_dir,
                                   trained_policy.featurizer,
                                   trained_policy.max_history)
        trackers = extract_trackers(
            DEFAULT_STORIES_FILE, default_domain, BinaryFeaturizer())

        for story_tracker in trackers:
            from_loaded = loaded.predict_action_probabilities(
                story_tracker, default_domain)
            from_original = trained_policy.predict_action_probabilities(
                story_tracker, default_domain)
            assert from_loaded == from_original
Пример #22
0
    def test_continue_training_with_unsuitable_model_raises(
            self, default_domain, data):
        """``continue_training`` raises ``TypeError`` with a fixed message
        for the policy created and trained here."""
        expected_message = ("Continuing training is only possible "
                            "with sklearn models that support "
                            "'partial_fit'.")
        policy_options = dict(
            featurizer=BinaryFeaturizer(),
            max_history=self.max_history,
            cv=None,
        )
        policy = self.create_policy(**policy_options)
        policy.train(data, domain=default_domain)

        with pytest.raises(TypeError) as raised:
            policy.continue_training(data, domain=default_domain)

        assert raised.value.args[0] == expected_message
Пример #23
0
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """A directory holding only an empty hidden file and an empty normal
    file must produce no training examples."""
    directory = tmpdir.strpath
    hidden_file = os.path.join(directory, ".hidden")
    normal_file = os.path.join(directory, "normal_file")

    # Create both files (hidden one first) and leave them empty.
    for file_path in (hidden_file, normal_file):
        with open(file_path, 'a'):
            pass

    data = training.extract_training_data(
        directory,
        default_domain,
        featurizer=BinaryFeaturizer(),
        max_history=2,
    )

    assert len(data.X) == 0
    assert len(data.y) == 0
Пример #24
0
def test_message_processor(default_domain, capsys):
    """Handling a ``_greet`` message prints the greeting to the console."""
    story_filename = "data/dsl_stories/stories_defaultdomain.md"
    ensemble = SimplePolicyEnsemble([ScoringPolicy()])
    interpreter = RegexInterpreter()

    trainer = PolicyTrainer(ensemble, default_domain, BinaryFeaturizer())
    trainer.train(story_filename, max_history=3)

    tracker_store = InMemoryTrackerStore(default_domain)
    processor = MessageProcessor(
        interpreter, ensemble, default_domain, tracker_store)

    greeting = UserMessage("_greet", ConsoleOutputChannel())
    processor.handle_message(greeting)

    # Only the captured stdout matters here; stderr is ignored.
    captured_out, _ = capsys.readouterr()
    assert "hey there!" in captured_out
Пример #25
0
def test_message_processor(default_domain, capsys):
    """Handling a ``_greet`` message with a name entity produces the
    personalised greeting on the collecting output channel."""
    story_filename = "data/dsl_stories/stories_defaultdomain.md"
    ensemble = SimplePolicyEnsemble([ScoringPolicy()])
    interpreter = RegexInterpreter()

    trainer = PolicyTrainer(ensemble, default_domain, BinaryFeaturizer())
    trainer.train(story_filename, max_history=3)

    tracker_store = InMemoryTrackerStore(default_domain)
    processor = MessageProcessor(
        interpreter, ensemble, default_domain, tracker_store)

    out = CollectingOutputChannel()
    greeting = UserMessage("_greet[name=Core]", out)
    processor.handle_message(greeting)

    assert ("default", "hey there Core!") == out.latest_output()
Пример #26
0
def test_tracker_state_regression(default_domain):
    """Regression test: the tracker must not be reset between messages.

    Two "hello" messages are sent through an agent and the resulting
    tracker's events are compared, as a story string, with the sequence
    expected when both turns are recorded on the same tracker.
    """
    class HelloInterpreter(NaturalLanguageInterpreter):
        """Minimal interpreter: "greet" if the text contains 'hello'."""

        def parse(self, text):
            intent = "greet" if 'hello' in text else "nlu"
            return {"text": text, "intent": {"name": intent}, "entities": []}

    # NOTE(review): `domain` is not defined inside this function and the
    # `default_domain` argument is unused - presumably `domain` is a
    # module-level object; confirm before switching to the fixture.
    agent = Agent(domain, [SimplePolicy()],
                  BinaryFeaturizer(),
                  interpreter=HelloInterpreter())

    for _ in range(2):
        agent.handle_message("hello")
    tracker = agent.tracker_store.get_or_create_tracker('nlu')

    # Ensures that the tracker has changed between the utterances
    # (and wasn't reset in between them)
    expected = ("action_listen;"
                "_greet;utter_greet;action_listen;"
                "_greet;utter_greet;action_listen")
    assert ";".join([e.as_story_string() for e in tracker.events]) == expected
Пример #27
0
def train_data(max_history, domain):
    """Extract de-duplicated training data from the default stories file.

    :param max_history: forwarded as ``max_history`` to the extraction.
    :param domain: passed as the domain argument of the extraction.
    :return: whatever ``extract_training_data_from_file`` returns.
    """
    extracted = extract_training_data_from_file(
        DEFAULT_STORIES_FILE,
        domain,
        BinaryFeaturizer(),
        max_history=max_history,
        remove_duplicates=True,
    )
    return extracted
Пример #28
0
def test_create_train_data_with_history(default_domain):
    """Check the decoded training examples produced with ``max_history=4``.

    Every example is decoded back into its four-step history of active
    features and compared against the expected histories, which are
    assembled from the distinct states that occur in them.
    """
    featurizer = BinaryFeaturizer()
    training_data = extract_training_data_from_file(
        DEFAULT_STORIES_FILE,
        default_domain,
        featurizer,
        augmentation_factor=0,
        max_history=4,
    )
    assert training_data.X.shape == (11, 4, 10)

    decoded = [
        featurizer.decode(training_data.X[row, :, :],
                          default_domain.input_features)
        for row in range(0, 11)
    ]

    # The distinct decoded states that show up in the expected output.
    greet_listen = [(u'intent_greet', 1), (u'prev_action_listen', 1)]
    greet_utter = [(u'intent_greet', 1), (u'prev_utter_greet', 1)]
    default_listen = [(u'intent_default', 1), (u'prev_action_listen', 1)]
    default_utter = [(u'intent_default', 1), (u'prev_utter_default', 1)]
    goodbye_listen = [(u'intent_goodbye', 1), (u'prev_action_listen', 1)]
    goodbye_utter = [(u'intent_goodbye', 1), (u'prev_utter_goodbye', 1)]
    named_greet_listen = [(u'intent_greet', 1), (u'entity_name', 1),
                          (u'slot_name_0', 1), (u'prev_action_listen', 1)]
    named_greet_utter = [(u'intent_greet', 1), (u'entity_name', 1),
                         (u'slot_name_0', 1), (u'prev_utter_greet', 1)]
    slot_default_listen = [(u'intent_default', 1), (u'slot_name_0', 1),
                           (u'prev_action_listen', 1)]
    slot_default_utter = [(u'intent_default', 1), (u'slot_name_0', 1),
                          (u'prev_utter_default', 1)]

    expected = [
        [None, greet_listen, greet_utter, default_listen],
        [None, named_greet_listen, named_greet_utter, slot_default_listen],
        [default_listen, default_utter, goodbye_listen, goodbye_utter],
        [greet_utter, default_listen, default_utter, goodbye_listen],
        [greet_listen, greet_utter, default_listen, default_utter],
        [named_greet_listen, named_greet_utter, slot_default_listen,
         slot_default_utter],
        [None, None, greet_listen, greet_utter],
        [None, None, named_greet_listen, named_greet_utter],
        [None, None, None, None],
        [None, None, None, greet_listen],
        [None, None, None, named_greet_listen],
    ]
    assert decoded == expected
def test_create_train_data_with_history(default_domain):
    """Check the decoded training examples produced with ``max_history=4``.

    The default-domain stories are featurized without augmentation, every
    example is decoded into its four-step history of active features, and
    the result is compared against the expected histories.
    """
    featurizer = BinaryFeaturizer()
    X, y = extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        augmentation_factor=0,
        domain=default_domain,
        featurizer=featurizer,
        max_history=4,
    )
    assert X.shape == (11, 4, 10)

    decoded = [
        featurizer.decode(X[row, :, :], default_domain.input_features)
        for row in range(0, 11)
    ]

    # Building blocks: every distinct state found in the expected output.
    greet_listen = [(u'intent_greet', 1), (u'prev_action_listen', 1)]
    greet_utter = [(u'intent_greet', 1), (u'prev_utter_greet', 1)]
    default_listen = [(u'intent_default', 1), (u'prev_action_listen', 1)]
    default_utter = [(u'intent_default', 1), (u'prev_utter_default', 1)]
    goodbye_listen = [(u'intent_goodbye', 1), (u'prev_action_listen', 1)]
    goodbye_utter = [(u'intent_goodbye', 1), (u'prev_utter_goodbye', 1)]
    named_greet_listen = [(u'intent_greet', 1), (u'entity_name', 1),
                          (u'slot_name_0', 1), (u'prev_action_listen', 1)]
    named_greet_utter = [(u'intent_greet', 1), (u'entity_name', 1),
                         (u'slot_name_0', 1), (u'prev_utter_greet', 1)]
    slot_default_listen = [(u'intent_default', 1), (u'slot_name_0', 1),
                           (u'prev_action_listen', 1)]
    slot_default_utter = [(u'intent_default', 1), (u'slot_name_0', 1),
                          (u'prev_utter_default', 1)]

    expected = [
        [None, greet_listen, greet_utter, default_listen],
        [None, named_greet_listen, named_greet_utter, slot_default_listen],
        [default_listen, default_utter, goodbye_listen, goodbye_utter],
        [greet_utter, default_listen, default_utter, goodbye_listen],
        [greet_listen, greet_utter, default_listen, default_utter],
        [named_greet_listen, named_greet_utter, slot_default_listen,
         slot_default_utter],
        [None, None, greet_listen, greet_utter],
        [None, None, named_greet_listen, named_greet_utter],
        [None, None, None, None],
        [None, None, None, greet_listen],
        [None, None, None, named_greet_listen],
    ]
    assert decoded == expected
Пример #30
0
 def _create_featurizer(cls, featurizer):
     return featurizer if featurizer is not None else BinaryFeaturizer()