示例#1
0
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Stories dumped with ``Story.dump_to_file`` load back as known ones."""
    source_file = "data/test_stories/stories.md"
    # Identical loader settings for the persisted copy and the original file.
    load_kwargs = dict(use_story_concatenation=False,
                       tracker_limit=1000,
                       remove_duplicates=False)

    story_graph = training.extract_story_graph(source_file, default_domain)
    persisted = tmpdir.join("persisted_story.md")
    Story(story_graph.story_steps).dump_to_file(persisted.strpath)

    recovered_trackers = training.load_data(persisted.strpath,
                                            default_domain,
                                            **load_kwargs)
    existing_trackers = training.load_data(source_file,
                                           default_domain,
                                           **load_kwargs)

    remaining = {tracker.export_stories() for tracker in existing_trackers}
    for tracker in recovered_trackers:
        exported = tracker.export_stories()
        # Every recovered story may be matched against an original only once.
        assert exported in remaining
        remaining.discard(exported)
示例#2
0
def test_load_multi_file_training_data(default_domain):
    """Loading stories from a directory must match loading the single file.

    The stories in ``data/test_multifile_stories`` are the same as in
    ``data/test_stories/stories.md``, but split across multiple files.
    """

    def sorted_hashes(all_states, all_actions):
        # Serialise every (states + actions) pair so both data sets can be
        # compared independently of the order the trackers were generated in.
        return sorted((json.dumps(sts + acts, sort_keys=True)
                       for sts, acts in zip(all_states, all_actions)),
                      reverse=True)

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    trackers = training.load_data("data/test_stories/stories.md",
                                  default_domain,
                                  augmentation_factor=0)
    (tr_as_sts, tr_as_acts) = featurizer.training_states_and_actions(
        trackers, default_domain)
    hashed = sorted_hashes(tr_as_sts, tr_as_acts)

    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                                 max_history=2)
    trackers_mul = training.load_data("data/test_multifile_stories",
                                      default_domain,
                                      augmentation_factor=0)
    # Use the featurizer created for the multi-file data consistently.
    (tr_as_sts_mul,
     tr_as_acts_mul) = featurizer_mul.training_states_and_actions(
        trackers_mul, default_domain)
    hashed_mul = sorted_hashes(tr_as_sts_mul, tr_as_acts_mul)

    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul

    # ``ndarray.sort`` sorts in place and returns ``None``, so comparing its
    # return values always succeeds. ``np.sort`` returns sorted copies, which
    # makes the comparison meaningful and leaves the featurized data intact.
    assert np.array_equal(np.sort(data.X, axis=0),
                          np.sort(data_mul.X, axis=0))
    assert np.array_equal(np.sort(data.y, axis=0),
                          np.sort(data_mul.y, axis=0))
示例#3
0
def test_persist_and_read_test_story_graph(tmpdir, default_domain):
    """A graph written via ``as_story_string`` loads back as known stories."""
    source_file = "data/test_stories/stories.md"
    # Identical loader settings for the persisted copy and the original file.
    load_kwargs = dict(use_story_concatenation=False,
                       tracker_limit=1000,
                       remove_duplicates=False)

    story_graph = training.extract_story_graph(source_file, default_domain)
    persisted = tmpdir.join("persisted_story.md")
    with io.open(persisted.strpath, "w", encoding="utf-8") as story_file:
        story_file.write(story_graph.as_story_string())

    recovered_trackers = training.load_data(persisted.strpath,
                                            default_domain,
                                            **load_kwargs)
    existing_trackers = training.load_data(source_file,
                                           default_domain,
                                           **load_kwargs)

    remaining = {tracker.export_stories() for tracker in existing_trackers}
    for tracker in recovered_trackers:
        exported = tracker.export_stories()
        # Every recovered story may be matched against an original only once.
        assert exported in remaining
        remaining.discard(exported)
示例#4
0
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Round-trip the test stories through ``Story.dump_to_file``."""
    original_path = "data/test_stories/stories.md"

    def load(path):
        # Same loader configuration for both the dumped and the source file.
        return training.load_data(path,
                                  default_domain,
                                  use_story_concatenation=False,
                                  tracker_limit=1000,
                                  remove_duplicates=False)

    graph = training.extract_story_graph(original_path, default_domain)
    dumped_path = tmpdir.join("persisted_story.md").strpath
    Story(graph.story_steps).dump_to_file(dumped_path)

    recovered_trackers = load(dumped_path)
    existing_trackers = load(original_path)

    unmatched = {tracker.export_stories() for tracker in existing_trackers}
    for tracker in recovered_trackers:
        exported = tracker.export_stories()
        # A recovered story must be one of the originals not matched so far.
        assert exported in unmatched
        unmatched.discard(exported)
示例#5
0
def test_can_read_test_story(default_domain):
    """The test stories yield 7 trackers; check the 5-event one in detail."""
    trackers = training.load_data("data/test_stories/stories.md",
                                  default_domain,
                                  use_story_concatenation=False,
                                  tracker_limit=1000,
                                  remove_duplicates=False)
    assert len(trackers) == 7

    # This should be the story simple_story_with_only_end -> show_it_all.
    # The generated stories come in a non-stable order, so the tracker under
    # test is identified by its number of events instead of its position.
    five_event_trackers = [t for t in trackers if len(t.events) == 5]
    tracker = five_event_trackers[0]

    expected_events = [
        ActionExecuted("action_listen"),
        UserUttered(
            "simple",
            intent={"name": "simple", "confidence": 1.0},
            parse_data={'text': 'simple',
                        'intent_ranking': [{'confidence': 1.0,
                                            'name': 'simple'}],
                        'intent': {'confidence': 1.0, 'name': 'simple'},
                        'entities': []}),
        ActionExecuted("utter_default"),
        ActionExecuted("utter_greet"),
        ActionExecuted("action_listen"),
    ]
    for position, expected in enumerate(expected_events):
        assert tracker.events[position] == expected
示例#6
0
def test_create_train_data_no_history(default_domain):
    """With ``max_history=1`` every training example is a single state."""
    featurizer = MaxHistoryTrackerFeaturizer(max_history=1)
    training_trackers = training.load_data(DEFAULT_STORIES_FILE,
                                           default_domain,
                                           augmentation_factor=0)
    assert len(training_trackers) == 3

    decoded, _ = featurizer.training_states_and_actions(training_trackers,
                                                        default_domain)

    # Compare in a canonical order: serialise each example and sort.
    hashed = sorted((json.dumps(states, sort_keys=True)
                     for states in decoded),
                    reverse=True)

    expected = [
        '[{}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]',
    ]
    assert hashed == expected
示例#7
0
def test_create_train_data_no_history(default_domain):
    """Check the single-state examples produced with ``max_history=1``."""
    featurizer = MaxHistoryTrackerFeaturizer(max_history=1)
    training_trackers = training.load_data(DEFAULT_STORIES_FILE,
                                           default_domain,
                                           augmentation_factor=0)
    assert len(training_trackers) == 3

    all_states, _ = featurizer.training_states_and_actions(
        training_trackers, default_domain)

    # The examples need to be sorted before they can be compared, so each
    # one is serialised to a JSON string first.
    serialised = [json.dumps(states, sort_keys=True)
                  for states in all_states]
    serialised.sort(reverse=True)

    assert serialised == [
        '[{}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]',
    ]
示例#8
0
def test_can_read_test_story(default_domain):
    """The test stories yield 7 trackers; check the 5-event one in detail."""
    trackers = training.load_data("data/test_stories/stories.md",
                                  default_domain,
                                  use_story_concatenation=False,
                                  tracker_limit=1000,
                                  remove_duplicates=False)
    assert len(trackers) == 7

    # This should be the story simple_story_with_only_end -> show_it_all.
    # The generated stories come in a non-stable order, so the tracker under
    # test is identified by its number of events instead of its position.
    five_event_trackers = [t for t in trackers if len(t.events) == 5]
    tracker = five_event_trackers[0]

    expected_events = [
        ActionExecuted("action_listen"),
        UserUttered(
            "simple",
            intent={"name": "simple", "confidence": 1.0},
            parse_data={'text': '/simple',
                        'intent_ranking': [{'confidence': 1.0,
                                            'name': 'simple'}],
                        'intent': {'confidence': 1.0, 'name': 'simple'},
                        'entities': []}),
        ActionExecuted("utter_default"),
        ActionExecuted("utter_greet"),
        ActionExecuted("action_listen"),
    ]
    for position, expected in enumerate(expected_events):
        assert tracker.events[position] == expected
示例#9
0
def train_trackers(domain, augmentation_factor=20):
    """Load the trackers of ``DEFAULT_STORIES_FILE`` for ``domain``."""
    return training.load_data(DEFAULT_STORIES_FILE,
                              domain,
                              augmentation_factor=augmentation_factor)
示例#10
0
def test_can_read_test_story_with_checkpoint_after_or(default_domain):
    """A checkpoint following an OR statement must yield two trackers."""
    story_file = "data/test_stories/stories_checkpoint_after_or.md"
    loader_options = dict(use_story_concatenation=False,
                          tracker_limit=1000,
                          remove_duplicates=False)
    loaded = training.load_data(story_file, default_domain, **loader_options)
    # there should be only 2 trackers
    assert len(loaded) == 2
示例#11
0
def test_can_read_test_story_with_checkpoint_after_or(default_domain):
    """Stories with a checkpoint after an OR produce exactly 2 trackers."""
    tracker_count = len(training.load_data(
        "data/test_stories/stories_checkpoint_after_or.md",
        default_domain,
        use_story_concatenation=False,
        tracker_limit=1000,
        remove_duplicates=False,
    ))
    # there should be only 2 trackers
    assert tracker_count == 2
示例#12
0
def test_generate_training_data_with_unused_checkpoints(tmpdir,
                                                        default_domain):
    """Stories with unused checkpoints load into two training trackers."""
    story_file = "data/test_stories/stories_unused_checkpoints.md"
    loaded = training.load_data(story_file, default_domain)
    # there are 3 training stories:
    #   2 with unused end checkpoints -> training_trackers
    #   1 with unused start checkpoints -> ignored
    assert len(loaded) == 2
示例#13
0
def test_load_multi_file_training_data(default_domain):
    """Loading stories from a directory must match loading the single file.

    The stories in ``data/test_multifile_stories`` are the same as in
    ``data/test_stories/stories.md``, but split across multiple files.
    """

    def sorted_hashes(all_states, all_actions):
        # Serialise every (states + actions) pair so both data sets can be
        # compared independently of the order the trackers were generated in.
        return sorted((json.dumps(sts + acts, sort_keys=True)
                       for sts, acts in zip(all_states, all_actions)),
                      reverse=True)

    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2)
    trackers = training.load_data(
        "data/test_stories/stories.md",
        default_domain,
        augmentation_factor=0
    )
    (tr_as_sts, tr_as_acts) = featurizer.training_states_and_actions(
        trackers, default_domain)
    hashed = sorted_hashes(tr_as_sts, tr_as_acts)

    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2)
    trackers_mul = training.load_data(
        "data/test_multifile_stories",
        default_domain,
        augmentation_factor=0
    )
    # Use the featurizer created for the multi-file data consistently.
    (tr_as_sts_mul,
     tr_as_acts_mul) = featurizer_mul.training_states_and_actions(
        trackers_mul, default_domain)
    hashed_mul = sorted_hashes(tr_as_sts_mul, tr_as_acts_mul)

    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul

    # ``ndarray.sort`` sorts in place and returns ``None``, so comparing its
    # return values always succeeds. ``np.sort`` returns sorted copies, which
    # makes the comparison meaningful and leaves the featurized data intact.
    assert np.array_equal(np.sort(data.X, axis=0),
                          np.sort(data_mul.X, axis=0))
    assert np.array_equal(np.sort(data.y, axis=0),
                          np.sort(data_mul.y, axis=0))
示例#14
0
    def load_data(
            self,
            resource_name,  # type: Text
            remove_duplicates=True,  # type: bool
            unique_last_num_states=None,  # type: Optional[int]
            augmentation_factor=20,  # type: int
            tracker_limit=None,  # type: Optional[int]
            use_story_concatenation=True,  # type: bool
            debug_plots=False,  # type: bool
            exclusion_percentage=None  # type: Optional[int]
    ):
        # type: (...) -> List[DialogueStateTracker]
        """Load training data from a resource.

        When ``unique_last_num_states`` is not given and every policy
        featurizer that is set exposes ``max_history``, it defaults to the
        largest ``max_history`` of the ensemble. When it is given but smaller
        than that maximum, a warning about possible data loss is logged.

        All arguments are forwarded, together with ``self.domain``, to
        ``training.load_data``, whose result is returned.
        """

        # find maximum max_history
        # and if all featurizers are MaxHistoryTrackerFeaturizer
        max_max_history = 0
        all_max_history_featurizers = True
        for policy in self.policy_ensemble.policies:
            if hasattr(policy.featurizer, 'max_history'):
                max_max_history = max(policy.featurizer.max_history,
                                      max_max_history)
            elif policy.featurizer is not None:
                # a featurizer without max_history disables auto detection
                all_max_history_featurizers = False

        if unique_last_num_states is None:
            # for speed up of data generation
            # automatically detect unique_last_num_states
            # if it was not set and
            # if all featurizers are MaxHistoryTrackerFeaturizer
            if all_max_history_featurizers:
                unique_last_num_states = max_max_history
        elif unique_last_num_states < max_max_history:
            # possibility of data loss
            logger.warning("unique_last_num_states={} but "
                           "maximum max_history={}. "
                           "Possibility of data loss. "
                           "It is recommended to set "
                           "unique_last_num_states to "
                           "at least maximum max_history."
                           "".format(unique_last_num_states, max_max_history))

        return training.load_data(resource_name,
                                  self.domain,
                                  remove_duplicates,
                                  unique_last_num_states,
                                  augmentation_factor,
                                  tracker_limit,
                                  use_story_concatenation,
                                  debug_plots,
                                  exclusion_percentage=exclusion_percentage)
示例#15
0
    def load_data(self,
                  resource_name,  # type: Text
                  remove_duplicates=True,  # type: bool
                  augmentation_factor=20,  # type: int
                  max_number_of_trackers=2000,  # type: int
                  tracker_limit=None,  # type: Optional[int]
                  use_story_concatenation=True  # type: bool
                  ):
        # type: (...) -> List[DialogueStateTracker]
        """Load training data from a resource.

        Forwards all arguments, together with ``self.domain``, positionally
        to ``training.load_data`` and returns its result.
        """

        loader_args = (
            resource_name,
            self.domain,
            remove_duplicates,
            augmentation_factor,
            max_number_of_trackers,
            tracker_limit,
            use_story_concatenation,
        )
        return training.load_data(*loader_args)
示例#16
0
    def test_memorise(self, trained_policy, default_domain):
        """Check what the trained policy recalls for the form stories."""
        domain = Domain.load('data/test_domains/form.yml')
        trackers = training.load_data('data/test_stories/stories_form.md',
                                      domain)
        trained_policy.train(trackers, domain)

        featurizer = trained_policy.featurizer
        all_states, all_actions = featurizer.training_states_and_actions(
            trackers, domain)

        for tracker, states, actions in zip(trackers, all_states, all_actions):
            for state in states:
                if state is None:
                    continue
                # check that 'form: inform' was ignored
                assert 'intent_inform' not in state

            recalled = trained_policy.recall(states, tracker, domain)
            last = states[-1]
            active_form = trained_policy._get_active_form_name(last)
            first = states[0]

            # explicitly set intents and actions before listen after
            # which FormPolicy should not predict a form action and
            # should add FormValidation(False) event
            is_no_validation = False
            if first is not None and last is not None:
                after_form = 'prev_some_form' in first
                after_ask_continue = 'prev_utter_ask_continue' in first
                is_no_validation = (
                    (after_form and
                     ('intent_default' in last or 'intent_stop' in last)) or
                    (after_ask_continue and
                     ('intent_affirm' in last or 'intent_deny' in last))
                )

            # explicitly check that intent that starts the form
            # is not memorized as non validation intent
            starts_form = 'intent_start_form' in last
            if is_no_validation and not starts_form:
                assert recalled == active_form
            else:
                assert recalled is None

        nums = np.random.randn(domain.num_states)
        random_states = [dict(zip(domain.input_states, nums))]
        assert trained_policy.recall(random_states, None, domain) is None
示例#17
0
def test_generate_training_data_original_and_augmented_trackers(
        default_domain):
    """Augmentation keeps the 3 originals and stays within the upper bound."""
    training_trackers = training.load_data(
        "data/test_stories/stories_defaultdomain.md",
        default_domain,
        augmentation_factor=3,
    )
    # there are three original stories
    # augmentation factor of 3 indicates max of 3*10 augmented stories generated
    # maximum number of stories should be augmented+original = 33
    originals = [tracker for tracker in training_trackers
                 if not getattr(tracker, 'is_augmented', False)]
    assert len(originals) == 3
    assert len(training_trackers) <= 33
示例#18
0
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """A directory holding only empty files yields no training examples."""
    story_dir = tmpdir.strpath
    # create an empty hidden file first, then an empty normal file
    for file_name in (".hidden", "normal_file"):
        open(os.path.join(story_dir, file_name), 'a').close()

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    loaded = training.load_data(story_dir, default_domain)
    featurized = featurizer.featurize_trackers(loaded, default_domain)

    assert len(featurized.X) == 0
    assert len(featurized.y) == 0
示例#19
0
def test_tracker_write_to_story(tmpdir, default_domain):
    """A tracker exported as a story can be loaded back with its slot."""
    dialogue_tracker = tracker_from_dialogue_file(
        "data/test_dialogues/enter_name.json", default_domain)
    export_path = tmpdir.join("export.md").strpath
    dialogue_tracker.export_stories_to_file(export_path)

    loaded = training.load_data(export_path,
                                default_domain,
                                use_story_concatenation=False,
                                tracker_limit=1000,
                                remove_duplicates=False)
    assert len(loaded) == 1

    events = loaded[0].events
    assert len(events) == 7
    slot_event = events[5]
    assert slot_event.type_name == "slot"
    assert slot_event.key == "name"
    assert slot_event.value == "holger"
示例#20
0
    def load_data(self,
                  resource_name,  # type: Text
                  remove_duplicates=True,  # type: bool
                  unique_last_num_states=None,  # type: Optional[int]
                  augmentation_factor=20,  # type: int
                  max_number_of_trackers=None,  # deprecated
                  tracker_limit=None,  # type: Optional[int]
                  use_story_concatenation=True,  # type: bool
                  debug_plots=False  # type: bool
                  ):
        # type: (...) -> List[DialogueStateTracker]
        """Load training data from a resource.

        When ``unique_last_num_states`` is not given and every policy
        featurizer that is set exposes ``max_history``, it defaults to the
        largest ``max_history`` of the ensemble. When it is given but smaller
        than that maximum, a warning about possible data loss is logged.

        All arguments are forwarded positionally, together with
        ``self.domain``, to ``training.load_data``, whose result is returned.
        """

        # find maximum max_history
        # and if all featurizers are MaxHistoryTrackerFeaturizer
        max_max_history = 0
        all_max_history_featurizers = True
        for policy in self.policy_ensemble.policies:
            if hasattr(policy.featurizer, 'max_history'):
                max_max_history = max(policy.featurizer.max_history,
                                      max_max_history)
            elif policy.featurizer is not None:
                # a featurizer without max_history disables auto detection
                all_max_history_featurizers = False

        if unique_last_num_states is None:
            # for speed up of data generation
            # automatically detect unique_last_num_states
            # if it was not set and
            # if all featurizers are MaxHistoryTrackerFeaturizer
            if all_max_history_featurizers:
                unique_last_num_states = max_max_history
        elif unique_last_num_states < max_max_history:
            # possibility of data loss
            logger.warning("unique_last_num_states={} but "
                           "maximum max_history={}. "
                           "Possibility of data loss. "
                           "It is recommended to set "
                           "unique_last_num_states to "
                           "at least maximum max_history."
                           "".format(unique_last_num_states, max_max_history))

        return training.load_data(resource_name, self.domain,
                                  remove_duplicates, unique_last_num_states,
                                  augmentation_factor, max_number_of_trackers,
                                  tracker_limit, use_story_concatenation,
                                  debug_plots)
示例#21
0
def test_tracker_write_to_story(tmpdir, moodbot_domain):
    """A moodbot tracker exported as a story keeps its user intent."""
    dialogue_tracker = tracker_from_dialogue_file(
        "data/test_dialogues/moodbot.json", moodbot_domain)
    export_path = tmpdir.join("export.md").strpath
    dialogue_tracker.export_stories_to_file(export_path)

    loaded = training.load_data(export_path,
                                moodbot_domain,
                                use_story_concatenation=False,
                                tracker_limit=1000,
                                remove_duplicates=False)
    assert len(loaded) == 1

    events = loaded[0].events
    assert len(events) == 11
    user_event = events[4]
    assert user_event.type_name == "user"
    assert user_event.intent == {'confidence': 1.0, 'name': 'mood_unhappy'}
示例#22
0
def test_tracker_write_to_story(tmpdir, default_domain):
    """Export a dialogue tracker as a story and read the slot event back."""
    source_tracker = tracker_from_dialogue_file(
        "data/test_dialogues/enter_name.json", default_domain)
    story_file = tmpdir.join("export.md")
    source_tracker.export_stories_to_file(story_file.strpath)

    loader_options = dict(use_story_concatenation=False,
                          tracker_limit=1000,
                          remove_duplicates=False)
    reloaded = training.load_data(story_file.strpath,
                                  default_domain,
                                  **loader_options)
    assert len(reloaded) == 1

    recovered_events = reloaded[0].events
    assert len(recovered_events) == 7
    slot_event = recovered_events[5]
    assert slot_event.type_name == "slot"
    assert slot_event.key == "name"
    assert slot_event.value == "holger"
示例#23
0
def test_concerts_online_example(tmpdir):
    """Run the concertbot online-training example with scripted input.

    Console input is replaced by a fixed list of answers, the agent is served
    through ``online.serve_agent`` with three "/greet" messages, and the
    exported story file is loaded back and inspected.
    """
    # make the example's ``train_online`` module importable
    sys.path.append("examples/concertbot/")
    from train_online import train_agent
    from rasa_core import utils

    story_path = tmpdir.join("stories.md").strpath

    with utilities.cwd("examples/concertbot"):
        # each call of ``msgs_f`` hands out the next scripted user message
        msgs = iter(["/greet", "/greet", "/greet"])
        msgs_f = functools.partial(next, msgs)

        with utilities.mocked_cmd_input(
                utils,
                text=[
                    "2",  # action is wrong
                    "5",  # choose utter_goodbye action
                    "1"  # yes, action_listen is correct.
                ] * 2 + [  # repeat this twice
                    "0",  # export
                    story_path  # file path to export to
                ]):
            agent = train_agent()

            # before the online session the agent answers with a greeting
            responses = agent.handle_text("/greet", sender_id="user1")
            assert responses[-1]['text'] == "hey there!"

            online.serve_agent(agent, get_next_message=msgs_f)

            # the model should have been retrained and the model should now
            # directly respond with goodbye
            responses = agent.handle_text("/greet", sender_id="user2")
            assert responses[-1]['text'] == "goodbye :("

            assert os.path.exists(story_path)
            print(utils.read_file(story_path))

            # the exported story must load as one tracker with nine events
            t = training.load_data(story_path,
                                   agent.domain,
                                   use_story_concatenation=False)
            assert len(t) == 1
            assert len(t[0].events) == 9
            assert t[0].events[5] == ActionExecuted("utter_goodbye")
            assert t[0].events[6] == ActionExecuted("action_listen")
示例#24
0
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Stories containing a cycle yield the expected label distribution."""
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=4)
    training_trackers = training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0,
    )

    featurized = featurizer.featurize_trackers(training_trackers,
                                               default_domain)
    labels = featurized.y.argmax(axis=-1)

    # how many there are depends on the graph which is not created in a
    # deterministic way but should always be 3 or 4
    num_trackers = len(training_trackers)
    assert num_trackers in (3, 4)

    # if we have 4 trackers, there is going to be one example more for label 2
    expected_counts = {0: 6, 1: 2, 2: num_trackers - 1, 3: 1, 4: 3}
    assert Counter(labels) == expected_counts
示例#25
0
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """Loading a directory of empty files gives empty featurized data."""
    directory = tmpdir.strpath

    def touch(file_name):
        # create an empty file inside the temporary directory
        file_path = os.path.join(directory, file_name)
        open(file_path, 'a').close()
        return file_path

    # create a hidden file
    touch(".hidden")
    # create a normal file
    touch("normal_file")

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    loaded_trackers = training.load_data(directory, default_domain)
    featurized = featurizer.featurize_trackers(loaded_trackers,
                                               default_domain)

    assert len(featurized.X) == 0
    assert len(featurized.y) == 0
示例#26
0
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Stories containing a cycle yield the expected label distribution."""
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=4)
    training_trackers = training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0,
    )

    featurized = featurizer.featurize_trackers(training_trackers,
                                               default_domain)
    labels = featurized.y.argmax(axis=-1)

    # how many there are depends on the graph which is not created in a
    # deterministic way but should always be 3 or 4
    num_trackers = len(training_trackers)
    assert num_trackers in (3, 4)

    # if we have 4 trackers, there is going to be one example more for label 3
    expected_counts = {0: 6, 1: 2, 3: num_trackers - 1, 4: 1, 5: 3}
    assert Counter(labels) == expected_counts
示例#27
0
def test_concerts_online_example(tmpdir):
    """Run the concertbot online-training example with scripted input.

    Console input is replaced by a fixed list of answers, the agent is served
    through ``online.serve_agent`` with three "/greet" messages, and the
    exported story file is loaded back and inspected.
    """
    # make the example's ``train_online`` module importable
    sys.path.append("examples/concertbot/")
    from train_online import train_agent
    from rasa_core import utils

    story_path = tmpdir.join("stories.md").strpath

    with utilities.cwd("examples/concertbot"):
        # each call of ``msgs_f`` hands out the next scripted user message
        msgs = iter(["/greet", "/greet", "/greet"])
        msgs_f = functools.partial(next, msgs)

        with utilities.mocked_cmd_input(
                utils,
                text=["2",  # action is wrong
                      "5",  # choose utter_goodbye action
                      "1"  # yes, action_listen is correct.
                      ] * 2 + [  # repeat this twice
                         "0",  # export
                         story_path  # file path to export to
                     ]):
            agent = train_agent()

            # before the online session the agent answers with a greeting
            responses = agent.handle_text("/greet", sender_id="user1")
            assert responses[-1]['text'] == "hey there!"

            online.serve_agent(agent, get_next_message=msgs_f)

            # the model should have been retrained and the model should now
            # directly respond with goodbye
            responses = agent.handle_text("/greet", sender_id="user2")
            assert responses[-1]['text'] == "goodbye :("

            assert os.path.exists(story_path)
            print(utils.read_file(story_path))

            # the exported story must load as one tracker with nine events
            t = training.load_data(story_path, agent.domain,
                                   use_story_concatenation=False)
            assert len(t) == 1
            assert len(t[0].events) == 9
            assert t[0].events[5] == ActionExecuted("utter_goodbye")
            assert t[0].events[6] == ActionExecuted("action_listen")
示例#28
0
    def load_data(
            self,
            resource_name: Text,
            remove_duplicates: bool = True,
            unique_last_num_states: Optional[int] = None,
            augmentation_factor: int = 20,
            tracker_limit: Optional[int] = None,
            use_story_concatenation: bool = True,
            debug_plots: bool = False,
            exclusion_percentage: Optional[int] = None
    ) -> List[DialogueStateTracker]:
        """Load training data from a resource.

        When ``unique_last_num_states`` is not given and
        ``self._are_all_featurizers_using_a_max_history()`` is true, it
        defaults to ``self._max_history()``. When it is given but smaller than
        that value, a warning about possible data loss is logged.

        All arguments are forwarded, together with ``self.domain``, to
        ``training.load_data``, whose result is returned.
        """

        max_history = self._max_history()

        if unique_last_num_states is None:
            # for speed up of data generation
            # automatically detect unique_last_num_states
            # if it was not set and
            # if all featurizers are MaxHistoryTrackerFeaturizer
            if self._are_all_featurizers_using_a_max_history():
                unique_last_num_states = max_history
        elif unique_last_num_states < max_history:
            # possibility of data loss
            logger.warning("unique_last_num_states={} but "
                           "maximum max_history={}. "
                           "Possibility of data loss. "
                           "It is recommended to set "
                           "unique_last_num_states to "
                           "at least maximum max_history."
                           "".format(unique_last_num_states, max_history))

        return training.load_data(resource_name,
                                  self.domain,
                                  remove_duplicates,
                                  unique_last_num_states,
                                  augmentation_factor,
                                  tracker_limit,
                                  use_story_concatenation,
                                  debug_plots,
                                  exclusion_percentage=exclusion_percentage)
示例#29
0
def train_trackers(domain):
    """Load the trackers of ``DEFAULT_STORIES_FILE`` for ``domain``."""
    return training.load_data(DEFAULT_STORIES_FILE, domain)
示例#30
0
def record_messages(
        endpoint,  # type: EndpointConfig
        sender_id=UserMessage.DEFAULT_SENDER_ID,  # type: Text
        max_message_limit=None,  # type: Optional[int]
        on_finish=None,  # type: Optional[Callable[[], None]]
        finetune=False,  # type: bool
        stories=None,  # type: Optional[Text]
        skip_visualization=False  # type: bool
):
    """Read messages from the command line and print bot responses.

    Retrieves the domain from ``endpoint``, loads the trackers of ``stories``
    and then loops until ``utils.is_limit_reached`` reports that
    ``max_message_limit`` has been reached. Restart, undo and fork requests
    arrive as exceptions raised inside the loop and are handled there.

    Returns ``None`` early if the domain cannot be retrieved because of a
    connection error. Any other exception is logged and re-raised.
    ``on_finish``, if given, is called in every case once the function exits.
    """

    # imported locally rather than at module level
    from rasa_core import training

    try:
        _print_help(skip_visualization)

        try:
            domain = retrieve_domain(endpoint)
        except requests.exceptions.ConnectionError:
            logger.exception("Failed to connect to rasa core server at '{}'. "
                             "Is the server running?".format(endpoint.url))
            return

        trackers = training.load_data(
            stories,
            Domain.from_dict(domain),
            augmentation_factor=0,
            use_story_concatenation=False,
        )

        # first item of every intent entry
        # (presumably the intent name of a single-key mapping; TODO confirm)
        intents = [next(iter(i)) for i in (domain.get("intents") or [])]

        num_messages = 0
        # event lists of the loaded story trackers plus the live sender id;
        # this list is what gets handed to ``_plot_trackers``
        sender_ids = [t.events for t in trackers] + [sender_id]

        if not skip_visualization:
            plot_file = "story_graph.dot"
            _plot_trackers(sender_ids, plot_file, endpoint)
        else:
            plot_file = None

        while not utils.is_limit_reached(num_messages, max_message_limit):
            try:
                if is_listening_for_message(sender_id, endpoint):
                    _enter_user_message(sender_id, endpoint)
                    _validate_nlu(intents, endpoint, sender_id)
                _predict_till_next_listen(endpoint, sender_id, finetune,
                                          sender_ids, plot_file)

                # only counted when the turn finished without an exception
                num_messages += 1
            except RestartConversation:
                # send a restart event followed by an action listen event
                send_event(endpoint, sender_id, {"event": "restart"})
                send_event(endpoint, sender_id, {
                    "event": "action",
                    "name": ACTION_LISTEN_NAME
                })

                logger.info("Restarted conversation, starting a new one.")
            except UndoLastStep:
                _undo_latest(sender_id, endpoint)
                _print_history(sender_id, endpoint)
            except ForkTracker:
                _print_history(sender_id, endpoint)

                evts = _request_fork_from_user(sender_id, endpoint)
                # NOTE(review): a new sender id is generated even when no
                # fork events were returned (``evts is None``) - confirm
                # that this is intended.
                sender_id = uuid.uuid4().hex

                if evts is not None:
                    replace_events(endpoint, sender_id, evts)
                    sender_ids.append(sender_id)
                    _print_history(sender_id, endpoint)
                    _plot_trackers(sender_ids, plot_file, endpoint)

    except Exception:
        logger.exception("An exception occurred while recording messages.")
        raise
    finally:
        # runs on normal exit, on the early return and on errors
        if on_finish:
            on_finish()
示例#31
0
def train_trackers(domain):
    """Return the trackers loaded from ``DEFAULT_STORIES_FILE``."""
    stories_file = DEFAULT_STORIES_FILE
    return training.load_data(stories_file, domain)