async def test_create_train_data_with_history(default_domain):
    """Check the training states produced with a history window of four.

    Loads the default stories without augmentation, extracts the per-example
    state sequences with ``MaxHistoryTrackerFeaturizer(max_history=4)`` and
    compares their JSON serialisations against the expected set.
    """
    tracker_featurizer = MaxHistoryTrackerFeaturizer(max_history=4)
    story_trackers = await training.load_data(
        DEFAULT_STORIES_FILE, default_domain, augmentation_factor=0
    )
    assert len(story_trackers) == 3

    state_sequences, _ = tracker_featurizer.training_states_and_actions(
        story_trackers, default_domain
    )

    # Serialise every state sequence with sorted keys and sort the results so
    # the comparison does not depend on the order the examples were produced in.
    serialised = sorted(
        json.dumps(sequence, sort_keys=True) for sequence in state_sequences
    )

    assert serialised == [
        '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}, {"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}]',
        '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]',
        '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]',
        '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]',
        '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]',
        '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}]',
        '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]',
        '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}]',
        "[{}]",
    ]
async def test_create_train_data_no_history(domain: Domain, stories_path: Text):
    """Check the training states produced with a history window of one.

    Loads the stories at ``stories_path`` without augmentation, extracts the
    state sequences with ``MaxHistoryTrackerFeaturizer(max_history=1)`` and
    compares their JSON serialisations, sorted in descending order, against
    the expected list.
    """
    tracker_featurizer = MaxHistoryTrackerFeaturizer(max_history=1)
    story_trackers = await training.load_data(
        stories_path, domain, augmentation_factor=0
    )
    assert len(story_trackers) == 4

    state_sequences, _ = tracker_featurizer.training_states_and_actions(
        story_trackers, domain
    )

    # Serialise with sorted keys, then order descending so the comparison is
    # independent of the order the examples were produced in.
    serialised = [
        json.dumps(sequence, sort_keys=True) for sequence in state_sequences
    ]
    serialised.sort(reverse=True)

    assert serialised == [
        "[{}]",
        '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]',
        '[{"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]',
        '[{"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]',
        '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}]',
        '[{"prev_action": {"action_name": "utter_default"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]',
    ]
def test_slots_states_before_user_utterance(default_domain):
    """Check the state produced when a slot is set before any user message.

    The tracker holds only a ``SlotSet`` for the domain's first slot followed
    by an ``utter_default`` action; the single resulting training state is
    expected to contain just the slot entry.
    """
    first_slot_name = default_domain.slots[0].name
    events = [
        SlotSet(first_slot_name, "some_value"),
        ActionExecuted("utter_default"),
    ]
    dialogue_tracker = DialogueStateTracker.from_events(
        "bla", evts=events, slots=default_domain.slots
    )

    tracker_featurizer = MaxHistoryTrackerFeaturizer()
    states_per_tracker, _ = tracker_featurizer.training_states_and_actions(
        [dialogue_tracker], default_domain
    )

    assert states_per_tracker == [[{"slots": {"name": (1.0,)}}]]
async def test_create_train_data_unfeaturized_entities():
    """Check training states for a domain loaded from the unfeaturized-entities fixture.

    Loads the domain and stories from the ``default_unfeaturized_entities``
    fixtures, extracts the state sequences with
    ``MaxHistoryTrackerFeaturizer(max_history=1)``, passes every state through
    ``check_for_too_many_entities_and_remove_them`` and compares the JSON
    serialisations, sorted in descending order, against the expected list.
    """
    domain_file = "data/test_domains/default_unfeaturized_entities.yml"
    stories_file = "data/test_stories/stories_unfeaturized_entities.md"

    domain = Domain.load(domain_file)
    featurizer = MaxHistoryTrackerFeaturizer(max_history=1)
    training_trackers = await training.load_data(
        stories_file, domain, augmentation_factor=0
    )
    assert len(training_trackers) == 2

    decoded, _ = featurizer.training_states_and_actions(training_trackers, domain)

    # Clean each state with the helper, serialise with sorted keys, and order
    # descending so the comparison does not depend on production order.
    hashed = sorted(
        (
            json.dumps(
                [check_for_too_many_entities_and_remove_them(state) for state in states],
                sort_keys=True,
            )
            for states in decoded
        ),
        reverse=True,
    )

    assert hashed == [
        "[{}]",
        '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]',
        '[{"prev_action": {"action_name": "utter_greet"}, "user": {"entities": ["name"], "intent": "greet"}}]',
        '[{"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]',
        '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "why"}}]',
        '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "thank"}}]',
        '[{"prev_action": {"action_name": "utter_default"}, "user": {"entities": [], "intent": "default"}}]',
        '[{"prev_action": {"action_name": "utter_default"}, "user": {"entities": [], "intent": "ask"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "why"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "thank"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": [], "intent": "default"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": [], "intent": "ask"}}]',
        '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": ["name"], "intent": "greet"}}]',
    ]
async def test_load_multi_file_training_data(
    stories_resources: List, default_domain: Domain
):
    """Check that single-file and multi-file stories yield the same training data.

    The stories in `data/test_multifile_stories` are the same as in
    `data/test_stories/stories.md`, but split across multiple files.
    ``stories_resources[0]`` and ``stories_resources[1]`` are each loaded and
    featurized with their own ``MaxHistoryTrackerFeaturizer`` (max_history=2);
    the serialised states/actions, the surfaced features and the label ids
    must match between the two.
    """
    featurizer = MaxHistoryTrackerFeaturizer(SingleStateFeaturizer(), max_history=2)
    trackers = await training.load_data(
        stories_resources[0], default_domain, augmentation_factor=0
    )
    tr_as_sts, tr_as_acts = featurizer.training_states_and_actions(
        trackers, default_domain
    )
    hashed = sorted(
        (
            json.dumps(sts + acts, sort_keys=True)
            for sts, acts in zip(tr_as_sts, tr_as_acts)
        ),
        reverse=True,
    )
    data, label_ids = featurizer.featurize_trackers(
        trackers, default_domain, interpreter=RegexInterpreter()
    )

    featurizer_mul = MaxHistoryTrackerFeaturizer(SingleStateFeaturizer(), max_history=2)
    trackers_mul = await training.load_data(
        stories_resources[1], default_domain, augmentation_factor=0
    )
    # Use the featurizer created for the second resource here as well, so each
    # data set is processed end-to-end by its own featurizer instance.
    tr_as_sts_mul, tr_as_acts_mul = featurizer_mul.training_states_and_actions(
        trackers_mul, default_domain
    )
    hashed_mul = sorted(
        (
            json.dumps(sts_mul + acts_mul, sort_keys=True)
            for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul)
        ),
        reverse=True,
    )
    data_mul, label_ids_mul = featurizer_mul.featurize_trackers(
        trackers_mul, default_domain, interpreter=RegexInterpreter()
    )

    assert hashed == hashed_mul

    # we check for intents, action names and entities -- the features which
    # are included in the story files
    data = surface_attributes(data)
    data_mul = surface_attributes(data_mul)

    for attribute in [INTENT, ACTION_NAME, ENTITIES]:
        if attribute not in data or attribute not in data_mul:
            continue

        features = data.get(attribute)
        features_mul = data_mul.get(attribute)
        assert len(features) == len(features_mul)

        for idx_tracker, tracker_features in enumerate(features):
            for idx_dialogue, f1 in enumerate(tracker_features):
                f2 = features_mul[idx_tracker][idx_dialogue]
                if f1 is None or f2 is None:
                    assert f1 == f2
                    continue
                # Compare turn by turn under separate names, so the
                # per-dialogue features being iterated are not rebound.
                for idx_turn in range(len(f1)):
                    turn_f1 = f1[idx_turn]
                    turn_f2 = f2[idx_turn]
                    assert np.all((turn_f1 == turn_f2).data)

    assert np.all(label_ids == label_ids_mul)