async def test_create_train_data_unfeaturized_entities():
    """Check the decoded training states for the unfeaturized-entities fixtures.

    Loads the stories in ``stories_unfeaturized_entities.md`` against the
    ``default_unfeaturized_entities.yml`` domain, extracts the training states
    with a ``max_history=1`` featurizer and compares their JSON dumps
    (sorted in descending order) with the expected list.
    """
    domain = Domain.load("data/test_domains/default_unfeaturized_entities.yml")
    history_featurizer = MaxHistoryTrackerFeaturizer(max_history=1)

    trackers = await training.load_data(
        "data/test_stories/stories_unfeaturized_entities.md",
        domain,
        augmentation_factor=0,
    )
    assert len(trackers) == 2

    decoded, _ = history_featurizer.training_states_and_actions(trackers, domain)

    # The order of the decoded states is not fixed, so compare a sorted,
    # canonical JSON representation of them.
    serialized_states = sorted(
        (json.dumps(states, sort_keys=True) for states in decoded),
        reverse=True,
    )

    expected_states = [
        "[{}]",
        '[{"intent_why": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_why": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_thank": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_thank": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"entity_name": 1.0, "entity_other": 1.0, "intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"entity_name": 1.0, "entity_other": 1.0, "intent_default": 1.0, "prev_action_listen": 1.0}]',
        '[{"entity_name": 1.0, "entity_other": 1.0, "entity_unrelated_recognized_entity": 1.0, "intent_ask": 1.0, "prev_utter_default": 1.0}]',
        '[{"entity_name": 1.0, "entity_other": 1.0, "entity_unrelated_recognized_entity": 1.0, "intent_ask": 1.0, "prev_action_listen": 1.0}]',
    ]
    assert serialized_states == expected_states
async def test_create_train_data_with_history(default_domain):
    """Check the decoded training states produced with ``max_history=4``.

    Loads ``DEFAULT_STORIES_FILE`` without augmentation, extracts the training
    states and compares their JSON dumps (sorted ascending) with the expected
    list. ``null`` entries in the expected strings are ``None`` states as
    serialized by ``json.dumps``.
    """
    history_featurizer = MaxHistoryTrackerFeaturizer(max_history=4)
    trackers = await training.load_data(
        DEFAULT_STORIES_FILE, default_domain, augmentation_factor=0
    )
    assert len(trackers) == 3

    decoded, _ = history_featurizer.training_states_and_actions(
        trackers, default_domain
    )

    # The order of the decoded states is not fixed, so compare a sorted,
    # canonical JSON representation of them.
    serialized_states = sorted(
        json.dumps(states, sort_keys=True) for states in decoded
    )

    expected_states = [
        "[null, null, null, {}]",
        "[null, null, {}, "
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]',
        "[null, null, {}, "
        '{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        "[null, {}, "
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}, '
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}]',
        "[null, {}, "
        '{"intent_greet": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}, '
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}, '
        '{"intent_default": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}, '
        '{"intent_default": 1.0, '
        '"prev_utter_default": 1.0, "slot_name_0": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_default": 1.0, "prev_utter_default": 1.0}, '
        '{"intent_goodbye": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_greet": 1.0, "prev_utter_greet": 1.0}, '
        '{"intent_default": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}, '
        '{"intent_default": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_default": 1.0, "prev_utter_default": 1.0}, '
        '{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{}, {"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}, '
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}, '
        '{"intent_default": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]',
        '[{}, {"intent_greet": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_greet": 1.0, "prev_utter_greet": 1.0}, '
        '{"intent_default": 1.0, "prev_action_listen": 1.0}]',
    ]
    assert serialized_states == expected_states
async def test_MaxHistoryTrackerFeaturizer():
    """Smoke-test state/action extraction on the stories under ``prj_dir``.

    Loads the domain and stories located under ``prj_dir``, hands the loaded
    trackers to ``viz_trackers`` and runs a ``max_history=5`` featurizer over
    them. Nothing is asserted; the test only exercises the calls.
    """
    domain = Domain.load("{}/domain_with_slots.yml".format(prj_dir))
    story_trackers = await training.load_data(
        "{}/data/stories.md".format(prj_dir),
        domain,
        augmentation_factor=0,
        debug_plots=True,
    )
    viz_trackers(story_trackers)

    history_featurizer = MaxHistoryTrackerFeaturizer(max_history=5)
    # The results are unpacked but not inspected.
    decoded, actions = history_featurizer.training_states_and_actions(
        story_trackers, domain
    )
async def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """Loading a directory holding only empty files yields no training data.

    Creates one hidden and one regular empty file in ``tmpdir``, loads the
    directory as training data and asserts that featurizing the resulting
    trackers produces empty ``X`` and ``y``.
    """
    # One hidden file and one normal file, both empty.
    for file_name in (".hidden", "normal_file"):
        Path(tmpdir / file_name).touch()

    history_featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2
    )
    loaded_trackers = await training.load_data(tmpdir.strpath, default_domain)
    featurized = history_featurizer.featurize_trackers(
        loaded_trackers, default_domain
    )

    assert len(featurized.X) == 0
    assert len(featurized.y) == 0
async def test_load_multi_file_training_data(default_domain):
    """Single-file and multi-file stories must produce the same training data.

    The stories in `data/test_multifile_stories` are the same as in
    `data/test_stories/stories.md`, but split across multiple files. Both
    sources are loaded without augmentation and compared twice: once via the
    decoded states/actions (as sorted JSON strings) and once via the
    featurized ``X`` / ``y`` arrays (sorted along the sample axis).
    """
    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2
    )
    trackers = await training.load_data(
        "data/test_stories/stories.md", default_domain, augmentation_factor=0
    )
    (tr_as_sts, tr_as_acts) = featurizer.training_states_and_actions(
        trackers, default_domain
    )
    hashed = sorted(
        (
            json.dumps(sts + acts, sort_keys=True)
            for sts, acts in zip(tr_as_sts, tr_as_acts)
        ),
        reverse=True,
    )
    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2
    )
    trackers_mul = await training.load_data(
        "data/test_multifile_stories", default_domain, augmentation_factor=0
    )
    # Use the multi-file featurizer here so each data source is processed by
    # its own featurizer instance end to end.
    (tr_as_sts_mul, tr_as_acts_mul) = featurizer_mul.training_states_and_actions(
        trackers_mul, default_domain
    )
    hashed_mul = sorted(
        (
            json.dumps(sts_mul + acts_mul, sort_keys=True)
            for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul)
        ),
        reverse=True,
    )
    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul

    # `ndarray.sort()` sorts in place and returns None, so comparing its
    # return values would always succeed. `np.sort` returns sorted copies,
    # which makes the comparison meaningful.
    assert np.array_equal(np.sort(data.X, axis=0), np.sort(data_mul.X, axis=0))
    assert np.array_equal(np.sort(data.y, axis=0), np.sort(data_mul.y, axis=0))
def _standard_featurizer(max_history: Optional[int] = None) -> "TrackerFeaturizer":
    """Build the default tracker featurizer.

    Args:
        max_history: When given, a ``MaxHistoryTrackerFeaturizer`` with this
            ``max_history`` is returned; when ``None``, a
            ``FullDialogueTrackerFeaturizer`` is returned instead.

    Returns:
        A tracker featurizer wrapping a ``LabelTokenizerSingleStateFeaturizer``.
    """
    state_featurizer = LabelTokenizerSingleStateFeaturizer()
    if max_history is not None:
        return MaxHistoryTrackerFeaturizer(state_featurizer, max_history=max_history)
    return FullDialogueTrackerFeaturizer(state_featurizer)
def train_dialogue(
    domain_file="customer_domain.yml",
    model_path="./models/dialogue",
    training_data_file="stories.md",
):
    """Train a dialogue agent on a stories file and persist it.

    Args:
        domain_file: Path to the domain file handed to ``Agent``.
        model_path: Directory the trained agent is persisted to.
        training_data_file: Path to the stories file used as training data.

    Returns:
        The trained ``Agent``.
    """
    # Featurizer used by the Keras policy.
    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=5
    )

    policies = [
        MemoizationPolicy(),
        KerasPolicy(featurizer, epochs=300, batch_size=10),
    ]
    agent = Agent(domain_file, policies=policies)

    # `agent.load_data` is awaited through the event loop, which is run until
    # loading has finished.
    training_data = asyncio.get_event_loop().run_until_complete(
        agent.load_data(training_data_file)
    )

    agent.train(training_data, validation=0.2)
    agent.persist(model_path)
    return agent
def _sliced_states_iterator(
    trackers: List[TrackerWithCachedStates], domain: Domain, max_history: int
) -> Generator[TrackerEventStateTuple, None, None]:
    """Creates an iterator over sliced states.

    Walks through every tracker and, for each `ActionExecuted` event in it,
    slices the states seen up to and including that action according to
    `max_history`.

    Args:
        trackers: List of trackers.
        domain: Domain (used for tracker.past_states).
        max_history: Assumed `max_history` value for slicing.

    Yields:
        A (tracker, event, sliced_states) triplet.
    """
    for tracker in trackers:
        # Plain-dict copies of the tracker's past states.
        state_dicts = [dict(state) for state in tracker.past_states(domain)]

        executed_actions = (
            event for event in tracker.events if isinstance(event, ActionExecuted)
        )
        # Only executed actions advance the position within the state list.
        for position, action_event in enumerate(executed_actions):
            history = MaxHistoryTrackerFeaturizer.slice_state_history(
                state_dicts[: position + 1], max_history
            )
            yield TrackerEventStateTuple(tracker, action_event, history)
async def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """Loading a directory holding only empty files yields no training data.

    Creates one hidden and one regular empty file in ``tmpdir``, loads the
    directory as training data and asserts that featurizing the resulting
    trackers produces empty ``X`` and ``y``.
    """
    # One hidden file and one normal file; opening in append mode creates
    # them, and the `with` block closes them again.
    for file_name in (".hidden", "normal_file"):
        with open(os.path.join(tmpdir.strpath, file_name), "a"):
            pass

    history_featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2
    )
    loaded_trackers = await training.load_data(tmpdir.strpath, default_domain)
    featurized = history_featurizer.featurize_trackers(
        loaded_trackers, default_domain
    )

    assert len(featurized.X) == 0
    assert len(featurized.y) == 0
def _standard_featurizer(max_history=None):
    """Build the default ``MaxHistoryTrackerFeaturizer``.

    Memoization policy always uses MaxHistoryTrackerFeaturizer without a
    state featurizer, so ``state_featurizer`` is fixed to ``None`` and
    intent probabilities are switched off.

    Args:
        max_history: Forwarded unchanged as the featurizer's ``max_history``.
    """
    featurizer_options = {
        "state_featurizer": None,
        "max_history": max_history,
        "use_intent_probabilities": False,
    }
    return MaxHistoryTrackerFeaturizer(**featurizer_options)
async def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Check the label distribution for stories that contain a cycle.

    Loads ``stories_with_cycle.md`` without augmentation, featurizes the
    trackers with ``max_history=4`` and compares how often each label index
    occurs with the expected counts.
    """
    history_featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=4
    )
    trackers = await training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0,
    )
    featurized = history_featurizer.featurize_trackers(trackers, default_domain)
    labels = featurized.y.argmax(axis=-1)

    # How many trackers there are depends on the graph, which is not created
    # in a deterministic way, but it should always be 3 or 4.
    tracker_count = len(trackers)
    assert tracker_count in (3, 4)

    # With 4 trackers there is one more example for the label whose count
    # depends on the tracker count (index 8 below).
    # If new default actions are added, the keys of the actions will change.
    expected_label_counts = {0: 6, 1: 2, 8: tracker_count - 1, 9: 1, 10: 3}
    assert Counter(labels) == expected_label_counts
def _standard_featurizer():
    """Return a ``MaxHistoryTrackerFeaturizer`` over binary state features.

    No ``max_history`` is passed, so the featurizer's own default applies.
    """
    state_featurizer = BinarySingleStateFeaturizer()
    return MaxHistoryTrackerFeaturizer(state_featurizer)
def featurizer(self):
    """Return a new ``MaxHistoryTrackerFeaturizer`` using ``self.max_history``.

    A fresh featurizer wrapping a ``BinarySingleStateFeaturizer`` is built on
    every call.
    """
    return MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=self.max_history
    )
def _standard_featurizer(max_history=None) -> MaxHistoryTrackerFeaturizer:
    """Return a ``MaxHistoryTrackerFeaturizer`` over binary state features.

    Args:
        max_history: Forwarded unchanged as the featurizer's ``max_history``.
    """
    state_featurizer = BinarySingleStateFeaturizer()
    return MaxHistoryTrackerFeaturizer(state_featurizer, max_history=max_history)