def _standard_featurizer(max_history: Optional[int] = None) -> "TrackerFeaturizer":
    """Build a tracker featurizer around a label-tokenizer state featurizer.

    Args:
        max_history: When ``None`` a ``FullDialogueTrackerFeaturizer`` is
            returned; otherwise a ``MaxHistoryTrackerFeaturizer`` that is
            given this value as its ``max_history``.

    Returns:
        The tracker featurizer, wrapping a freshly created
        ``LabelTokenizerSingleStateFeaturizer``.
    """
    state_featurizer = LabelTokenizerSingleStateFeaturizer()

    if max_history is not None:
        return MaxHistoryTrackerFeaturizer(
            state_featurizer, max_history=max_history
        )

    return FullDialogueTrackerFeaturizer(state_featurizer)
def test_LabelTokenizerSingleStateFeaturizer():
    """Check the encoding of a small hand-built vocabulary.

    The featurizer is configured by hand with one user label (``a_d``) and
    one bot label (``c_b``), then asked to encode two states: one with
    0/1 values and one with non-binary weights.
    """
    f = LabelTokenizerSingleStateFeaturizer()
    f.user_labels = ["a_d"]
    f.bot_labels = ["c_b"]
    f.user_vocab = {"a": 0, "d": 1}
    f.bot_vocab = {"b": 1, "c": 0}
    f.num_features = len(f.user_vocab) + len(f.slot_labels) + len(f.bot_vocab)

    # Expected feature layout (assuming there are no slot labels, which the
    # four-element expectations below rely on):
    #   index: 0    1    2    3
    #   token: "a"  "d"  "c"  "b"
    encoded = f.encode(
        {"a_d": 1.0, "prev_c_b": 0.0, "e": 1.0, "prev_action_listen": 1.0}
    )
    # "a_d"                -> tokens "a", "d"            -> 1 1 . .
    # "prev_c_b"           -> only "c_b" counts, value 0 -> . . 0 0
    # "e", "prev_action_listen" -> expected to be ignored: not in the labels
    assert list(encoded) == [1, 1, 0, 0]

    encoded = f.encode(
        {"a_d": 1.7, "prev_c_b": 2.0, "e": 1.0, "prev_action_listen": 1.0}
    )
    # Compare as a list so the equality check produces a single bool. A
    # direct ``encoded == [...]`` is element-wise when ``encoded`` is an
    # array, and asserting on that result is ambiguous and raises.
    assert list(encoded) == [1.7, 1.7, 2.0, 2.0]
def test_label_tokenizer_featurizer_uses_correct_dtype_float():
    """Encoding a state with a fractional value must yield float64 output."""
    featurizer = LabelTokenizerSingleStateFeaturizer()
    featurizer.user_labels = ["a_d"]
    featurizer.bot_labels = ["c_b"]
    featurizer.user_vocab = {"a": 0, "d": 1}
    featurizer.bot_vocab = {"b": 1, "c": 0}

    # Total feature count = user tokens + slot labels + bot tokens.
    user_size = len(featurizer.user_vocab)
    slot_size = len(featurizer.slot_labels)
    bot_size = len(featurizer.bot_vocab)
    featurizer.num_features = user_size + slot_size + bot_size

    state = {"a_d": 0.2, "prev_c_b": 0.0, "prev_action_listen": 1.0}
    result = featurizer.encode(state)

    assert result.dtype == np.float64
def test_label_tokenizer_featurizer_handles_on_non_existing_features():
    """A state key outside the configured labels ("e") must not break encoding."""
    featurizer = LabelTokenizerSingleStateFeaturizer()
    featurizer.user_labels = ["a_d"]
    featurizer.bot_labels = ["c_b"]
    featurizer.user_vocab = {"a": 0, "d": 1}
    featurizer.bot_vocab = {"b": 1, "c": 0}

    # Total feature count = user tokens + slot labels + bot tokens.
    user_size = len(featurizer.user_vocab)
    slot_size = len(featurizer.slot_labels)
    bot_size = len(featurizer.bot_vocab)
    featurizer.num_features = user_size + slot_size + bot_size

    state = {
        "a_d": 1.0,
        "prev_c_b": 0.0,
        "e": 1.0,  # not part of user_labels or bot_labels
        "prev_action_listen": 1.0,
    }
    result = featurizer.encode(state)

    expected = np.array([1, 1, 0, 0])
    assert (result == expected).all()
def test_label_tokenizer_featurizer_handles_probabilistic_intents():
    """Fractional intent values must be carried through into the encoding.

    Both user labels share the "intent" token, and the expected vector
    holds 1.5 (= 0.5 + 1.0) at that token's index.
    """
    featurizer = LabelTokenizerSingleStateFeaturizer()
    featurizer.user_labels = ["intent_a", "intent_d"]
    featurizer.bot_labels = ["c", "b"]
    featurizer.user_vocab = {"intent": 2, "a": 0, "d": 1}
    featurizer.bot_vocab = {"b": 1, "c": 0}

    # Total feature count = user tokens + slot labels + bot tokens.
    user_size = len(featurizer.user_vocab)
    slot_size = len(featurizer.slot_labels)
    bot_size = len(featurizer.bot_vocab)
    featurizer.num_features = user_size + slot_size + bot_size

    state = {
        "intent_a": 0.5,
        "prev_b": 0.2,
        "intent_d": 1.0,
        "prev_action_listen": 1.0,
    }
    result = featurizer.encode(state)

    expected = np.array([0.5, 1.0, 1.5, 0.0, 0.2])
    assert (result == expected).all()