def test_similarities_collection_for_label_ids(self):
    label_ids = np.array([[0, 1], [1, -1], [2, -1]])
    outputs = {
        "similarities": np.array(
            [[[1.2, 0.3, 0.2]], [[0.5, 0.2, 1.6]], [[0.01, 0.1, 1.7]]]
        )
    }
    label_id_similarities = UnexpecTEDIntentPolicy._collect_label_id_grouped_scores(
        outputs, label_ids
    )

    # Should contain similarities for all label ids except padding token.
    assert sorted(list(label_id_similarities.keys())) == [0, 1, 2]

    # Cross-check that the collected similarities are correct for each label id.
    assert label_id_similarities[0] == {
        POSITIVE_SCORES_KEY: [1.2],
        NEGATIVE_SCORES_KEY: [0.5, 0.01],
    }
    assert label_id_similarities[1] == {
        POSITIVE_SCORES_KEY: [0.3, 0.2],
        NEGATIVE_SCORES_KEY: [0.1],
    }
    assert label_id_similarities[2] == {
        POSITIVE_SCORES_KEY: [1.7],
        NEGATIVE_SCORES_KEY: [0.2, 1.6],
    }
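
# A minimal sketch of the grouping exercised above (a hypothetical helper, not
# UnexpecTEDIntentPolicy's actual implementation): `label_ids[i]` holds example
# i's true label ids padded with -1, and `outputs["similarities"][i][0][j]` is
# example i's similarity to candidate label j. Similarities to true labels are
# collected as positive scores, similarities to all other labels as negative
# scores. This reproduces the expected values asserted in the test.
def _sketch_collect_label_id_grouped_scores(outputs, label_ids):
    unique_label_ids = [
        int(label_id) for label_id in np.unique(label_ids) if label_id != -1
    ]
    grouped_scores = {
        label_id: {POSITIVE_SCORES_KEY: [], NEGATIVE_SCORES_KEY: []}
        for label_id in unique_label_ids
    }
    for example_label_ids, example_similarities in zip(
        label_ids, outputs["similarities"]
    ):
        true_label_ids = {int(label_id) for label_id in example_label_ids}
        for candidate_label_id, similarity in enumerate(example_similarities[0]):
            score_key = (
                POSITIVE_SCORES_KEY
                if candidate_label_id in true_label_ids
                else NEGATIVE_SCORES_KEY
            )
            grouped_scores[candidate_label_id][score_key].append(float(similarity))
    return grouped_scores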
def test_label_data_assembly(
    self, trained_policy: UnexpecTEDIntentPolicy, default_domain: Domain
):
    # Construct input data
    state_featurizer = trained_policy.featurizer.state_featurizer
    encoded_all_labels = state_featurizer.encode_all_labels(
        default_domain, precomputations=None
    )
    attribute_data, _ = model_data_utils.convert_to_data_format(encoded_all_labels)
    assembled_label_data = trained_policy._assemble_label_data(
        attribute_data, default_domain
    )
    assembled_label_data_signature = assembled_label_data.get_signature()

    assert list(assembled_label_data_signature.keys()) == [
        f"{LABEL}_{INTENT}",
        LABEL,
    ]
    assert assembled_label_data.num_examples == len(default_domain.intents)
    assert list(assembled_label_data_signature[f"{LABEL}_{INTENT}"].keys()) == [
        MASK,
        SENTENCE,
    ]
    assert list(assembled_label_data_signature[LABEL].keys()) == [IDS]
    assert assembled_label_data_signature[f"{LABEL}_{INTENT}"][SENTENCE][
        0
    ].units == len(default_domain.intents)
def test_train_with_e2e_data(
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    tracker_events: List[List[Event]],
    skip_training: bool,
    domain: Domain,
):
    # `tracker_events`, `skip_training` and `domain` are supplied via pytest
    # parametrization/fixtures.
    policy = UnexpecTEDIntentPolicy(
        UnexpecTEDIntentPolicy.get_default_config(),
        default_model_storage,
        Resource("UnexpecTEDIntentPolicy"),
        default_execution_context,
        featurizer=IntentMaxHistoryTrackerFeaturizer(
            IntentTokenizerSingleStateFeaturizer()
        ),
    )
    trackers_for_training = [
        TrackerWithCachedStates.from_events(
            sender_id=f"{tracker_index}", evts=events, domain=domain
        )
        for tracker_index, events in enumerate(tracker_events)
    ]
    if skip_training:
        # When training is expected to be skipped, a warning should be raised.
        with pytest.warns(UserWarning):
            policy.train(trackers_for_training, domain, precomputations=None)
    else:
        policy.train(trackers_for_training, domain, precomputations=None)
def test_prepared_data_for_threshold_prediction(
    self,
    trained_policy: UnexpecTEDIntentPolicy,
    default_domain: Domain,
    stories_path: Path,
):
    training_trackers = train_trackers(
        default_domain, stories_path, augmentation_factor=0
    )
    training_model_data, _ = trained_policy._prepare_for_training(
        training_trackers, default_domain, precomputations=None
    )
    data_for_prediction = trained_policy._prepare_data_for_prediction(
        training_model_data
    )

    # Only features needed at prediction time should remain.
    assert set(data_for_prediction.data.keys()).issubset(PREDICTION_FEATURES)
def test_pick_thresholds_for_labels(
    self, tolerance: float, expected_thresholds: List[float]
):
    # `tolerance` and `expected_thresholds` are supplied via pytest
    # parametrization.
    label_id_tolerance_thresholds = {
        0: [
            0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
            -0.1, -0.1,
            -0.5, -0.5,
            -1.2, -1.2,
            -2.3, -2.3,
            -2.7,
            -2.9,
            -3.2, -3.2,
            -4.1, -4.1,
        ],
        3: [-0.1] * 20,
        4: [0.2 - (index * 0.3) for index in range(20)],
    }
    thresholds = UnexpecTEDIntentPolicy._pick_thresholds(
        label_id_tolerance_thresholds, tolerance
    )
    assert sorted(list(thresholds.keys())) == sorted(
        list(label_id_tolerance_thresholds.keys())
    )
    computed_values = list(thresholds.values())
    assert expected_thresholds == computed_values
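
# A minimal sketch of the picking step exercised above (assumed semantics, not
# the actual implementation): each label maps to 20 quantile thresholds ordered
# from strictest to most lenient, and `tolerance` (between 0.0 and 1.0) selects
# a position in that list. The helper name is hypothetical.
def _sketch_pick_thresholds(label_id_tolerance_thresholds, tolerance):
    picked_thresholds = {}
    for label_id, quantile_thresholds in label_id_tolerance_thresholds.items():
        number_of_thresholds = len(quantile_thresholds)
        threshold_index = min(
            int(tolerance * number_of_thresholds), number_of_thresholds - 1
        )
        picked_thresholds[label_id] = quantile_thresholds[threshold_index]
    return picked_thresholds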
def test_ranking_length_and_renormalization(
    self,
    trained_policy: UnexpecTEDIntentPolicy,
    tracker: DialogueStateTracker,
    default_domain: Domain,
):
    precomputations = None
    prediction_metadata = trained_policy.predict_action_probabilities(
        tracker, default_domain, precomputations
    ).action_metadata
    assert (
        prediction_metadata is None
        or len(prediction_metadata[RANKING_KEY])
        == trained_policy.config[RANKING_LENGTH]
    )
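
# A generic, illustrative sketch of a fixed-length ranking (an assumption about
# the shape of `prediction_metadata[RANKING_KEY]`, not the policy's actual
# ranking code): keep only the `ranking_length` highest-scoring labels.
def _sketch_top_k_ranking(scores: np.ndarray, ranking_length: int):
    top_indices = np.argsort(scores)[::-1][:ranking_length]
    return [(int(index), float(scores[index])) for index in top_indices]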
def test_post_training_threshold_computation(
    self,
    trained_policy: UnexpecTEDIntentPolicy,
    default_domain: Domain,
    stories_path: Path,
):
    training_trackers = train_trackers(
        default_domain, stories_path, augmentation_factor=0
    )
    training_model_data, label_ids = trained_policy._prepare_for_training(
        training_trackers, default_domain, precomputations=None
    )
    trained_policy.compute_label_quantiles_post_training(
        training_model_data, label_ids
    )
    computed_thresholds = trained_policy.label_quantiles

    # -1 is used for padding and hence is not expected in the keys.
    expected_keys = list(np.unique(label_ids))
    expected_keys.remove(-1)

    assert sorted(list(computed_thresholds.keys())) == sorted(expected_keys)
def test_filter_training_trackers(
    self,
    tracker_events_for_training: List[List[Event]],
    expected_trackers_with_events: List[List[Event]],
    domain: Domain,
):
    trackers_for_training = [
        TrackerWithCachedStates.from_events(
            sender_id=f"{tracker_index}", evts=events, domain=domain
        )
        for tracker_index, events in enumerate(tracker_events_for_training)
    ]
    filtered_trackers = UnexpecTEDIntentPolicy._get_trackers_for_training(
        trackers_for_training
    )
    assert len(filtered_trackers) == len(expected_trackers_with_events)
    for collected_tracker, expected_tracker_events in zip(
        filtered_trackers, expected_trackers_with_events
    ):
        collected_tracker_events = list(collected_tracker.events)
        assert collected_tracker_events == expected_tracker_events
def test_label_quantiles_computation(self):
    label_id_scores = {
        0: {
            POSITIVE_SCORES_KEY: [1.3, 0.2],
            NEGATIVE_SCORES_KEY: [
                -0.1, -1.2, -2.3, -4.1, -0.5, 0.2, 0.8, 0.9, -3.2, -2.7,
            ],
        },
        3: {POSITIVE_SCORES_KEY: [1.3, 0.2], NEGATIVE_SCORES_KEY: [-0.1]},
        6: {POSITIVE_SCORES_KEY: [1.3, 0.2], NEGATIVE_SCORES_KEY: []},
    }
    expected_thresholds = {
        0: [
            0.2, 0.2, 0.2, 0.2, 0.2,
            -0.1, -0.1,
            -0.5, -0.5,
            -1.2, -1.2, -1.2,
            -2.3, -2.3,
            -2.7, -2.7,
            -3.2, -3.2,
            -4.1, -4.1,
        ],
        3: [-0.1] * 20,
        6: [0.2] * 20,
    }
    thresholds = UnexpecTEDIntentPolicy._compute_label_quantiles(label_id_scores)
    assert sorted(list(thresholds.keys())) == sorted(
        list(expected_thresholds.keys())
    )
    for label_id, tolerance_thresholds in thresholds.items():
        assert expected_thresholds[label_id] == tolerance_thresholds
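
# A minimal sketch of the quantile computation exercised above (assumed
# semantics, checked only against the expected values in this test, not taken
# from the actual implementation): for each label, 20 quantiles of the negative
# scores (fractions 1.0, 0.95, ..., 0.05 with "lower" interpolation) are capped
# at the minimum positive score; with no negative scores, the minimum positive
# score is repeated 20 times. The helper name is hypothetical.
def _sketch_compute_label_quantiles(label_id_scores):
    quantile_fractions = [1.0 - step / 100.0 for step in range(0, 100, 5)]
    label_quantiles = {}
    for label_id, scores in label_id_scores.items():
        minimum_positive_score = min(scores[POSITIVE_SCORES_KEY])
        negative_scores = scores[NEGATIVE_SCORES_KEY]
        if negative_scores:
            quantile_values = np.quantile(
                # NumPy < 1.22 spells this keyword `interpolation="lower"`.
                negative_scores, quantile_fractions, method="lower"
            )
            label_quantiles[label_id] = [
                min(minimum_positive_score, float(value))
                for value in quantile_values
            ]
        else:
            label_quantiles[label_id] = [minimum_positive_score] * len(
                quantile_fractions
            )
    return label_quantiles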