def test_label_data_assembly(
    self, trained_policy: UnexpecTEDIntentPolicy, default_domain: Domain
):
    """Check the layout of the label data assembled for intent labels.

    Encodes all labels of ``default_domain`` with the policy's state
    featurizer, assembles them via ``_assemble_label_data`` and verifies the
    keys, sub-keys, number of examples and feature units of the result.
    """
    intent_label_key = f"{LABEL}_{INTENT}"

    # Build the input: encoded labels converted into attribute data.
    featurizer = trained_policy.featurizer.state_featurizer
    label_features = featurizer.encode_all_labels(
        default_domain, precomputations=None
    )
    attribute_data, _ = model_data_utils.convert_to_data_format(label_features)

    label_data = trained_policy._assemble_label_data(attribute_data, default_domain)
    signature = label_data.get_signature()

    # Top-level keys: the intent features first, then the label ids.
    assert list(signature.keys()) == [intent_label_key, LABEL]
    assert label_data.num_examples == len(default_domain.intents)

    # Sub-keys of each top-level entry.
    assert list(signature[intent_label_key].keys()) == [MASK, SENTENCE]
    assert list(signature[LABEL].keys()) == [IDS]

    # The sentence feature is expected to have one unit per intent.
    sentence_signature = signature[intent_label_key][SENTENCE][0]
    assert sentence_signature.units == len(default_domain.intents)
def train(
    self,
    training_trackers: List[TrackerWithCachedStates],
    domain: Domain,
    interpreter: NaturalLanguageInterpreter,
    **kwargs: Any,
) -> None:
    """Fit the label encoder and the sklearn model on the given trackers.

    Args:
        training_trackers: trackers that are featurized into training data.
        domain: the domain passed on to featurization.
        interpreter: interpreter passed on to featurization.
        **kwargs: forwarded to ``featurize_for_training`` and merged into
            ``self._train_params`` before the model is instantiated.

    Side effects:
        Sets ``self.zero_state_features``, ``self.label_encoder`` and
        ``self.model``; updates ``self._train_params`` in place.
    """
    state_features, label_ids = self.featurize_for_training(
        training_trackers, domain, interpreter, **kwargs
    )
    (
        training_data,
        self.zero_state_features,
    ) = model_data_utils.convert_to_data_format(state_features)

    self._train_params.update(kwargs)
    estimator = self.model_architecture(**self._train_params)
    cv_score = None

    # Note: clone is called throughout to avoid mutating default arguments.
    self.label_encoder = clone(self.label_encoder).fit(label_ids)
    X = self._preprocess_data(training_data)
    y = self.label_encoder.transform(label_ids)

    if self.shuffle:
        X, y = sklearn_shuffle(X, y)

    if self.cv is not None:
        # Cross validation requested: search over the (possibly empty) grid.
        estimator, cv_score = self._search_and_score(
            estimator, X, y, self.param_grid or {}
        )
    else:
        estimator = clone(estimator).fit(X, y)

    self.model = estimator
    logger.info("Done fitting sklearn policy model")

    if cv_score is None:
        return
    logger.info(f"Cross validation score: {cv_score:.5f}")
def test_label_data_assembly(
    self, trained_policy: TEDPolicy, default_domain: Domain
):
    """Check the layout of the label data assembled for action labels.

    Encodes all labels of ``default_domain`` with the policy's state
    featurizer, assembles them via ``_assemble_label_data`` and verifies the
    keys, sub-keys, number of examples and feature units of the result.
    """
    action_label_key = f"{LABEL}_{ACTION_NAME}"

    # Build the input: encoded labels converted into attribute data.
    featurizer = trained_policy.featurizer.state_featurizer
    label_features = featurizer.encode_all_labels(
        default_domain, precomputations=None
    )
    attribute_data, _ = model_data_utils.convert_to_data_format(label_features)

    label_data = trained_policy._assemble_label_data(attribute_data, default_domain)
    signature = label_data.get_signature()

    # Top-level keys: the action-name features first, then the label ids.
    assert list(signature.keys()) == [action_label_key, f"{LABEL}"]
    assert label_data.num_examples == default_domain.num_actions

    # Sub-keys of each top-level entry.
    assert list(signature[action_label_key].keys()) == [MASK, SENTENCE]
    assert list(signature[LABEL].keys()) == [IDS]

    # The sentence feature is expected to have one unit per action.
    sentence_signature = signature[action_label_key][SENTENCE][0]
    assert sentence_signature.units == default_domain.num_actions
def predict_action_probabilities(
    self,
    tracker: DialogueStateTracker,
    domain: Domain,
    interpreter: NaturalLanguageInterpreter,
    **kwargs: Any,
) -> PolicyPrediction:
    """Predicts the next action the bot should take after seeing the tracker.

    Args:
        tracker: the :class:`rasa.core.trackers.DialogueStateTracker`
        domain: the :class:`rasa.shared.core.domain.Domain`
        interpreter: Interpreter which may be used by the policies to create
            additional features.

    Returns:
        The policy's prediction (e.g. the probabilities for the actions).
    """
    state_features = self._featurize_for_prediction(tracker, domain, interpreter)

    # Reuse the zero-state features stored on the policy for the conversion.
    model_input, _ = model_data_utils.convert_to_data_format(
        state_features, self.zero_state_features
    )
    preprocessed = self._preprocess_data(model_input)
    probabilities = self.model.predict_proba(preprocessed)

    action_scores = self._postprocess_prediction(probabilities, domain)
    return self._prediction(action_scores)
def _create_model_data(
    self,
    tracker_state_features: List[List[Dict[Text, List["Features"]]]],
    label_ids: Optional[np.ndarray] = None,
    encoded_all_labels: Optional[List[Dict[Text, List["Features"]]]] = None,
) -> RasaModelData:
    """Combine all model related data into RasaModelData.

    When both ``label_ids`` and ``encoded_all_labels`` are given, the label
    ids are added as features and ``self.zero_state_features`` is refreshed
    from the conversion. Otherwise the previously stored
    ``self.zero_state_features`` are passed to the conversion.

    Args:
        tracker_state_features: a dictionary of attributes (INTENT, TEXT,
            ACTION_NAME, ACTION_TEXT, ENTITIES, SLOTS, ACTIVE_LOOP) to a list
            of features for all dialogue turns in all training trackers
        label_ids: the label ids (e.g. action ids) for every dialogue turn in
            all training trackers
        encoded_all_labels: a list of dictionaries containing attribute
            features for labels ids

    Returns:
        RasaModelData
    """
    model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY)
    has_labels = label_ids is not None and encoded_all_labels is not None

    if not has_labels:
        # method is called during prediction
        attribute_data, _ = convert_to_data_format(
            tracker_state_features, self.zero_state_features
        )
    else:
        # Give every sequence of label ids a trailing axis before stacking.
        expanded_label_ids = np.array(
            [np.expand_dims(sequence_ids, -1) for sequence_ids in label_ids]
        )
        model_data.add_features(LABEL_KEY, LABEL_SUB_KEY, [expanded_label_ids])
        attribute_data, self.zero_state_features = convert_to_data_format(
            tracker_state_features
        )

    model_data.add_data(attribute_data)

    # Dialogue lengths are taken from the mask of the first attribute present.
    first_attribute = next(iter(attribute_data.keys()))
    model_data.add_lengths(DIALOGUE, LENGTH, first_attribute, MASK)

    return model_data
def predict_action_probabilities(
    self,
    tracker: DialogueStateTracker,
    domain: Domain,
    interpreter: NaturalLanguageInterpreter,
    **kwargs: Any,
) -> List[float]:
    """Predict action probabilities for a single tracker.

    Args:
        tracker: the tracker to featurize; it is wrapped in a one-element
            list before being handed to the featurizer.
        domain: the domain used for featurization and post-processing.
        interpreter: interpreter passed on to the featurizer.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        The result of ``_postprocess_prediction`` applied to the model's
        ``predict_proba`` output.
    """
    state_features = self.featurizer.create_state_features(
        [tracker], domain, interpreter
    )

    # Reuse the zero-state features stored on the policy for the conversion.
    model_input, _ = model_data_utils.convert_to_data_format(
        state_features, self.zero_state_features
    )
    preprocessed = self._preprocess_data(model_input)
    probabilities = self.model.predict_proba(preprocessed)

    return self._postprocess_prediction(probabilities, domain)
def _create_label_data(
    self, domain: Domain, interpreter: NaturalLanguageInterpreter
) -> Tuple[RasaModelData, List[Dict[Text, List["Features"]]]]:
    """Encode all labels of the domain and assemble them into label data.

    Args:
        domain: the domain whose labels are encoded.
        interpreter: interpreter passed on to the state featurizer.

    Returns:
        A tuple of the assembled label data and the encoded labels as
        returned by the state featurizer.
    """
    # encode all label_ids with policies' featurizer
    encoded_all_labels = self.featurizer.state_featurizer.encode_all_labels(
        domain, interpreter
    )

    attribute_data, _ = convert_to_data_format(
        encoded_all_labels, featurizers=self.config[FEATURIZERS]
    )

    return self._assemble_label_data(attribute_data, domain), encoded_all_labels
def _create_label_data(
    self, domain: Domain, interpreter: NaturalLanguageInterpreter
) -> Tuple[RasaModelData, List[Dict[Text, List["Features"]]]]:
    """Encode all actions of the domain and pack them into label data.

    Args:
        domain: the domain whose actions are encoded; ``domain.num_actions``
            determines how many label ids are generated.
        interpreter: interpreter passed on to the state featurizer.

    Returns:
        A tuple of the label data (attribute features stored under keys
        prefixed with ``LABEL_KEY`` plus the label ids) and the encoded
        actions as returned by the state featurizer.
    """
    # encode all label_ids with policies' featurizer
    encoded_all_labels = self.featurizer.state_featurizer.encode_all_actions(
        domain, interpreter
    )
    attribute_data, _ = convert_to_data_format(encoded_all_labels)

    label_data = RasaModelData()
    label_data.add_data(attribute_data, key_prefix=f"{LABEL_KEY}_")

    # Label ids 0..num_actions-1, each given a trailing axis.
    id_column = np.expand_dims(np.arange(domain.num_actions), -1)
    label_data.add_features(LABEL_KEY, LABEL_SUB_KEY, [id_column])

    return label_data, encoded_all_labels
def _create_data_for_entities(
    self, entity_tags: Optional[List[List[Dict[Text, List["Features"]]]]]
) -> Optional[Data]:
    """Convert entity tags into model data if entity recognition is usable.

    Args:
        entity_tags: the entity tag features, or ``None``.

    Returns:
        The converted entity tag data, or ``None`` when entity recognition is
        disabled in the config or no usable entity tags are present. In the
        latter case the ``ENTITY_RECOGNITION`` config entry is switched to
        ``False`` as a side effect.
    """
    if not self.config[ENTITY_RECOGNITION]:
        return None

    # check that there are real entity tags
    has_real_tags = entity_tags and self._should_extract_entities(entity_tags)
    if not has_real_tags:
        # there are no "real" entity tags
        logger.debug(
            f"Entity recognition cannot be performed, "
            f"set '{ENTITY_RECOGNITION}' config parameter to 'False'."
        )
        self.config[ENTITY_RECOGNITION] = False
        return None

    converted_tags, _ = convert_to_data_format(entity_tags)
    return converted_tags
def _create_model_data(
    self,
    tracker_state_features: List[List[Dict[Text, List["Features"]]]],
    label_ids: Optional[np.ndarray] = None,
    entity_tags: Optional[List[List[Dict[Text, List["Features"]]]]] = None,
    encoded_all_labels: Optional[List[Dict[Text, List["Features"]]]] = None,
) -> RasaModelData:
    """Combine all model related data into RasaModelData.

    When both ``label_ids`` and ``encoded_all_labels`` are given, label ids
    and (if available) entity tag data are added and ``self.fake_features``
    is refreshed from the conversion. Otherwise the previously stored
    ``self.fake_features`` are passed to the conversion.

    Args:
        tracker_state_features: a dictionary of attributes
            (INTENT, TEXT, ACTION_NAME, ACTION_TEXT, ENTITIES, SLOTS,
            ACTIVE_LOOP) to a list of features for all dialogue turns in all
            training trackers
        label_ids: the label ids (e.g. action ids) for every dialogue turn in
            all training trackers
        entity_tags: a dictionary of entity type (ENTITY_TAGS) to a list of
            features containing entity tag ids for text user inputs otherwise
            empty dict for all dialogue turns in all training trackers
        encoded_all_labels: a list of dictionaries containing attribute
            features for label ids

    Returns:
        RasaModelData
    """
    model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY)
    has_labels = label_ids is not None and encoded_all_labels is not None

    if has_labels:
        # Give every sequence of label ids a trailing axis before stacking.
        expanded_label_ids = np.array(
            [np.expand_dims(sequence_ids, -1) for sequence_ids in label_ids]
        )
        label_feature = FeatureArray(expanded_label_ids, number_of_dimensions=3)
        model_data.add_features(LABEL_KEY, LABEL_SUB_KEY, [label_feature])

        attribute_data, self.fake_features = convert_to_data_format(
            tracker_state_features, featurizers=self.config[FEATURIZERS]
        )

        # Entity tag data is optional; it is added before the attribute data.
        entity_tags_data = self._create_data_for_entities(entity_tags)
        if entity_tags_data is not None:
            model_data.add_data(entity_tags_data)
    else:
        # method is called during prediction
        attribute_data, _ = convert_to_data_format(
            tracker_state_features,
            self.fake_features,
            featurizers=self.config[FEATURIZERS],
        )

    model_data.add_data(attribute_data)
    for text_attribute in (TEXT, ACTION_TEXT):
        model_data.add_lengths(
            text_attribute, SEQUENCE_LENGTH, text_attribute, SEQUENCE
        )

    # add the dialogue lengths
    first_attribute = next(iter(attribute_data.keys()))
    masks = model_data.data[first_attribute][MASK][0]
    dialogue_lengths = np.array(
        [np.size(np.squeeze(mask, -1)) for mask in masks]
    )
    model_data.data[DIALOGUE][LENGTH] = [
        FeatureArray(dialogue_lengths, number_of_dimensions=1)
    ]

    # make sure all keys are in the same order during training and prediction
    model_data.sort()

    return model_data