class EmbeddingIntentClassifier(Component): """Intent classifier using supervised embeddings. The embedding intent classifier embeds user inputs and intent labels into the same space. Supervised embeddings are trained by maximizing similarity between them. It also provides rankings of the labels that did not "win". The embedding intent classifier needs to be preceded by a featurizer in the pipeline. This featurizer creates the features used for the embeddings. It is recommended to use ``CountVectorsFeaturizer`` that can be optionally preceded by ``SpacyNLP`` and ``SpacyTokenizer``. Based on the starspace idea from: https://arxiv.org/abs/1709.03856. However, in this implementation the `mu` parameter is treated differently and additional hidden layers are added together with dropout. """ provides = ["intent", "intent_ranking"] requires = [ any_of(DENSE_FEATURE_NAMES[TEXT_ATTRIBUTE], SPARSE_FEATURE_NAMES[TEXT_ATTRIBUTE]) ] # default properties (DOC MARKER - don't remove) defaults = { # nn architecture # sizes of hidden layers before the embedding layer for input words # the number of hidden layers is thus equal to the length of this list "hidden_layers_sizes_a": [256, 128], # sizes of hidden layers before the embedding layer for intent labels # the number of hidden layers is thus equal to the length of this list "hidden_layers_sizes_b": [], # Whether to share the hidden layer weights between input words and labels "share_hidden_layers": False, # training parameters # initial and final batch sizes - batch size will be # linearly increased for each epoch "batch_size": [64, 256], # how to create batches "batch_strategy": "balanced", # string 'sequence' or 'balanced' # number of epochs "epochs": 300, # set random seed to any int to get reproducible results "random_seed": None, # embedding parameters # default dense dimension used if no dense features are present "dense_dim": { "text": 512, "label": 20 }, # dimension size of embedding vectors "embed_dim": 20, # the type of the similarity "num_neg": 20, # flag if minimize only maximum similarity over incorrect actions "similarity_type": "auto", # string 'auto' or 'cosine' or 'inner' # the type of the loss function "loss_type": "softmax", # string 'softmax' or 'margin' # number of top intents to normalize scores for softmax loss_type # set to 0 to turn off normalization "ranking_length": 10, # how similar the algorithm should try # to make embedding vectors for correct labels "mu_pos": 0.8, # should be 0.0 < ... < 1.0 for 'cosine' # maximum negative similarity for incorrect labels "mu_neg": -0.4, # should be -1.0 < ... < 1.0 for 'cosine' # flag: if true, only minimize the maximum similarity for incorrect labels "use_max_sim_neg": True, # scale loss inverse proportionally to confidence of correct prediction "scale_loss": True, # regularization parameters # the scale of L2 regularization "C2": 0.002, # the scale of how critical the algorithm should be of minimizing the # maximum similarity between embeddings of different labels "C_emb": 0.8, # dropout rate for rnn "droprate": 0.2, # visualization of accuracy # how often to calculate training accuracy "evaluate_every_num_epochs": 20, # small values may hurt performance # how many examples to use for calculation of training accuracy "evaluate_on_num_examples": 0, # large values may hurt performance } # end default properties (DOC MARKER - don't remove) @staticmethod def _check_old_config_variables(config: Dict[Text, Any]) -> None: """Config migration warning""" removed_tokenization_params = [ "intent_tokenization_flag", "intent_split_symbol", ] for removed_param in removed_tokenization_params: if removed_param in config: raise_warning( f"Intent tokenization has been moved to Tokenizer components. " f"Your config still mentions '{removed_param}'. " f"Tokenization may fail if you specify the parameter here. " f"Please specify the parameter 'intent_tokenization_flag' " f"and 'intent_split_symbol' in the " f"tokenizer of your NLU pipeline", FutureWarning, ) # init helpers def _load_nn_architecture_params(self, config: Dict[Text, Any]) -> None: self.hidden_layer_sizes = { "text": config["hidden_layers_sizes_a"], "label": config["hidden_layers_sizes_b"], } self.share_hidden_layers = config["share_hidden_layers"] if (self.share_hidden_layers and self.hidden_layer_sizes["text"] != self.hidden_layer_sizes["label"]): raise ValueError("If hidden layer weights are shared," "hidden_layer_sizes for a and b must coincide.") self.batch_in_size = config["batch_size"] self.batch_in_strategy = config["batch_strategy"] self.epochs = config["epochs"] self.random_seed = self.component_config["random_seed"] def _load_embedding_params(self, config: Dict[Text, Any]) -> None: self.embed_dim = config["embed_dim"] self.num_neg = config["num_neg"] self.dense_dim = config["dense_dim"] self.similarity_type = config["similarity_type"] self.loss_type = config["loss_type"] if self.similarity_type == "auto": if self.loss_type == "softmax": self.similarity_type = "inner" elif self.loss_type == "margin": self.similarity_type = "cosine" self.ranking_length = config["ranking_length"] self.mu_pos = config["mu_pos"] self.mu_neg = config["mu_neg"] self.use_max_sim_neg = config["use_max_sim_neg"] self.scale_loss = config["scale_loss"] def _load_regularization_params(self, config: Dict[Text, Any]) -> None: self.C2 = config["C2"] self.C_emb = config["C_emb"] self.droprate = config["droprate"] def _load_visual_params(self, config: Dict[Text, Any]) -> None: self.evaluate_every_num_epochs = config["evaluate_every_num_epochs"] if self.evaluate_every_num_epochs < 1: self.evaluate_every_num_epochs = self.epochs self.evaluate_on_num_examples = config["evaluate_on_num_examples"] def _load_params(self) -> None: self._check_old_config_variables(self.component_config) self._tf_config = train_utils.load_tf_config(self.component_config) self._load_nn_architecture_params(self.component_config) self._load_embedding_params(self.component_config) self._load_regularization_params(self.component_config) self._load_visual_params(self.component_config) # package safety checks @classmethod def required_packages(cls) -> List[Text]: return ["tensorflow"] def __init__( self, component_config: Optional[Dict[Text, Any]] = None, inverted_label_dict: Optional[Dict[int, Text]] = None, session: Optional["tf.Session"] = None, graph: Optional["tf.Graph"] = None, batch_placeholder: Optional["tf.Tensor"] = None, similarity_all: Optional["tf.Tensor"] = None, pred_confidence: Optional["tf.Tensor"] = None, similarity: Optional["tf.Tensor"] = None, message_embed: Optional["tf.Tensor"] = None, label_embed: Optional["tf.Tensor"] = None, all_labels_embed: Optional["tf.Tensor"] = None, batch_tuple_sizes: Optional[Dict] = None, ) -> None: """Declare instance variables with default values""" super().__init__(component_config) self._load_params() # transform numbers to labels self.inverted_label_dict = inverted_label_dict # encode all label_ids with numbers self._label_data = None # tf related instances self.session = session self.graph = graph self.batch_in = batch_placeholder self.sim_all = similarity_all self.pred_confidence = pred_confidence self.sim = similarity # persisted embeddings self.message_embed = message_embed self.label_embed = label_embed self.all_labels_embed = all_labels_embed # keep the input tuple sizes in self.batch_in self.batch_tuple_sizes = batch_tuple_sizes # internal tf instances self._iterator = None self._train_op = None self._is_training = None # training data helpers: @staticmethod def _create_label_id_dict(training_data: "TrainingData", attribute: Text) -> Dict[Text, int]: """Create label_id dictionary""" distinct_label_ids = { example.get(attribute) for example in training_data.intent_examples } - {None} return { label_id: idx for idx, label_id in enumerate(sorted(distinct_label_ids)) } @staticmethod def _find_example_for_label(label: Text, examples: List["Message"], attribute: Text) -> Optional["Message"]: for ex in examples: if ex.get(attribute) == label: return ex return None @staticmethod def _check_labels_features_exist(labels_example: List["Message"], attribute: Text) -> bool: """Check if all labels have features set""" for label_example in labels_example: if (label_example.get(SPARSE_FEATURE_NAMES[attribute]) is None and label_example.get(DENSE_FEATURE_NAMES[attribute]) is None): return False return True @staticmethod def _extract_and_add_features( message: "Message", attribute: Text ) -> Tuple[Optional[scipy.sparse.spmatrix], Optional[np.ndarray]]: sparse_features = None dense_features = None if message.get(SPARSE_FEATURE_NAMES[attribute]) is not None: sparse_features = message.get(SPARSE_FEATURE_NAMES[attribute]) if message.get(DENSE_FEATURE_NAMES[attribute]) is not None: dense_features = message.get(DENSE_FEATURE_NAMES[attribute]) if sparse_features is not None and dense_features is not None: if sparse_features.shape[0] != dense_features.shape[0]: raise ValueError( f"Sequence dimensions for sparse and dense features " f"don't coincide in '{message.text}' for attribute '{attribute}'." ) if attribute != INTENT_ATTRIBUTE: # Use only the CLS token vector as features sparse_features = sequence_to_sentence_features(sparse_features) dense_features = sequence_to_sentence_features(dense_features) return sparse_features, dense_features def _extract_labels_precomputed_features( self, label_examples: List["Message"], attribute: Text = INTENT_ATTRIBUTE) -> List[np.ndarray]: """Collect precomputed encodings""" sparse_features = [] dense_features = [] for e in label_examples: _sparse, _dense = self._extract_and_add_features(e, attribute) if _sparse is not None: sparse_features.append(_sparse) if _dense is not None: dense_features.append(_dense) sparse_features = np.array(sparse_features) dense_features = np.array(dense_features) return [sparse_features, dense_features] @staticmethod def _compute_default_label_features( labels_example: List["Message"], ) -> List[np.ndarray]: """Compute one-hot representation for the labels""" return [ np.array([ np.expand_dims(a, 0) for a in np.eye(len(labels_example), dtype=np.float32) ]) ] def _create_label_data( self, training_data: "TrainingData", label_id_dict: Dict[Text, int], attribute: Text, ) -> "SessionDataType": """Create matrix with label_ids encoded in rows as bag of words. Find a training example for each label and get the encoded features from the corresponding Message object. If the features are already computed, fetch them from the message object else compute a one hot encoding for the label as the feature vector. """ # Collect one example for each label labels_idx_example = [] for label_name, idx in label_id_dict.items(): label_example = self._find_example_for_label( label_name, training_data.intent_examples, attribute) labels_idx_example.append((idx, label_example)) # Sort the list of tuples based on label_idx labels_idx_example = sorted(labels_idx_example, key=lambda x: x[0]) labels_example = [example for (_, example) in labels_idx_example] # Collect features, precomputed if they exist, else compute on the fly if self._check_labels_features_exist(labels_example, attribute): features = self._extract_labels_precomputed_features( labels_example, attribute) else: features = self._compute_default_label_features(labels_example) label_data = {} self._add_to_session_data(label_data, "label_features", features) self._add_mask_to_session_data(label_data, "label_mask", "label_features") return label_data def _use_default_label_features(self, label_ids: np.ndarray) -> List[np.ndarray]: return [ np.array([ self._label_data["label_features"][0][label_id] for label_id in label_ids ]) ] # noinspection PyPep8Naming def _create_session_data( self, training_data: List["Message"], label_id_dict: Optional[Dict[Text, int]] = None, label_attribute: Optional[Text] = INTENT_ATTRIBUTE, ) -> "SessionDataType": """Prepare data for training and create a SessionDataType object""" X_sparse = [] X_dense = [] Y_sparse = [] Y_dense = [] label_ids = [] for e in training_data: if e.get(label_attribute): _sparse, _dense = self._extract_and_add_features( e, TEXT_ATTRIBUTE) if _sparse is not None: X_sparse.append(_sparse) if _dense is not None: X_dense.append(_dense) _sparse, _dense = self._extract_and_add_features( e, label_attribute) if _sparse is not None: Y_sparse.append(_sparse) if _dense is not None: Y_dense.append(_dense) if label_id_dict: label_ids.append(label_id_dict[e.get(label_attribute)]) X_sparse = np.array(X_sparse) X_dense = np.array(X_dense) Y_sparse = np.array(Y_sparse) Y_dense = np.array(Y_dense) label_ids = np.array(label_ids) session_data = {} self._add_to_session_data(session_data, "text_features", [X_sparse, X_dense]) self._add_to_session_data(session_data, "label_features", [Y_sparse, Y_dense]) # explicitly add last dimension to label_ids # to track correctly dynamic sequences self._add_to_session_data(session_data, "label_ids", [np.expand_dims(label_ids, -1)]) if label_id_dict and ("label_features" not in session_data or not session_data["label_features"]): # no label features are present, get default features from _label_data session_data["label_features"] = self._use_default_label_features( label_ids) self._add_mask_to_session_data(session_data, "text_mask", "text_features") self._add_mask_to_session_data(session_data, "label_mask", "label_features") return session_data @staticmethod def _add_to_session_data(session_data: SessionDataType, key: Text, features: List[np.ndarray]): if not features: return session_data[key] = [] for data in features: if data.size > 0: session_data[key].append(data) @staticmethod def _add_mask_to_session_data(session_data: SessionDataType, key: Text, from_key: Text): session_data[key] = [] for data in session_data[from_key]: if data.size > 0: # explicitly add last dimension to mask # to track correctly dynamic sequences mask = np.array([np.ones((x.shape[0], 1)) for x in data]) session_data[key].append(mask) break # tf helpers: def _create_tf_embed_fnn( self, x_in: "tf.Tensor", layer_sizes: List[int], fnn_name: Text, embed_name: Text, ) -> "tf.Tensor": """Create nn with hidden layers and name""" x = train_utils.create_tf_fnn( x_in, layer_sizes, self.droprate, self.C2, self._is_training, layer_name_suffix=fnn_name, ) return train_utils.create_tf_embed( x, self.embed_dim, self.C2, self.similarity_type, layer_name_suffix=embed_name, ) def _combine_sparse_dense_features( self, features: List[Union[tf.Tensor, tf.SparseTensor]], mask: tf.Tensor, name: Text, ) -> tf.Tensor: dense_features = [] dense_dim = self.dense_dim[name] # if dense features are present use the feature dimension of the dense features for f in features: if not isinstance(f, tf.SparseTensor): dense_dim = f.shape[-1] break for f in features: if isinstance(f, tf.SparseTensor): dense_features.append( train_utils.tf_dense_layer_for_sparse( f, dense_dim, name, self.C2)) else: dense_features.append(f) output = tf.concat(dense_features, axis=-1) * mask # reduce dimensionality of output output = tf.reduce_sum(output, axis=1) return output def _build_tf_train_graph( self, session_data: SessionDataType) -> Tuple["tf.Tensor", "tf.Tensor"]: # get in tensors from generator self.batch_in = self._iterator.get_next() # convert encoded all labels into the batch format label_batch = train_utils.prepare_batch(self._label_data) # convert batch format into sparse and dense tensors batch_data, _ = train_utils.batch_to_session_data( self.batch_in, session_data) label_data, _ = train_utils.batch_to_session_data( label_batch, self._label_data) a = self._combine_sparse_dense_features(batch_data["text_features"], batch_data["text_mask"][0], "text") b = self._combine_sparse_dense_features(batch_data["label_features"], batch_data["label_mask"][0], "label") all_bs = self._combine_sparse_dense_features( label_data["label_features"], label_data["label_mask"][0], "label") self.message_embed = self._create_tf_embed_fnn( a, self.hidden_layer_sizes["text"], fnn_name="text_label" if self.share_hidden_layers else "text", embed_name="text", ) self.label_embed = self._create_tf_embed_fnn( b, self.hidden_layer_sizes["label"], fnn_name="text_label" if self.share_hidden_layers else "label", embed_name="label", ) self.all_labels_embed = self._create_tf_embed_fnn( all_bs, self.hidden_layer_sizes["label"], fnn_name="text_label" if self.share_hidden_layers else "label", embed_name="label", ) return train_utils.calculate_loss_acc( self.message_embed, self.label_embed, b, self.all_labels_embed, all_bs, self.num_neg, None, self.loss_type, self.mu_pos, self.mu_neg, self.use_max_sim_neg, self.C_emb, self.scale_loss, ) def _build_tf_pred_graph(self, session_data: "SessionDataType") -> "tf.Tensor": shapes, types = train_utils.get_shapes_types(session_data) batch_placeholder = [] for s, t in zip(shapes, types): batch_placeholder.append(tf.placeholder(t, s)) self.batch_in = tf.tuple(batch_placeholder) batch_data, self.batch_tuple_sizes = train_utils.batch_to_session_data( self.batch_in, session_data) a = self._combine_sparse_dense_features(batch_data["text_features"], batch_data["text_mask"][0], "text") b = self._combine_sparse_dense_features(batch_data["label_features"], batch_data["label_mask"][0], "label") self.all_labels_embed = tf.constant( self.session.run(self.all_labels_embed)) self.message_embed = self._create_tf_embed_fnn( a, self.hidden_layer_sizes["text"], fnn_name="text_label" if self.share_hidden_layers else "text", embed_name="text", ) self.sim_all = train_utils.tf_raw_sim( self.message_embed[:, tf.newaxis, :], self.all_labels_embed[tf.newaxis, :, :], None, ) self.label_embed = self._create_tf_embed_fnn( b, self.hidden_layer_sizes["label"], fnn_name="text_label" if self.share_hidden_layers else "label", embed_name="label", ) self.sim = train_utils.tf_raw_sim(self.message_embed[:, tf.newaxis, :], self.label_embed, None) return train_utils.confidence_from_sim(self.sim_all, self.similarity_type) @staticmethod def _get_num_of_features(session_data: "SessionDataType", key: Text) -> int: num_features = 0 for data in session_data[key]: if data.size > 0: num_features += data[0].shape[-1] return num_features def check_input_dimension_consistency(self, session_data: "SessionDataType"): """Check if text features and intent features have the same dimension.""" if self.share_hidden_layers: num_text_features = self._get_num_of_features( session_data, "text_features") num_intent_features = self._get_num_of_features( session_data, "label_features") if num_text_features != num_intent_features: raise ValueError( "If embeddings are shared, " "text features and label features " "must coincide. Check the output dimensions of previous components." ) def preprocess_train_data(self, training_data: "TrainingData"): """Prepares data for training. Performs sanity checks on training data, extracts encodings for labels. """ label_id_dict = self._create_label_id_dict(training_data, attribute=INTENT_ATTRIBUTE) self.inverted_label_dict = {v: k for k, v in label_id_dict.items()} self._label_data = self._create_label_data(training_data, label_id_dict, attribute=INTENT_ATTRIBUTE) session_data = self._create_session_data( training_data.intent_examples, label_id_dict, label_attribute=INTENT_ATTRIBUTE, ) self.check_input_dimension_consistency(session_data) return session_data @staticmethod def _check_enough_labels(session_data: "SessionDataType") -> bool: return len(np.unique(session_data["label_ids"])) >= 2 def train( self, training_data: "TrainingData", cfg: Optional["RasaNLUModelConfig"] = None, **kwargs: Any, ) -> None: """Train the embedding intent classifier on a data set.""" logger.debug("Started training embedding classifier.") # set numpy random seed np.random.seed(self.random_seed) session_data = self.preprocess_train_data(training_data) possible_to_train = self._check_enough_labels(session_data) if not possible_to_train: logger.error("Can not train a classifier. " "Need at least 2 different classes. " "Skipping training of classifier.") return if self.evaluate_on_num_examples: session_data, eval_session_data = train_utils.train_val_split( session_data, self.evaluate_on_num_examples, self.random_seed, label_key="label_ids", ) else: eval_session_data = None self.graph = tf.Graph() with self.graph.as_default(): # set random seed tf.set_random_seed(self.random_seed) # allows increasing batch size batch_size_in = tf.placeholder(tf.int64) ( self._iterator, train_init_op, eval_init_op, ) = train_utils.create_iterator_init_datasets( session_data, eval_session_data, batch_size_in, self.batch_in_strategy, label_key="label_ids", ) self._is_training = tf.placeholder_with_default(False, shape=()) loss, acc = self._build_tf_train_graph(session_data) # define which optimizer to use self._train_op = tf.train.AdamOptimizer().minimize(loss) # train tensorflow graph self.session = tf.Session(config=self._tf_config) train_utils.train_tf_dataset( train_init_op, eval_init_op, batch_size_in, loss, acc, self._train_op, self.session, self._is_training, self.epochs, self.batch_in_size, self.evaluate_on_num_examples, self.evaluate_every_num_epochs, ) # rebuild the graph for prediction self.pred_confidence = self._build_tf_pred_graph(session_data) # process helpers # noinspection PyPep8Naming def _calculate_message_sim( self, batch: Tuple[np.ndarray]) -> Tuple[np.ndarray, List[float]]: """Calculate message similarities""" message_sim = self.session.run( self.pred_confidence, feed_dict={ _x_in: _x for _x_in, _x in zip(self.batch_in, batch) if _x is not None }, ) message_sim = message_sim.flatten() # sim is a matrix label_ids = message_sim.argsort()[::-1] if self.loss_type == "softmax" and self.ranking_length > 0: message_sim = train_utils.normalize(message_sim, self.ranking_length) message_sim[::-1].sort() # transform sim to python list for JSON serializing return label_ids, message_sim.tolist() @staticmethod def _text_features_present(session_data: SessionDataType) -> bool: return np.array([ f.nnz != 0 if isinstance(f, scipy.sparse.spmatrix) else f.any() for features in session_data["text_features"] for f in features ]).any() def predict_label( self, message: "Message" ) -> Tuple[Dict[Text, Any], List[Dict[Text, Any]]]: """Predicts the intent of the provided message.""" label = {"name": None, "confidence": 0.0} label_ranking = [] if self.session is None: logger.error("There is no trained tf.session: " "component is either not trained or " "didn't receive enough training data.") return label, label_ranking # create session data from message and convert it into a batch of 1 session_data = self._create_session_data([message]) # if no text-features are present (e.g. incoming message is not in the # vocab), do not predict a random intent if not self._text_features_present(session_data): return label, label_ranking batch = train_utils.prepare_batch(session_data, tuple_sizes=self.batch_tuple_sizes) # load tf graph and session label_ids, message_sim = self._calculate_message_sim(batch) # if X contains all zeros do not predict some label if label_ids.size > 0: label = { "name": self.inverted_label_dict[label_ids[0]], "confidence": message_sim[0], } if self.ranking_length and 0 < self.ranking_length < LABEL_RANKING_LENGTH: output_length = self.ranking_length else: output_length = LABEL_RANKING_LENGTH ranking = list(zip(list(label_ids), message_sim)) ranking = ranking[:output_length] label_ranking = [{ "name": self.inverted_label_dict[label_idx], "confidence": score } for label_idx, score in ranking] return label, label_ranking def process(self, message: "Message", **kwargs: Any) -> None: """Return the most likely label and its similarity to the input.""" label, label_ranking = self.predict_label(message) message.set("intent", label, add_to_output=True) message.set("intent_ranking", label_ranking, add_to_output=True) def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]: """Persist this model into the passed directory. Return the metadata necessary to load the model again. """ if self.session is None: return {"file": None} checkpoint = os.path.join(model_dir, file_name + ".ckpt") try: os.makedirs(os.path.dirname(checkpoint)) except OSError as e: # be happy if someone already created the path import errno if e.errno != errno.EEXIST: raise with self.graph.as_default(): train_utils.persist_tensor("batch_placeholder", self.batch_in, self.graph) train_utils.persist_tensor("similarity_all", self.sim_all, self.graph) train_utils.persist_tensor("pred_confidence", self.pred_confidence, self.graph) train_utils.persist_tensor("similarity", self.sim, self.graph) train_utils.persist_tensor("message_embed", self.message_embed, self.graph) train_utils.persist_tensor("label_embed", self.label_embed, self.graph) train_utils.persist_tensor("all_labels_embed", self.all_labels_embed, self.graph) saver = tf.train.Saver() saver.save(self.session, checkpoint) with open(os.path.join(model_dir, file_name + ".inv_label_dict.pkl"), "wb") as f: pickle.dump(self.inverted_label_dict, f) with open(os.path.join(model_dir, file_name + ".tf_config.pkl"), "wb") as f: pickle.dump(self._tf_config, f) with open( os.path.join(model_dir, file_name + ".batch_tuple_sizes.pkl"), "wb") as f: pickle.dump(self.batch_tuple_sizes, f) return {"file": file_name} @classmethod def load( cls, meta: Dict[Text, Any], model_dir: Text = None, model_metadata: "Metadata" = None, cached_component: Optional["EmbeddingIntentClassifier"] = None, **kwargs: Any, ) -> "EmbeddingIntentClassifier": """Loads the trained model from the provided directory.""" if model_dir and meta.get("file"): file_name = meta.get("file") checkpoint = os.path.join(model_dir, file_name + ".ckpt") with open(os.path.join(model_dir, file_name + ".tf_config.pkl"), "rb") as f: _tf_config = pickle.load(f) graph = tf.Graph() with graph.as_default(): session = tf.compat.v1.Session(config=_tf_config) saver = tf.compat.v1.train.import_meta_graph(checkpoint + ".meta") saver.restore(session, checkpoint) batch_in = train_utils.load_tensor("batch_placeholder") sim_all = train_utils.load_tensor("similarity_all") pred_confidence = train_utils.load_tensor("pred_confidence") sim = train_utils.load_tensor("similarity") message_embed = train_utils.load_tensor("message_embed") label_embed = train_utils.load_tensor("label_embed") all_labels_embed = train_utils.load_tensor("all_labels_embed") with open( os.path.join(model_dir, file_name + ".inv_label_dict.pkl"), "rb") as f: inv_label_dict = pickle.load(f) with open( os.path.join(model_dir, file_name + ".batch_tuple_sizes.pkl"), "rb") as f: batch_tuple_sizes = pickle.load(f) return cls( component_config=meta, inverted_label_dict=inv_label_dict, session=session, graph=graph, batch_placeholder=batch_in, similarity_all=sim_all, pred_confidence=pred_confidence, similarity=sim, message_embed=message_embed, label_embed=label_embed, all_labels_embed=all_labels_embed, batch_tuple_sizes=batch_tuple_sizes, ) else: raise_warning( f"Failed to load nlu model. " f"Maybe the path '{os.path.abspath(model_dir)}' doesn't exist?" ) return cls(component_config=meta)
class ResponseSelector(EmbeddingIntentClassifier): """Response selector using supervised embeddings. The response selector embeds user inputs and candidate response into the same space. Supervised embeddings are trained by maximizing similarity between them. It also provides rankings of the response that did not "win". The supervised response selector needs to be preceded by a featurizer in the pipeline. This featurizer creates the features used for the embeddings. It is recommended to use ``CountVectorsFeaturizer`` that can be optionally preceded by ``SpacyNLP`` and ``SpacyTokenizer``. Based on the starspace idea from: https://arxiv.org/abs/1709.03856. However, in this implementation the `mu` parameter is treated differently and additional hidden layers are added together with dropout. """ provides = [RESPONSE_ATTRIBUTE, "response_ranking"] requires = [ any_of(DENSE_FEATURE_NAMES[TEXT_ATTRIBUTE], SPARSE_FEATURE_NAMES[TEXT_ATTRIBUTE]), any_of( DENSE_FEATURE_NAMES[RESPONSE_ATTRIBUTE], SPARSE_FEATURE_NAMES[RESPONSE_ATTRIBUTE], ), ] # default properties (DOC MARKER - don't remove) defaults = { # nn architecture # sizes of hidden layers before the embedding layer for input words # the number of hidden layers is thus equal to the length of this list "hidden_layers_sizes_a": [256, 128], # sizes of hidden layers before the embedding layer for intent labels # the number of hidden layers is thus equal to the length of this list "hidden_layers_sizes_b": [256, 128], # Whether to share the hidden layer weights between input words and intent labels "share_hidden_layers": False, # training parameters # initial and final batch sizes - batch size will be # linearly increased for each epoch "batch_size": [64, 256], # how to create batches "batch_strategy": "balanced", # string 'sequence' or 'balanced' # number of epochs "epochs": 300, # set random seed to any int to get reproducible results "random_seed": None, # embedding parameters # default dense dimension used if no dense features are present "dense_dim": { "text": 512, "label": 20 }, # dimension size of embedding vectors "embed_dim": 20, # the type of the similarity "num_neg": 20, # flag if minimize only maximum similarity over incorrect actions "similarity_type": "auto", # string 'auto' or 'cosine' or 'inner' # the type of the loss function "loss_type": "softmax", # string 'softmax' or 'margin' # how similar the algorithm should try # to make embedding vectors for correct intent labels "mu_pos": 0.8, # should be 0.0 < ... < 1.0 for 'cosine' # maximum negative similarity for incorrect intent labels "mu_neg": -0.4, # should be -1.0 < ... < 1.0 for 'cosine' # flag: if true, only minimize the maximum similarity for # incorrect intent labels "use_max_sim_neg": True, # scale loss inverse proportionally to confidence of correct prediction "scale_loss": True, # regularization parameters # the scale of L2 regularization "C2": 0.002, # the scale of how critical the algorithm should be of minimizing the # maximum similarity between embeddings of different intent labels "C_emb": 0.8, # dropout rate for rnn "droprate": 0.2, # visualization of accuracy # how often to calculate training accuracy "evaluate_every_num_epochs": 20, # small values may hurt performance # how many examples to use for calculation of training accuracy "evaluate_on_num_examples": 0, # large values may hurt performance, # selector config # name of the intent for which this response selector is to be trained "retrieval_intent": None, } # end default properties (DOC MARKER - don't remove) def _load_selector_params(self, config: Dict[Text, Any]): self.retrieval_intent = config["retrieval_intent"] if not self.retrieval_intent: # retrieval intent was left to its default value logger.info( "Retrieval intent parameter was left to its default value. This response selector will be trained" "on training examples combining all retrieval intents.") def _load_params(self) -> None: super()._load_params() self._load_selector_params(self.component_config) @staticmethod def _set_message_property(message: "Message", prediction_dict: Dict[Text, Any], selector_key: Text): message_selector_properties = message.get( RESPONSE_SELECTOR_PROPERTY_NAME, {}) message_selector_properties[selector_key] = prediction_dict message.set( RESPONSE_SELECTOR_PROPERTY_NAME, message_selector_properties, add_to_output=True, ) def preprocess_train_data(self, training_data): """Performs sanity checks on training data, extracts encodings for labels and prepares data for training""" if self.retrieval_intent: training_data = training_data.filter_by_intent( self.retrieval_intent) label_id_dict = self._create_label_id_dict( training_data, attribute=RESPONSE_ATTRIBUTE) self.inverted_label_dict = {v: k for k, v in label_id_dict.items()} self._label_data = self._create_label_data( training_data, label_id_dict, attribute=RESPONSE_ATTRIBUTE) session_data = self._create_session_data( training_data.intent_examples, label_id_dict, label_attribute=RESPONSE_ATTRIBUTE, ) self.check_input_dimension_consistency(session_data) return session_data def process(self, message: "Message", **kwargs: Any) -> None: """Return the most likely response and its similarity to the input.""" label, label_ranking = self.predict_label(message) selector_key = (self.retrieval_intent if self.retrieval_intent else DEFAULT_OPEN_UTTERANCE_TYPE) logger.debug( f"Adding following selector key to message property: {selector_key}" ) prediction_dict = {"response": label, "ranking": label_ranking} self._set_message_property(message, prediction_dict, selector_key)