def _prepare_sparse_dense_layers(
    self,
    data_signature: List[FeatureSignature],
    name: Text,
    dense_dim: int,
) -> None:
    sparse = False
    dense = False
    for is_sparse, _, _ in data_signature:
        if is_sparse:
            sparse = True
        else:
            dense = True

    if sparse:
        self._tf_layers[f"sparse_to_dense.{name}"] = layers.DenseForSparse(
            units=dense_dim,
            reg_lambda=self.config[REGULARIZATION_CONSTANT],
            name=name,
        )
        if not dense:
            # create dense labels for the input to use in negative sampling
            self._tf_layers[
                f"sparse_to_dense_ids.{name}"
            ] = layers.DenseForSparse(
                units=2,
                use_bias=False,
                trainable=False,
                name=f"sparse_to_dense_ids.{name}",
            )
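# A minimal sketch (illustrative, not Rasa's actual implementation) of what the
# sparse-to-dense conversion above computes: `DenseForSparse` is essentially a
# Dense layer whose forward pass is a sparse-dense matrix multiply.
import tensorflow as tf

# A batch of two sparse feature vectors over 5 possible sparse features.
sparse_input = tf.SparseTensor(
    indices=[[0, 1], [0, 4], [1, 2]], values=[1.0, 1.0, 1.0], dense_shape=[2, 5]
)
dense_dim = 3
kernel = tf.random.normal((5, dense_dim))

# Equivalent of the layer's forward pass (ignoring bias and regularization).
dense_output = tf.sparse.sparse_dense_matmul(sparse_input, kernel)
print(dense_output.shape)  # (2, 3)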
def _prepare_masked_language_modeling(
    self,
    attribute: Text,
    attribute_signature: Dict[Text, List[FeatureSignature]],
    config: Dict[Text, Any],
) -> None:
    """Prepares masking and computes helper variables for masked language modeling.

    Only done for the text attribute and only if sequence-level (token-level)
    features are present (MLM requires token-level information).
    """
    if attribute == TEXT and SEQUENCE in attribute_signature and config[MASKED_LM]:
        self._enables_mlm = True
        self._tf_layers[self.MLM_INPUT_MASK] = layers.InputMask()

        # Unique IDs of different token types are needed to construct the possible
        # label space for MLM. If dense features are present, they're used as such
        # IDs, otherwise sparse features are embedded by a non-trainable
        # DenseForSparse layer to create small embeddings that serve as IDs.
        expect_dense_seq_features = any(
            not signature.is_sparse for signature in attribute_signature[SEQUENCE]
        )
        if not expect_dense_seq_features:
            self._tf_layers[
                self.SPARSE_TO_DENSE_FOR_TOKEN_IDS
            ] = layers.DenseForSparse(
                units=2,
                use_bias=False,
                trainable=False,
                name=f"{self.SPARSE_TO_DENSE_FOR_TOKEN_IDS}.{attribute}",
            )
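# A minimal sketch of the token-ID trick above: projecting sparse one-hot token
# features through a fixed, non-trainable kernel with only 2 units yields a
# small 2-d vector that is (almost surely) unique per token type, which is all
# MLM needs to tell labels apart. Plain-TF stand-in; names are illustrative.
import numpy as np
import tensorflow as tf

vocab_size = 4
fixed_kernel = tf.constant(np.random.random((vocab_size, 2)), dtype=tf.float32)

# One-hot rows for the token sequence [0, 2, 2, 3].
tokens = tf.SparseTensor(
    indices=[[0, 0], [1, 2], [2, 2], [3, 3]],
    values=[1.0, 1.0, 1.0, 1.0],
    dense_shape=[4, vocab_size],
)
token_ids = tf.sparse.sparse_dense_matmul(tokens, fixed_kernel)
print(token_ids.numpy())  # rows 1 and 2 match (same token); all others differ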
def __init__(
    self,
    dense_dim: List[int],
    model_dim: int,
    reg_lambda: float,
    drop_rate: float,
):
    super(InputLayer, self).__init__()
    self.dense_layers = [
        tf.keras.layers.Dense(units, activation='relu') for units in dense_dim
    ]
    self.sparse_dropout_layer = layers.SparseDropout(drop_rate)
    self.sparse_to_dense_layer = layers.DenseForSparse(
        units=dense_dim[0], reg_lambda=reg_lambda
    )
    self.output_layer = tf.keras.layers.Dense(model_dim, activation='relu')
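# The snippet above shows only `__init__`; a plausible `call` for such an input
# layer (hypothetical, assuming Rasa's `SparseDropout`/`DenseForSparse` layers)
# would route sparse inputs through dropout and the sparse-to-dense conversion
# before the shared dense stack:
def call(self, inputs, training: bool = False) -> tf.Tensor:
    if isinstance(inputs, tf.SparseTensor):
        inputs = self.sparse_dropout_layer(inputs, training)
        inputs = self.sparse_to_dense_layer(inputs)
    for dense_layer in self.dense_layers:
        inputs = dense_layer(inputs)
    return self.output_layer(inputs)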
def init_sparse_to_dense_layer(
    attribute: Text,
    feature_type: Text,
    input_size: int,
    output_size: int,
    reg_lambda: float,
) -> layers.DenseForSparse:
    """Creates a `DenseForSparse` layer with a fixed random kernel."""
    kernel_initializer = tf.constant_initializer(
        np.random.random((input_size, output_size))
    )
    layer = layers.DenseForSparse(
        name=f"sparse_to_dense.{attribute}_{feature_type}",
        kernel_initializer=kernel_initializer,
        reg_lambda=reg_lambda,
        units=output_size,
    )
    layer.build(input_shape=input_size)
    return layer
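# Example use of the helper above (assumes the same imports as the helper:
# numpy, tensorflow, and Rasa's custom `layers` module); values are illustrative.
layer = init_sparse_to_dense_layer(
    attribute="text",
    feature_type="sequence",
    input_size=10,
    output_size=5,
    reg_lambda=0.002,
)
assert layer.get_kernel().shape == (10, 5)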
def test_replace_dense_for_sparse_layers(
    new_sparse_feature_sizes: List[int],
    old_sparse_feature_sizes: List[int],
    feature_type: Text,
    use_bias: bool,
):
    """Tests if `DenseForSparse` layers are adjusted correctly."""
    output_units = 10
    kernel_initializer = tf.constant_initializer(
        np.random.random((sum(old_sparse_feature_sizes), output_units))
    )
    layer = layers.DenseForSparse(
        units=output_units,
        kernel_initializer=kernel_initializer,
        use_bias=use_bias,
    )
    layer.build(input_shape=sum(old_sparse_feature_sizes))
    new_layer = RasaCustomLayer._replace_dense_for_sparse_layer(
        layer_to_replace=layer,
        new_sparse_feature_sizes=new_sparse_feature_sizes,
        old_sparse_feature_sizes=old_sparse_feature_sizes,
        attribute=TEXT,
        feature_type=feature_type,
        reg_lambda=0.02,
    )
    new_layer.build(input_shape=sum(new_sparse_feature_sizes))
    # check dimensions
    assert new_layer.get_kernel().shape[0] == sum(new_sparse_feature_sizes)
    # check if bias tensor was preserved correctly
    if use_bias:
        assert np.array_equal(
            layer.get_bias().numpy(), new_layer.get_bias().numpy()
        )
    else:
        assert new_layer.get_bias() is None
    # check if the existing weights were preserved
    chunk_index, new_chunk_index = 0, 0
    kernel = layer.get_kernel().numpy()
    new_kernel = new_layer.get_kernel().numpy()
    for old_size, new_size in zip(
        old_sparse_feature_sizes, new_sparse_feature_sizes
    ):
        chunk = kernel[chunk_index : chunk_index + old_size, :]
        new_chunk = new_kernel[new_chunk_index : new_chunk_index + old_size, :]
        assert np.array_equal(chunk, new_chunk)
        chunk_index += old_size
        new_chunk_index += new_size
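# The snippet omits the test's parametrization; a plausible (purely illustrative)
# decorator placed directly above the test could be:
#
# @pytest.mark.parametrize(
#     "new_sparse_feature_sizes, old_sparse_feature_sizes, feature_type, use_bias",
#     [
#         ([10, 2], [5, 2], "sequence", True),
#         ([5], [5], "sentence", False),
#     ],
# )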
def _prepare_layers_for_sparse_tensors(
    self, attribute: Text, feature_type: Text, config: Dict[Text, Any]
) -> None:
    """Sets up sparse tensor pre-processing before combining with dense ones."""
    # For optionally applying dropout to sparse tensors
    if config[SPARSE_INPUT_DROPOUT]:
        self._tf_layers[self.SPARSE_DROPOUT] = layers.SparseDropout(
            rate=config[DROP_RATE]
        )
    # For converting sparse tensors to dense
    self._tf_layers[self.SPARSE_TO_DENSE] = layers.DenseForSparse(
        name=f"sparse_to_dense.{attribute}_{feature_type}",
        units=config[DENSE_DIMENSION][attribute],
        reg_lambda=config[REGULARIZATION_CONSTANT],
    )
    # For optionally applying dropout to sparse tensors after they're converted
    # to dense ones
    if config[DENSE_INPUT_DROPOUT]:
        self._tf_layers[self.DENSE_DROPOUT] = tf.keras.layers.Dropout(
            rate=config[DROP_RATE]
        )
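# A minimal sketch (plain-TF stand-ins, not the Rasa layers themselves) of the
# order in which these layers act on a sparse input at call time:
import tensorflow as tf

sparse_input = tf.sparse.from_dense(tf.constant([[0.0, 1.0, 0.0, 1.0]]))
kernel = tf.random.normal((4, 3))

# 1. (optional) sparse dropout would randomly drop entries of `sparse_input` here
# 2. sparse -> dense conversion, the core of `DenseForSparse`
dense = tf.sparse.sparse_dense_matmul(sparse_input, kernel)
# 3. (optional) ordinary dropout on the now-dense tensor
dense = tf.keras.layers.Dropout(rate=0.2)(dense, training=True)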
def _replace_dense_for_sparse_layer(
    layer_to_replace: layers.DenseForSparse,
    new_sparse_feature_sizes: List[int],
    old_sparse_feature_sizes: List[int],
    attribute: Text,
    feature_type: Text,
    reg_lambda: float,
) -> layers.DenseForSparse:
    """Replaces a `DenseForSparse` layer with a new one.

    Replaces an existing `DenseForSparse` layer with a new one
    in order to adapt it to incremental training.

    Args:
        layer_to_replace: a `DenseForSparse` layer that is used to create
            a new one.
        new_sparse_feature_sizes: sizes of sparse features that will be
            the input of the layer.
        old_sparse_feature_sizes: sizes of sparse features that used to be
            the input of the layer.
        attribute: an attribute of the data fed to the layer.
        feature_type: a feature type of the data fed to the layer.
        reg_lambda: regularization constant.

    Returns:
        New `DenseForSparse` layer.
    """
    kernel = layer_to_replace.get_kernel().numpy()
    bias = layer_to_replace.get_bias()
    if bias is not None:
        bias = bias.numpy()
    units = layer_to_replace.get_units()
    # split kernel by feature sizes to update the layer accordingly
    kernel_splits = []
    splitting_index = 0
    for size in old_sparse_feature_sizes:
        kernel_splits.append(kernel[splitting_index : splitting_index + size, :])
        splitting_index += size

    additional_sizes = [
        new_size - old_size
        for new_size, old_size in zip(
            new_sparse_feature_sizes, old_sparse_feature_sizes
        )
    ]
    std, mean = np.std(kernel), np.mean(kernel)
    additional_weights = [
        np.random.normal(mean, std, size=(num_rows, units)).astype(np.float32)
        for num_rows in additional_sizes
    ]
    merged_weights = [
        np.vstack((existing, new))
        for existing, new in zip(kernel_splits, additional_weights)
    ]
    # stack each merged chunk to form the new kernel
    new_weights = np.vstack(merged_weights)
    kernel_init = tf.constant_initializer(new_weights)
    bias_init = tf.constant_initializer(bias) if bias is not None else None
    new_layer = layers.DenseForSparse(
        name=f"sparse_to_dense.{attribute}_{feature_type}",
        reg_lambda=reg_lambda,
        units=units,
        use_bias=bias is not None,
        kernel_initializer=kernel_init,
        bias_initializer=bias_init,
    )
    return new_layer
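# A self-contained numpy illustration of the kernel surgery above (illustrative
# sizes): a kernel for two sparse feature blocks of sizes [2, 3] grows to block
# sizes [4, 3]; existing rows stay in place, new rows are freshly initialized.
import numpy as np

units = 2
old_sizes, new_sizes = [2, 3], [4, 3]
kernel = np.arange(sum(old_sizes) * units, dtype=np.float32).reshape(-1, units)

splits, index = [], 0
for size in old_sizes:
    splits.append(kernel[index : index + size, :])
    index += size

std, mean = np.std(kernel), np.mean(kernel)
merged = [
    np.vstack((chunk, np.random.normal(mean, std, size=(new - old, units))))
    for chunk, new, old in zip(splits, new_sizes, old_sizes)
]
new_kernel = np.vstack(merged)

assert new_kernel.shape == (sum(new_sizes), units)
# first block: old rows 0-1 are preserved at the same offset
assert np.array_equal(new_kernel[0:2], kernel[0:2])
# second block: old rows 2-4 now start at row 4 (after the grown first block)
assert np.array_equal(new_kernel[4:7], kernel[2:5])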