from typing import Optional, Text, Tuple

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa


# Constructor of the multi-head attention layer: queries, keys, values, and
# the attention output are all projected through sparse RandomlyConnectedDense
# layers instead of ordinary Dense layers.
def __init__(
    self,
    units: int,
    num_heads: int,
    attention_dropout_rate: float = 0.0,
    density: float = 0.2,
    unidirectional: bool = False,
    use_key_relative_position: bool = False,
    use_value_relative_position: bool = False,
    max_relative_position: Optional[int] = None,
    heads_share_relative_embedding: bool = False,
) -> None:
    super().__init__()
    if units % num_heads != 0:
        raise ValueError(
            f"number of units {units} should be a multiple of "
            f"the number of attention heads {num_heads}."
        )
    self.num_heads = num_heads
    self.units = units
    self.attention_dropout_rate = attention_dropout_rate
    self.unidirectional = unidirectional
    self.use_key_relative_position = use_key_relative_position
    self.use_value_relative_position = use_value_relative_position
    self.relative_length = max_relative_position
    if self.relative_length is not None:
        self.relative_length += 1  # include current time
    self.heads_share_relative_embedding = heads_share_relative_embedding
    self._depth = units // self.num_heads

    # process queries
    self._query_dense_layer = RandomlyConnectedDense(
        units=units, use_bias=False, density=density
    )
    # process keys
    self._key_dense_layer = RandomlyConnectedDense(
        units=units, use_bias=False, density=density
    )
    # process values
    self._value_dense_layer = RandomlyConnectedDense(
        units=units, use_bias=False, density=density
    )
    # process attention output
    self._output_dense_layer = RandomlyConnectedDense(units=units, density=density)

    self._create_relative_embeddings()
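# `RandomlyConnectedDense` itself is not part of this excerpt. The tests below
# pin down its contract: connections are randomly masked according to
# `density`, yet even at density 0.0 every input and every output keeps at
# least one connection. A minimal sketch of one way to satisfy that contract
# (class name and masking scheme here are illustrative, not the actual
# implementation):


class RandomlyConnectedDenseSketch(tf.keras.layers.Dense):
    def __init__(self, density: float = 0.2, **kwargs) -> None:
        super().__init__(**kwargs)
        self.density = density

    def build(self, input_shape) -> None:
        super().build(input_shape)
        input_dim = int(input_shape[-1])
        # Randomly keep roughly `density` of all input-output connections.
        mask = np.random.random((input_dim, self.units)) < self.density
        # Guarantee minimal connectivity even at density 0.0: every input i
        # reaches output i % units, and every output j is reached from input
        # j % input_dim, so no unit is left disconnected.
        for i in range(input_dim):
            mask[i, i % self.units] = True
        for j in range(self.units):
            mask[j % input_dim, j] = True
        self._kernel_mask = tf.constant(
            mask.astype("float32"), dtype=self.kernel.dtype
        )

    def call(self, inputs: tf.Tensor) -> tf.Tensor:
        # Mask the kernel on every call so dropped connections never contribute.
        outputs = tf.tensordot(inputs, self.kernel * self._kernel_mask, axes=1)
        if self.use_bias:
            outputs = tf.nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            outputs = self.activation(outputs)
        return outputs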
def test_randomly_connected_dense_all_inputs_connected():
    layer = RandomlyConnectedDense(density=0.0, units=2, use_bias=False)
    # Create a unit vector [1, 0, 0, 0, ...]
    x = np.zeros(10)
    x[0] = 1.0
    # Roll the 1 through all 10 positions, i.e. feed every standard basis vector
    for _ in range(10):
        x = np.roll(x, 1)
        y = layer(np.expand_dims(x, 0))
        # A non-zero output means this input unit is connected to some output.
        assert tf.reduce_sum(y).numpy() != 0.0
def __init__(
    self,
    units: int,
    num_heads: int,
    filter_units: int,
    dropout_rate: float = 0.1,
    attention_dropout_rate: float = 0.0,
    density: float = 0.2,
    unidirectional: bool = False,
    use_key_relative_position: bool = False,
    use_value_relative_position: bool = False,
    max_relative_position: Optional[int] = None,
    heads_share_relative_embedding: bool = False,
) -> None:
    super().__init__()
    self._layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self._mha = MultiHeadAttention(
        units,
        num_heads,
        attention_dropout_rate,
        density,
        unidirectional,
        use_key_relative_position,
        use_value_relative_position,
        max_relative_position,
        heads_share_relative_embedding,
    )
    self._dropout = tf.keras.layers.Dropout(dropout_rate)

    self._ffn_layers = [
        tf.keras.layers.LayerNormalization(epsilon=1e-6),
        RandomlyConnectedDense(
            units=filter_units, activation=tfa.activations.gelu, density=density
        ),  # (batch_size, length, filter_units)
        tf.keras.layers.Dropout(dropout_rate),
        RandomlyConnectedDense(
            units=units, density=density
        ),  # (batch_size, length, units)
        tf.keras.layers.Dropout(dropout_rate),
    ]
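# The encoder layer's forward pass is not shown in this excerpt. A sketch of
# the pre-norm residual wiring implied by the fields above; the exact
# signature, the mask handling, and the `(output, attention_weights)` return
# value of the attention layer are assumptions:


def call(self, x: tf.Tensor, pad_mask: tf.Tensor, training: bool) -> tf.Tensor:
    # Self-attention block: normalize, attend, drop out, add residual.
    x_norm = self._layer_norm(x)  # (batch_size, length, units)
    attn_out, _ = self._mha(
        x_norm, x_norm, x_norm, pad_mask=pad_mask, training=training
    )
    x += self._dropout(attn_out, training=training)

    # Feed-forward block: run the `_ffn_layers` stack, then a second residual.
    ffn = x
    for layer in self._ffn_layers:
        ffn = layer(ffn, training=training)
    return x + ffn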
def __init__(
    self,
    num_layers: int,
    units: int,
    num_heads: int,
    filter_units: int,
    reg_lambda: float,
    dropout_rate: float = 0.1,
    attention_dropout_rate: float = 0.0,
    density: float = 0.2,
    unidirectional: bool = False,
    use_key_relative_position: bool = False,
    use_value_relative_position: bool = False,
    max_relative_position: Optional[int] = None,
    heads_share_relative_embedding: bool = False,
    name: Optional[Text] = None,
) -> None:
    super().__init__(name=name)
    self.units = units
    self.unidirectional = unidirectional

    l2_regularizer = tf.keras.regularizers.l2(reg_lambda)
    self._embedding = RandomlyConnectedDense(
        units=units, kernel_regularizer=l2_regularizer, density=density
    )

    # positional encoding helpers
    self._angles = self._get_angles()
    self._even_indices = np.arange(0, self.units, 2, dtype=np.int32)[:, np.newaxis]
    self._odd_indices = np.arange(1, self.units, 2, dtype=np.int32)[:, np.newaxis]

    self._dropout = tf.keras.layers.Dropout(dropout_rate)

    self._enc_layers = [
        TransformerEncoderLayer(
            units,
            num_heads,
            filter_units,
            dropout_rate,
            attention_dropout_rate,
            density,
            unidirectional,
            use_key_relative_position,
            use_value_relative_position,
            max_relative_position,
            heads_share_relative_embedding,
        )
        for _ in range(num_layers)
    ]
    self._layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
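# `_get_angles` and the positional-encoding step are not shown. The
# `_even_indices`/`_odd_indices` helpers strongly suggest the standard
# sinusoidal encoding (Vaswani et al., 2017): sine at even feature indices,
# cosine at odd ones. A sketch under that assumption (the helper below is
# illustrative, not the verbatim implementation):


def _get_angles(self) -> np.ndarray:
    # Angle rate 1 / 10000^(2i / units) for feature index i, shape (1, units).
    i = np.arange(self.units, dtype=np.float32)[np.newaxis, :]
    return 1.0 / np.power(10000.0, (2 * (i // 2)) / np.float32(self.units))


def _positional_encoding_sketch(self, max_position: int) -> tf.Tensor:
    # Position p times the angle rate of feature i -> shape (max_position, units).
    angle_rads = (
        np.arange(max_position, dtype=np.float32)[:, np.newaxis] * self._angles
    )
    encoding = np.zeros_like(angle_rads)
    even, odd = self._even_indices[:, 0], self._odd_indices[:, 0]
    encoding[:, even] = np.sin(angle_rads[:, even])  # sin(p / 10000^(2i/units))
    encoding[:, odd] = np.cos(angle_rads[:, odd])  # cos(p / 10000^(2i/units))
    return tf.constant(encoding[np.newaxis, ...])  # (1, max_position, units)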
def test_randomly_connected_dense_output_always_dense(
    inputs: np.ndarray, units: int, expected_num_non_zero_outputs: int
):
    layer = RandomlyConnectedDense(density=0.0, units=units, use_bias=False)
    y = layer(inputs)
    num_non_zero_outputs = tf.math.count_nonzero(y).numpy()
    assert num_non_zero_outputs == expected_num_non_zero_outputs
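# The test above is presumably driven by `pytest.mark.parametrize`; the
# decorator is not part of this excerpt. Hypothetical example values: at
# density 0.0 the output must still be fully dense, so the expected count is
# simply the total number of output elements, e.g.
#
# @pytest.mark.parametrize(
#     "inputs, units, expected_num_non_zero_outputs",
#     [
#         (np.ones((1, 10)), 2, 2),      # 1 row x 2 units
#         (np.ones((3, 5, 10)), 4, 60),  # 3 x 5 x 4 output elements
#     ],
# )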
def test_randomly_connected_dense_shape(
    inputs: np.ndarray, units: int, expected_output_shape: Tuple[int, ...]
):
    layer = RandomlyConnectedDense(units=units)
    y = layer(inputs)
    assert y.shape == expected_output_shape
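# Likewise, a hypothetical parametrization for the shape test (not part of
# this excerpt): the layer only replaces the last dimension with `units`.
#
# @pytest.mark.parametrize(
#     "inputs, units, expected_output_shape",
#     [
#         (np.ones((2, 10)), 4, (2, 4)),
#         (np.ones((2, 5, 10)), 4, (2, 5, 4)),
#     ],
# )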