def __init__(self,
             word_embed_matrix: np.ndarray,
             char_embed_matrix: np.ndarray,
             n_heads: int = 8,
             d_model: int = 128,
             d_filter: int = 512,
             char_limit: int = 16,
             dropout: Optional[float] = None) -> None:
    """QANet (based on https://arxiv.org/abs/1804.09541)

    Arguments:
        word_embed_matrix {np.ndarray} -- Word-level embedding matrix
        char_embed_matrix {np.ndarray} -- Character-level embedding matrix

    Keyword Arguments:
        n_heads {int} -- Number of self-attention heads (default: {8})
        d_model {int} -- Internal dimension of the model (default: {128})
        d_filter {int} -- Internal dimension of the filters (default: {512})
        char_limit {int} -- Character limit for each word (default: {16})
        dropout {Optional[float]} -- Optional dropout constant (default: {None})
    """
    super().__init__()
    self.n_symbols_in = word_embed_matrix.shape[0]
    self.n_symbols_out = word_embed_matrix.shape[0]  # TODO: Fix this bug?
    self.n_heads = n_heads
    self.d_model = d_model
    self.d_filter = d_filter
    self.char_limit = char_limit
    self.dropout = dropout

    self.input_embedding = QANetInputEmbedding(self.d_model,
                                               word_embed_matrix,
                                               char_embed_matrix,
                                               dropout=self.dropout,
                                               batch_norm=False)
    self.embedding_encoder = Stack([
        QANetEncoderBlock(n_conv=4,
                          n_heads=self.n_heads,
                          filter_size=self.d_filter,
                          hidden_size=self.d_model,
                          dropout=self.dropout) for _ in range(1)
    ])
    self.context_query_attention = ContextQueryAttention(None, self.d_model)
    self.model_encoder_projection = tf.keras.layers.Conv1D(filters=d_model,
                                                           kernel_size=1)
    self.model_encoder = Stack([
        QANetEncoderBlock(n_conv=2,
                          n_heads=self.n_heads,
                          filter_size=self.d_filter,
                          hidden_size=self.d_model,
                          dropout=self.dropout) for _ in range(7)
    ])
    self.output_layer = tf.keras.layers.Dense(self.n_symbols_out)
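# Usage sketch only (a minimal, hypothetical example, not from the source): the
# enclosing class is assumed to be named `QANet`, and the random matrices below
# stand in for pretrained word/character embeddings; all shapes are illustrative.
#
#     import numpy as np
#
#     word_matrix = np.random.randn(30000, 300).astype(np.float32)  # (vocab_size, word_dim)
#     char_matrix = np.random.randn(200, 64).astype(np.float32)     # (n_chars, char_dim)
#
#     model = QANet(word_matrix, char_matrix, n_heads=8, d_model=128,
#                   d_filter=512, char_limit=16, dropout=0.1)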
def __init__(self, filter_size: int, hidden_size: int,
             dropout: Optional[float]) -> None:
    """Feed-forward block wrapped in a residual connection.

    Arguments:
        filter_size {int} -- The size of the filter
        hidden_size {int} -- The size of the hidden layer
        dropout {Optional[float]} -- Optional dropout constant
    """
    super().__init__()
    dense_relu_dense = DenseStack([filter_size, hidden_size],
                                  output_activation=None)
    if dropout is not None:
        dropout = tf.keras.layers.Dropout(dropout)
        dense_relu_dense = Stack([dense_relu_dense, dropout])
    self.residual_dense = Residual(dense_relu_dense)
    self.norm = LayerNorm()
def __init__(self, n_layers: int, n_heads: int, d_model: int, d_filter: int,
             dropout: Optional[float] = None) -> None:
    """Stack of Transformer decoder blocks.

    Arguments:
        n_layers {int} -- Number of decoder blocks in the stack
        n_heads {int} -- Number of attention heads per block
        d_model {int} -- Internal dimension of the model
        d_filter {int} -- Internal dimension of the filters

    Keyword Arguments:
        dropout {Optional[float]} -- Optional dropout constant (default: {None})
    """
    super().__init__()
    self.decoding_stack = Stack([
        TransformerDecoderBlock(n_heads, d_filter, d_model, dropout)
        for _ in range(n_layers)
    ])
def _setup_logit_function(self, activation=None):
    # Flattened action dimensionality; requires `from functools import reduce`
    # and `from operator import mul` at module level.
    ac_dim = reduce(mul, self._ac_shape)
    if self._logit_architecture is None:
        self._logit_architecture = []

    logit_function = Stack()
    logit_function.add(
        DenseStack(self._logit_architecture + [ac_dim],
                   output_activation=activation))
    # Restore the flat logits to the original action shape.
    logit_function.add(tf.keras.layers.Reshape(self._ac_shape))
    return logit_function
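# Shape sketch (hypothetical values, not taken from the source): for a
# multi-dimensional action space, `reduce(mul, self._ac_shape)` flattens the
# action shape into a single logit dimension, which the final Reshape layer
# restores.
#
#     from functools import reduce
#     from operator import mul
#
#     ac_shape = (4, 2)               # e.g. 4 independent 2-way decisions
#     ac_dim = reduce(mul, ac_shape)  # 8 logits per sample
#     assert ac_dim == 8              # Dense emits (batch, 8); Reshape yields (batch, 4, 2)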
def __init__(self, filter_size: int, hidden_size: int, dropout: Optional[float]) -> None: """QANet Feed Forward block Arguments: filter_size {int} -- The size of the filter hidden_size {int} -- The size of the hidden layer dropout {Optional[float]} -- Optional dropout constant """ super().__init__() dense_relu_dense = DenseStack([filter_size, hidden_size], output_activation=None) if dropout is not None: dropout = tf.keras.layers.Dropout(dropout) dense_relu_dense = Stack([dense_relu_dense, dropout]) self.feed_forward = dense_relu_dense self.norm = LayerNorm()
def __init__(self, filters: int, kernel_size: int, dropout: Optional[float]) -> None: """QANet Convolutional block Arguments: filters {int} -- The number of filters in the block kernel_size {int} -- The size of the kernel for the block dropout {Optional[float]} -- Optional dropout constant """ super().__init__() conv_layer = tf.keras.layers.SeparableConv1D(filters, kernel_size, padding='same') if dropout is not None: dropout = tf.keras.layers.Dropout(dropout) conv_layer = Stack([conv_layer, dropout]) self.conv_layer = conv_layer self.norm = LayerNorm()
def __init__(self,
             d_model: int,
             word_embed_initializer: np.ndarray,
             char_embed_initializer: np.ndarray,
             dropout: Optional[float] = None,
             batch_norm: bool = False) -> None:
    """QANet Input embedding class

    Arguments:
        d_model {int} -- The model dimension
        word_embed_initializer {np.ndarray} -- The word-level embedding matrix
        char_embed_initializer {np.ndarray} -- The character-level embedding matrix

    Keyword Arguments:
        dropout {Optional[float]} -- Dropout constant in the embedding (default: {None})
        batch_norm {bool} -- Use batch normalization in the embedding (default: {False})
    """
    super().__init__()
    self.word_embedding = tf.keras.layers.Embedding(
        word_embed_initializer.shape[0],
        word_embed_initializer.shape[1],
        weights=[word_embed_initializer],
        mask_zero=True)
    self.char_embedding = tf.keras.layers.Embedding(
        char_embed_initializer.shape[0],
        char_embed_initializer.shape[1],
        weights=[char_embed_initializer],
        mask_zero=True)
    # Character-level convolution (kernel size 5) over the character embeddings.
    self.char_conv = tf.keras.layers.Conv1D(
        filters=char_embed_initializer.shape[1], kernel_size=5)
    # 1x1 convolution projecting the combined embedding to d_model.
    self.projection_conv = tf.keras.layers.Conv1D(filters=d_model,
                                                  kernel_size=1)
    self.highway = Stack([Highway(dropout=dropout) for _ in range(2)])
    self.position_embedding = PositionEmbedding()
    self.dropout = None if dropout is None else tf.keras.layers.Dropout(dropout)
    self.batch_norm = None if batch_norm is False else tf.keras.layers.BatchNormalization()
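# Sizing sketch (hypothetical shapes): the embedding layers take their
# vocabulary size and embedding dimension directly from the initializer
# matrices, e.g. a (30000, 300) word matrix gives a 300-dimensional word
# embedding over a 30000-word vocabulary, which projection_conv then maps
# to d_model.
#
#     import numpy as np
#
#     word_init = np.random.randn(30000, 300).astype(np.float32)
#     char_init = np.random.randn(200, 64).astype(np.float32)
#     embedding = QANetInputEmbedding(d_model=128,
#                                     word_embed_initializer=word_init,
#                                     char_embed_initializer=char_init,
#                                     dropout=0.1,
#                                     batch_norm=False)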
def __init__(self, n_conv: int, n_heads: int, filter_size: int, hidden_size: int, dropout: Optional[float] = None) -> None: """QANet Encoder Block Arguments: n_conv {int} -- Number of convolutions in the encoder layer n_heads {int} -- Number of heads in the self-attention filter_size {int} -- Filter size in the feed-forward layer hidden_size {int} -- Hidden layer size in the feed-forward layer/conv block Keyword Arguments: dropout {Optional[float]} -- Optional dropout constant (default: {None}) """ super().__init__() self.conv_stack = Stack( [QANetConvBlock(hidden_size, 7, dropout) for _ in range(n_conv)]) self.self_attention = QANetSelfAttention(n_heads, dropout) self.feed_forward = QANetFeedForward(filter_size, hidden_size, dropout)