Example #1
    def __init__(self,
                 word_embed_matrix: np.ndarray,
                 char_embed_matrix: np.ndarray,
                 n_heads: int = 8,
                 d_model: int = 128,
                 d_filter: int = 512,
                 char_limit: int = 16,
                 dropout: Optional[float] = None) -> None:
        """QANet (Based on https://arxiv.org/abs/1804.09541)

        Arguments:
            word_embed_matrix {np.ndarray} -- Word-level embedding matrix
            char_embed_matrix {np.ndarray} -- Character-level embedding matrix

        Keyword Arguments:
            n_heads {int} -- Number of self-attention heads (default: {8})
            d_model {int} -- Internal dimension of the model (default: {128})
            d_filter {int} -- Internal dimension of the filters (default: {512})
            char_limit {int} -- Character limit for each word (default: {16})
            dropout {Optional[float]} -- Optional dropout constant (default: {None})
        """

        super().__init__()
        self.n_symbols_in = word_embed_matrix.shape[0]
        self.n_symbols_out = word_embed_matrix.shape[0]  # TODO: Fix this bug?
        self.n_heads = n_heads
        self.d_model = d_model
        self.d_filter = d_filter
        self.char_limit = char_limit
        self.dropout = dropout

        self.input_embedding = QANetInputEmbedding(self.d_model,
                                                   word_embed_matrix,
                                                   char_embed_matrix,
                                                   dropout=self.dropout,
                                                   batch_norm=False)
        self.embedding_encoder = Stack([
            QANetEncoderBlock(n_conv=4,
                              n_heads=self.n_heads,
                              filter_size=self.d_filter,
                              hidden_size=self.d_model,
                              dropout=self.dropout) for _ in range(1)
        ])

        self.context_query_attention = ContextQueryAttention(
            None, self.d_model)

        self.model_encoder_projection = tf.keras.layers.Conv1D(filters=d_model,
                                                               kernel_size=1)
        self.model_encoder = Stack([
            QANetEncoderBlock(n_conv=2,
                              n_heads=self.n_heads,
                              filter_size=self.d_filter,
                              hidden_size=self.d_model,
                              dropout=self.dropout) for _ in range(7)
        ])

        self.output_layer = tf.keras.layers.Dense(self.n_symbols_out)
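For context, a minimal instantiation sketch. It assumes the class above is named QANet and that the surrounding module (QANetInputEmbedding, Stack, ContextQueryAttention, ...) is importable; the random matrices stand in for pretrained word and character embeddings, and every size below is illustrative:

import numpy as np

# Illustrative sizes: a 30k-word vocabulary with 300-d vectors and a
# 200-character alphabet with 64-d vectors (a real model would load
# pretrained matrices instead of random ones).
word_embed_matrix = np.random.randn(30000, 300).astype(np.float32)
char_embed_matrix = np.random.randn(200, 64).astype(np.float32)

model = QANet(word_embed_matrix,
              char_embed_matrix,
              n_heads=8,
              d_model=128,
              d_filter=512,
              char_limit=16,
              dropout=0.1)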
Example #2
    def __init__(self, filter_size: int, hidden_size: int,
                 dropout: Optional[float]) -> None:
        """Residual feed-forward block

        Arguments:
            filter_size {int} -- The size of the filter
            hidden_size {int} -- The size of the hidden layer
            dropout {Optional[float]} -- Optional dropout constant
        """

        super().__init__()
        dense_relu_dense = DenseStack([filter_size, hidden_size],
                                      output_activation=None)
        if dropout is not None:
            dropout = tf.keras.layers.Dropout(dropout)
            dense_relu_dense = Stack([dense_relu_dense, dropout])
        self.residual_dense = Residual(dense_relu_dense)
        self.norm = LayerNorm()
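Residual above is presumably a thin wrapper that adds a layer's input to its output; a minimal sketch of such a wrapper under that assumption (the library's own class may differ):

import tensorflow as tf


class ResidualSketch(tf.keras.layers.Layer):
    """Wraps a layer f so the output becomes x + f(x)."""

    def __init__(self, layer: tf.keras.layers.Layer) -> None:
        super().__init__()
        self.layer = layer

    def call(self, inputs):
        # Assumes the wrapped layer preserves the input shape.
        return inputs + self.layer(inputs)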
Example #3
    def __init__(self,
                 n_layers: int,
                 n_heads: int,
                 d_model: int,
                 d_filter: int,
                 dropout: Optional[float] = None) -> None:
        """Stack of Transformer decoder blocks

        Arguments:
            n_layers {int} -- Number of decoder blocks in the stack
            n_heads {int} -- Number of self-attention heads
            d_model {int} -- Internal dimension of the model
            d_filter {int} -- Internal dimension of the filters
            dropout {Optional[float]} -- Optional dropout constant (default: {None})
        """

        super().__init__()
        self.decoding_stack = Stack([
            TransformerDecoderBlock(n_heads, d_filter, d_model, dropout)
            for _ in range(n_layers)
        ])
Example #4
    def _setup_logit_function(self, activation=None):
        # Total number of logits is the product of the action shape.
        ac_dim = reduce(mul, self._ac_shape)

        if self._logit_architecture is None:
            self._logit_architecture = []
        # Dense stack producing ac_dim logits, then reshape back to the action shape.
        logit_function = Stack()
        logit_function.add(
            DenseStack(self._logit_architecture + [ac_dim],
                       output_activation=activation))
        logit_function.add(tf.keras.layers.Reshape(self._ac_shape))
        return logit_function
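reduce and mul come from the standard library (functools and operator); a tiny, self-contained illustration of the flattening step with a made-up action shape (the _ac_shape attribute itself is not shown in this snippet):

from functools import reduce
from operator import mul

ac_shape = (3, 4)               # hypothetical multi-dimensional action shape
ac_dim = reduce(mul, ac_shape)  # 12 -- the total number of logits to produce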
Example #5
    def __init__(self, filter_size: int, hidden_size: int,
                 dropout: Optional[float]) -> None:
        """QANet Feed Forward block

        Arguments:
            filter_size {int} -- The size of the filter
            hidden_size {int} -- The size of the hidden layer
            dropout {Optional[float]} -- Optional dropout constant
        """

        super().__init__()
        dense_relu_dense = DenseStack([filter_size, hidden_size],
                                      output_activation=None)
        if dropout is not None:
            dropout = tf.keras.layers.Dropout(dropout)
            dense_relu_dense = Stack([dense_relu_dense, dropout])
        self.feed_forward = dense_relu_dense
        self.norm = LayerNorm()
Example #6
    def __init__(self, filters: int, kernel_size: int,
                 dropout: Optional[float]) -> None:
        """QANet Convolutional block

        Arguments:
            filters {int} -- The number of filters in the block
            kernel_size {int} -- The size of the kernel for the block
            dropout {Optional[float]} -- Optional dropout constant
        """

        super().__init__()
        conv_layer = tf.keras.layers.SeparableConv1D(filters,
                                                     kernel_size,
                                                     padding='same')
        if dropout is not None:
            dropout = tf.keras.layers.Dropout(dropout)
            conv_layer = Stack([conv_layer, dropout])
        self.conv_layer = conv_layer
        self.norm = LayerNorm()
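The 'same' padding above keeps the sequence length unchanged, so the block's output stays aligned with its input along the time axis. A quick shape check with illustrative sizes:

import tensorflow as tf

conv = tf.keras.layers.SeparableConv1D(filters=128, kernel_size=7, padding='same')
x = tf.zeros((2, 50, 128))   # (batch, sequence, channels)
print(conv(x).shape)         # (2, 50, 128): 'same' padding preserves the length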
Example #7
    def __init__(self,
                 d_model: int,
                 word_embed_initializer: np.ndarray,
                 char_embed_initializer: np.ndarray,
                 dropout: Optional[float] = None,
                 batch_norm: bool = False) -> None:
        """QANet Imput embedding class

        Arguments:
            d_model {int} -- The model dimension
            word_embed_initializer {np.ndarray} -- The word-level embedding matrix
            char_embed_initializer {np.ndarray} -- The character-level embedding matrix

        Keyword Arguments:
            dropout {Optional[float]} -- Dropout constant in the embedding (default: {None})
            batch_norm {bool} -- Use batch normalization in the embedding (default: {False})
        """

        super().__init__()
        self.word_embedding = tf.keras.layers.Embedding(
            word_embed_initializer.shape[0],
            word_embed_initializer.shape[1],
            weights=[word_embed_initializer],
            mask_zero=True)
        self.char_embedding = tf.keras.layers.Embedding(
            char_embed_initializer.shape[0],
            char_embed_initializer.shape[1],
            weights=[char_embed_initializer],
            mask_zero=True)
        self.char_conv = tf.keras.layers.Conv1D(
            filters=char_embed_initializer.shape[1], kernel_size=5)
        self.projection_conv = tf.keras.layers.Conv1D(filters=d_model,
                                                      kernel_size=1)

        self.highway = Stack([Highway(dropout=dropout) for _ in range(2)])

        self.position_embedding = PositionEmbedding()
        self.dropout = (None if dropout is None
                        else tf.keras.layers.Dropout(dropout))
        self.batch_norm = (None if batch_norm is False
                           else tf.keras.layers.BatchNormalization())
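Following the QANet paper cited above, the character-level path typically embeds each character, runs a 1-D convolution over the characters of every word, and max-pools over the character axis before the result is combined with the word embedding. A self-contained sketch of that pooling step with illustrative shapes (this is not the call method of the class above):

import tensorflow as tf

batch, seq_len, char_limit, char_dim = 4, 20, 16, 64

char_ids = tf.zeros((batch, seq_len, char_limit), dtype=tf.int32)
char_embed = tf.keras.layers.Embedding(200, char_dim)(char_ids)       # (4, 20, 16, 64)

# Fold words into the batch axis so Conv1D slides over the character axis.
char_embed = tf.reshape(char_embed, (-1, char_limit, char_dim))       # (80, 16, 64)
char_conv = tf.keras.layers.Conv1D(filters=char_dim, kernel_size=5)(char_embed)
char_repr = tf.reduce_max(char_conv, axis=1)                          # max-pool over characters
char_repr = tf.reshape(char_repr, (batch, seq_len, char_dim))         # (4, 20, 64)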
Example #8
    def __init__(self,
                 n_conv: int,
                 n_heads: int,
                 filter_size: int,
                 hidden_size: int,
                 dropout: Optional[float] = None) -> None:
        """QANet Encoder Block

        Arguments:
            n_conv {int} -- Number of convolutions in the encoder layer
            n_heads {int} -- Number of heads in the self-attention
            filter_size {int} -- Filter size in the feed-forward layer
            hidden_size {int} -- Hidden layer size in the feed-forward layer/conv block

        Keyword Arguments:
            dropout {Optional[float]} -- Optional dropout constant (default: {None})
        """

        super().__init__()
        self.conv_stack = Stack(
            [QANetConvBlock(hidden_size, 7, dropout) for _ in range(n_conv)])
        self.self_attention = QANetSelfAttention(n_heads, dropout)
        self.feed_forward = QANetFeedForward(filter_size, hidden_size, dropout)
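The block's forward pass is not part of this snippet; in the paper's formulation the convolutions, the self-attention, and the feed-forward layer are applied in that order, each wrapped with a residual connection and layer normalization. A self-contained Keras sketch of that composition, using a post-norm convention that may differ from the class above:

import tensorflow as tf


def qanet_encoder_block_sketch(x, n_conv=4, n_heads=8, d_model=128, d_filter=512):
    """Illustrative composition only; assumes x has last dimension d_model."""
    for _ in range(n_conv):
        h = tf.keras.layers.SeparableConv1D(d_model, 7, padding='same')(x)
        x = tf.keras.layers.LayerNormalization()(x + h)       # residual + norm
    h = tf.keras.layers.MultiHeadAttention(n_heads, d_model // n_heads)(x, x)
    x = tf.keras.layers.LayerNormalization()(x + h)
    h = tf.keras.layers.Dense(d_filter, activation='relu')(x)
    h = tf.keras.layers.Dense(d_model)(h)
    return tf.keras.layers.LayerNormalization()(x + h)


y = qanet_encoder_block_sketch(tf.zeros((2, 30, 128)))         # (2, 30, 128)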